我有一個 pandas DataFrame,其中名爲 'Date' 的列格式爲:'%Y%m%d%H%M%H%M'(第一個 %H%M 是當地時間,第二個 %H%M 是 UTC)。將Pandas列轉換爲數據幀
我想將其轉換爲格式:%Y-%m-%d_%H%M(保持UTC%H%M)。
obs_df = pd.read_csv(obs, names= ['WBAN','Date','Extinc Coeff', 'D/N', 'Dir 2min av wind',
'Spd 2min av wind(kts)', 'Dir max 5min av wind','Spd_max_5min_av_wind(kts)',
'Constant','Runway vis range'], usecols= ['WBAN', 'Date',
'Spd_max_5min_av_wind(kts)'],
na_filter=False)
數據框看起來是這樣的:
Date WBAN Spd_max_5min_av_wind(kts)
0 2014100108481348 KACK 19
1 2014100108491349 KACK 18
2 2014100108501350 KACK 20
3 2014100108511351 KACK 19
4 2014100108521352 KACK 17
以下是我已經試過的方法:
import datetime as dt
obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
和
obs_df['Date'] = pd.to_datetime(obs_df['Date'], format = '%Y%m%d%H%M%H%M')
這兩種嘗試都得到以下錯誤:
---------------------------------------------------------------------------
error Traceback (most recent call last)
<ipython-input-200-27e83cc1348d> in <module>()
----> 1 obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
2353 else:
2354 values = self.asobject
-> 2355 mapped = lib.map_infer(values, f, convert=convert_dtype)
2356
2357 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer (pandas/_libs/lib.c:66645)()
<ipython-input-200-27e83cc1348d> in <lambda>(x)
----> 1 obs_df['Date'] = obs_df['Date'].apply(lambda x: dt.datetime.strptime(x, '%Y%m%d%H%M%H%M'))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in _strptime_datetime(cls, data_string, format)
498 """Return a class cls instance based on the input string and the
499 format string."""
--> 500 tt, fraction = _strptime(data_string, format)
501 tzname, gmtoff = tt[-2:]
502 args = tt[:6] + (fraction,)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in _strptime(data_string, format)
318 if not format_regex:
319 try:
--> 320 format_regex = _TimeRE_cache.compile(format)
321 # KeyError raised when a bad format is found; can be specified as
322 # \\, in which case it was a stray % but with a space after it
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/_strptime.py in compile(self, format)
266 def compile(self, format):
267 """Return a compiled re object for the format string."""
--> 268 return re_compile(self.pattern(format), IGNORECASE)
269
270 _cache_lock = _thread_allocate_lock()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/re.py in compile(pattern, flags)
222 def compile(pattern, flags=0):
223 "Compile a regular expression pattern, returning a pattern object."
--> 224 return _compile(pattern, flags)
225
226 def purge():
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/re.py in _compile(pattern, flags)
291 if not sre_compile.isstring(pattern):
292 raise TypeError("first argument must be string or compiled pattern")
--> 293 p = sre_compile.compile(pattern, flags)
294 if not (flags & DEBUG):
295 if len(_cache) >= _MAXCACHE:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_compile.py in compile(p, flags)
534 if isstring(p):
535 pattern = p
--> 536 p = sre_parse.parse(p, flags)
537 else:
538 pattern = None
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in parse(str, flags, pattern)
827 pattern.str = str
828
--> 829 p = _parse_sub(source, pattern, 0)
830 p.pattern.flags = fix_flags(str, p.pattern.flags)
831
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in _parse_sub(source, state, nested)
435 start = source.tell()
436 while True:
--> 437 itemsappend(_parse(source, state))
438 if not sourcematch("|"):
439 break
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/sre_parse.py in _parse(source, state)
772 group = state.opengroup(name)
773 except error as err:
--> 774 raise source.error(err.msg, len(name) + 1) from None
775 if condgroup:
776 p = _parse_sub_cond(source, state, condgroup)
error: redefinition of group name 'H' as group 6; was group 4 at position 127
我也嘗試了這樣做:
obs_df['Date'] = pd.to_datetime(obs_df['Date'], errors='raise', yearfirst=True, utc=True, box=False, format="%Y-%m-%d_%H%M", exact=False,
infer_datetime_format=True)
但得到這個錯誤:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44294)()
pandas/_libs/src/datetime.pxd in datetime._string_to_dts (pandas/_libs/tslib.c:98425)()
ValueError: Error parsing datetime string "2014100108481348" at position 8
During handling of the above exception, another exception occurred:
OverflowError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44703)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.parse_datetime_string (pandas/_libs/tslib.c:35351)()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
1181 else:
-> 1182 return DEFAULTPARSER.parse(timestr, **kwargs)
1183
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
577
--> 578 if cday > monthrange(cyear, cmonth)[1]:
579 repl['day'] = monthrange(cyear, cmonth)[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in monthrange(year, month)
120 raise IllegalMonthError(month)
--> 121 day1 = weekday(year, month, 1)
122 ndays = mdays[month] + (month == February and isleap(year))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in weekday(year, month, day)
112 day (1-31)."""
--> 113 return datetime.date(year, month, day).weekday()
114
OverflowError: signed integer is greater than maximum
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:44803)()
TypeError: invalid string coercion to datetime
During handling of the above exception, another exception occurred:
OverflowError Traceback (most recent call last)
<ipython-input-205-5e9ed01bf0eb> in <module>()
1 obs_df['Date'] = pd.to_datetime(obs_df['Date'], errors='raise', yearfirst=True, utc=True, box=False, format="%Y-%m-%d_%H%M", exact=False,
----> 2 infer_datetime_format=True)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
507 elif isinstance(arg, ABCSeries):
508 from pandas import Series
--> 509 values = _convert_listlike(arg._values, False, format)
510 result = Series(values, index=arg.index, name=arg.name)
511 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
433 dayfirst=dayfirst,
434 yearfirst=yearfirst,
--> 435 require_iso8601=require_iso8601
436 )
437
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46617)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46233)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime (pandas/_libs/tslib.c:46122)()
pandas/_libs/tslib.pyx in pandas._libs.tslib.parse_datetime_string (pandas/_libs/tslib.c:35351)()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(timestr, parserinfo, **kwargs)
1180 return parser(parserinfo).parse(timestr, **kwargs)
1181 else:
-> 1182 return DEFAULTPARSER.parse(timestr, **kwargs)
1183
1184
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/dateutil/parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
576 cday = default.day if res.day is None else res.day
577
--> 578 if cday > monthrange(cyear, cmonth)[1]:
579 repl['day'] = monthrange(cyear, cmonth)[1]
580
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in monthrange(year, month)
119 if not 1 <= month <= 12:
120 raise IllegalMonthError(month)
--> 121 day1 = weekday(year, month, 1)
122 ndays = mdays[month] + (month == February and isleap(year))
123 return day1, ndays
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/calendar.py in weekday(year, month, day)
111 """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12),
112 day (1-31)."""
--> 113 return datetime.date(year, month, day).weekday()
114
115
OverflowError: signed integer is greater than maximum
如你所見,我還沒有嘗試加入連字符和下劃線,因爲那可能需要另一個步驟,但任何指導我都將不勝感激。