Skip to content

Commit 2541b3d

Browse files
committed
ENH: Add support for more placeholders in guess_datetime_format (#43901)
Add support for day-of-week (`%a`, `%A`) and meridiem (`%p`) placeholders and, more generally, for any combination of placeholders supported by `strftime` that does not correspond to a datetime attribute.
1 parent a7e9183 commit 2541b3d

File tree

3 files changed

+17
-9
lines changed

3 files changed

+17
-9
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ Other enhancements
129129
- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
130130
- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
131131
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
132+
- :func:`guess_datetime_format` now correctly guesses the format of datetime strings with day of week and AM/PM placeholders (:issue:`43901`)
132133

133134
.. ---------------------------------------------------------------------------
134135

pandas/_libs/tslibs/parsing.pyx

+15-9
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,9 @@ def guess_datetime_format(
911911
(('second', 'microsecond'), '%S.%f', 0),
912912
(('tzinfo',), '%z', 0),
913913
(('tzinfo',), '%Z', 0),
914+
(('day_of_week',), '%a', 0),
915+
(('day_of_week',), '%A', 0),
916+
(('meridiem',), '%p', 0),
914917
]
915918

916919
if dayfirst:
@@ -967,15 +970,18 @@ def guess_datetime_format(
967970
if set(attrs) & found_attrs:
968971
continue
969972

970-
if all(getattr(parsed_datetime, attr) is not None for attr in attrs):
971-
for i, token_format in enumerate(format_guess):
972-
token_filled = tokens[i].zfill(padding)
973-
if (token_format is None and
974-
token_filled == parsed_datetime.strftime(attr_format)):
975-
format_guess[i] = attr_format
976-
tokens[i] = token_filled
977-
found_attrs.update(attrs)
978-
break
973+
parsed_formatted = parsed_datetime.strftime(attr_format)
974+
# The result of `.strftime("%Z")` on a dt with no timezone is ""
975+
if len(parsed_formatted) == 0:
976+
continue
977+
978+
for i, token_format in enumerate(format_guess):
979+
token_filled = tokens[i].zfill(padding)
980+
if token_format is None and token_filled == parsed_formatted:
981+
format_guess[i] = attr_format
982+
tokens[i] = token_filled
983+
found_attrs.update(attrs)
984+
break
979985

980986
# Only consider it a valid guess if we have a year, month and day
981987
if len({'year', 'month', 'day'} & found_attrs) != 3:

pandas/tests/tslibs/test_parsing.py

+1
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def test_parsers_month_freq(date_str, expected):
167167
("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"),
168168
("2011-12-30T00:00:00.000000+09:", None),
169169
("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"),
170+
("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"),
170171
],
171172
)
172173
def test_guess_datetime_format_with_parseable_formats(string, fmt):

0 commit comments

Comments
 (0)