Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catch exception around much smaller piece of code #23702

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 164 additions & 65 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,77 +236,176 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
require_iso8601 = not infer_datetime_format
format = None

try:
result = None
result = None
tz_parsed = None

if format is not None:
# shortcut formatting here
if format == '%Y%m%d':
try:
result = _attempt_YYYYMMDD(arg, errors=errors)
except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")
if format is not None:
try:
result = _parse_with_format(arg, tz, name, box, format,
errors, exact, infer_datetime_format)
except ValueError as e:
return _parse_fallback(arg, name, tz, e)

# fallback
if result is None:
try:
result, timezones = array_strptime(
arg, format, exact=exact, errors=errors)
if '%Z' in format or '%z' in format:
return _return_parsed_timezone_results(
result, timezones, box, tz, name)
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
result = arg
except ValueError:
# if format was inferred, try falling back
# to array_to_datetime - terminate here
# for specified formats
if not infer_datetime_format:
if errors == 'raise':
raise
result = arg

if result is None and (format is None or infer_datetime_format):
result, tz_parsed = tslib.array_to_datetime(
arg,
errors=errors,
utc=tz == 'utc',
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)
if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
else:
# Convert the datetime64 numpy array to a numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
for ts in result]
return np.array(result, dtype=object)
if result is not None:
return _maybe_box_date_results(result, box, tz, name, tz_parsed)

if box:
# Ensure we return an Index in all cases where box=True
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
# e.g. an Index of datetime objects
from pandas import Index
return Index(result, name=name)
return result
assert result is None
assert format is None or infer_datetime_format

try:
result, tz_parsed = tslib.array_to_datetime(
arg,
errors=errors,
utc=tz == 'utc',
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)
except ValueError as e:
return _parse_fallback(arg, name, tz, e)
else:
return _maybe_box_date_results(result, box, tz, name, tz_parsed)


def _parse_with_format(data, tz, name, box, fmt,
                       errors, exact, infer_datetime_format):
    """
    Parse the given data using a user-provided string format.

    Parameters
    ----------
    data : np.ndarray[object]
    tz : {None, 'utc'}
        Only forwarded to _return_parsed_timezone_results when `fmt`
        contains a timezone directive ('%z' or '%Z').
    name : object
        Only forwarded to _return_parsed_timezone_results.
    box : bool
        Whether to wrap the results in an Index.  Only forwarded to
        _return_parsed_timezone_results; otherwise the caller is
        responsible for boxing.
    fmt : str
        strftime to parse time, e.g. "%d/%m/%Y", note that "%f" will parse
        all the way up to nanoseconds.
    errors : {'ignore', 'raise', 'coerce'}
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input
    exact : bool
        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.
    infer_datetime_format : bool
        Whether `fmt` was inferred rather than given by the user.

    Returns
    -------
    result : np.ndarray, Index, or None
        The parsed values; the output of _return_parsed_timezone_results
        if `fmt` contains '%z' or '%Z'; `data` itself if parsing failed
        and `errors` is not 'raise'.  None if array_strptime raised a
        ValueError while `infer_datetime_format` is True, in which case
        the caller is expected to try another parser.

    Raises
    ------
    ValueError : Data cannot be parsed using the given format.
    """
    result = None

    if fmt == '%Y%m%d':
        # shortcut formatting here
        try:
            result = _attempt_YYYYMMDD(data, errors=errors)
        except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
            raise ValueError("cannot convert the input to "
                             "'%Y%m%d' date format")

    if result is None:
        # fallback
        try:
            result, timezones = array_strptime(data, fmt,
                                               exact=exact, errors=errors)
            if '%Z' in fmt or '%z' in fmt:
                return _return_parsed_timezone_results(result, timezones,
                                                       box, tz, name)
        except tslibs.OutOfBoundsDatetime:
            if errors == 'raise':
                raise
            result = data
        except ValueError:
            # if format was inferred, try falling back
            # to array_to_datetime - terminate here
            # for specified formats
            if not infer_datetime_format:
                if errors == 'raise':
                    raise
                result = data

    return result


def _parse_fallback(data, name, tz, err):
    """
    Last-resort parser used after a ValueError from either
    _parse_with_format or array_to_datetime: try to read the data as
    datetime objects instead of strings.

    Parameters
    ----------
    data : np.ndarray[object]
    name : object
        Name to attach to returned DatetimeIndex
    tz : None, str, or tzinfo object
        Not consulted; the timezone attached to the result is the one
        returned by conversion.datetime_to_datetime64.
    err : ValueError instance
        The original parsing error, re-raised if the fallback also fails.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    ValueError : if data cannot be interpreted as datetime objects.
    """
    from pandas import DatetimeIndex

    try:
        converted = conversion.datetime_to_datetime64(data)
        i8values, inferred_tz = converted
        # Built inside the ``try`` so a failure here also surfaces as `err`.
        return DatetimeIndex._simple_new(i8values, name=name,
                                         tz=inferred_tz)
    except (ValueError, TypeError):
        # Report the original parsing failure rather than the fallback's.
        raise err


def _maybe_box_date_results(result, box, tz, name, tz_parsed=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a docstring here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

"""
If requested, wrap the parsing results in an Index object, DatetimeIndex
if possible.

Parameters
----------
result : np.ndarray[object], np.ndarray[int64], or Index
box : bool
tz : {None, 'utc'}
name : str
tz_parsed : None or tzinfo
pytz tzinfo object inferred during parsing

Returns
-------
result : np.ndarray, Index, or DatetimeIndex
"""
from pandas import Index, DatetimeIndex

if isinstance(result, Index):
# already boxed by e.g. _return_parsed_timezone_results
return result

if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
for ts in result]
return np.array(result, dtype=object)

if box:
# Ensure we return an Index in all cases where box=True
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
# e.g. an Index of datetime objects
return Index(result, name=name)
return result


def _adjust_to_origin(arg, origin, unit):
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,27 @@ def test_to_datetime_parse_timezone_keeps_name(self):


class TestToDatetime(object):
def test_to_datetime_format_typeerror_fallback(self):
    # GH#23702 pass format and non-string inputs, fallback
    now = Timestamp.now()

    # A tz-aware Timestamp passed together with a format should come back
    # as a DatetimeIndex carrying the same timezone.
    aware = np.array([now.tz_localize('Asia/Tokyo')], dtype=np.object_)
    expected = DatetimeIndex([now], tz='Asia/Tokyo')
    result = to_datetime(aware, format="%Y%m%d")
    tm.assert_index_equal(result, expected)

    # Mixing a Timestamp with a string must raise, with or without
    # infer_datetime_format.
    # FIXME: flaky test; this does NOT raise on OSX py27
    mixed = np.array([now, "2018-11-12"], dtype=np.object_)
    with pytest.raises(ValueError):
        to_datetime(mixed, format="%Y%m%d",
                    infer_datetime_format=True)

    with pytest.raises(ValueError):
        # without infer_datetime_format, we fall back to
        # datetime_to_datetime64 but not array_to_datetime,
        # and so raise on seeing a string
        to_datetime(mixed, format="%Y%m%d")

def test_to_datetime_pydatetime(self):
actual = pd.to_datetime(datetime(2008, 1, 15))
assert actual == datetime(2008, 1, 15)
Expand Down