Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: pd.Timedelta(big_int, unit=W) silent overflow #47268

Merged
merged 4 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ Other API changes
The ``auth_local_webserver = False`` option is planned to stop working in
October 2022. (:issue:`46312`)
- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`)
- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -736,6 +737,7 @@ Timedelta
^^^^^^^^^
- Bug in :func:`astype_nansafe` astype("timedelta64[ns]") fails when np.nan is included (:issue:`45798`)
- Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`)
- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add a note in the Other API Changes section mentioning that various timstamp/timedelta constructors that would overflow now raise a OutOfBoundsTimedelta/OutOfBoundsTimestamp instead of an OverflowError (they appear to be based off of different subclasses).

Should hopefully be a catchall for all these improvements you've been making

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, updated

-

Time Zones
Expand Down
62 changes: 34 additions & 28 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@ cdef convert_to_timedelta64(object ts, str unit):

Return an ns based int64
"""
# Caller is responsible for checking unit not in ["Y", "y", "M"]

if checknull_with_nat(ts):
return np.timedelta64(NPY_NAT, "ns")
elif isinstance(ts, _Timedelta):
Expand All @@ -329,17 +331,9 @@ cdef convert_to_timedelta64(object ts, str unit):
if ts == NPY_NAT:
return np.timedelta64(NPY_NAT, "ns")
else:
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(ts, unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
ts = _maybe_cast_from_unit(ts, unit)
elif is_float_object(ts):
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(int(ts), unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
ts = _maybe_cast_from_unit(ts, unit)
elif isinstance(ts, str):
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
ts = parse_iso_format_string(ts)
Expand All @@ -356,6 +350,20 @@ cdef convert_to_timedelta64(object ts, str unit):
return ts.astype("timedelta64[ns]")


cdef _maybe_cast_from_unit(ts, str unit):
# caller is responsible for checking
# assert unit not in ["Y", "y", "M"]
try:
ts = cast_from_unit(ts, unit)
except OverflowError as err:
raise OutOfBoundsTimedelta(
f"Cannot cast {ts} from {unit} to 'ns' without overflow."
) from err

ts = np.timedelta64(ts, "ns")
return ts


@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
Expand All @@ -370,6 +378,8 @@ def array_to_timedelta64(
-------
np.ndarray[timedelta64ns]
"""
# Caller is responsible for checking
assert unit not in ["Y", "y", "M"]

cdef:
Py_ssize_t i, n = values.size
Expand Down Expand Up @@ -652,24 +662,20 @@ cdef inline timedelta_from_spec(object number, object frac, object unit):
cdef:
str n

try:
unit = ''.join(unit)

if unit in ["M", "Y", "y"]:
warnings.warn(
"Units 'M', 'Y' and 'y' do not represent unambiguous "
"timedelta values and will be removed in a future version.",
FutureWarning,
stacklevel=2,
)
unit = ''.join(unit)
if unit in ["M", "Y", "y"]:
warnings.warn(
"Units 'M', 'Y' and 'y' do not represent unambiguous "
"timedelta values and will be removed in a future version.",
FutureWarning,
stacklevel=3,
)

if unit == 'M':
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
unit = 'm'
unit = parse_timedelta_unit(unit)
except KeyError:
raise ValueError(f"invalid abbreviation: {unit}")
if unit == 'M':
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
unit = 'm'
unit = parse_timedelta_unit(unit)

n = ''.join(number) + '.' + ''.join(frac)
return cast_from_unit(float(n), unit)
Expand All @@ -696,7 +702,7 @@ cpdef inline str parse_timedelta_unit(str unit):
return unit
try:
return timedelta_abbrevs[unit.lower()]
except (KeyError, AttributeError):
except KeyError:
raise ValueError(f"invalid unit abbreviation: {unit}")

# ----------------------------------------------------------------------
Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ from pandas._libs.tslibs.np_datetime cimport (
pydatetime_to_dt64,
)

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.np_datetime import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
)

from pandas._libs.tslibs.offsets cimport (
BaseOffset,
Expand Down Expand Up @@ -455,7 +458,7 @@ cdef class _Timestamp(ABCTimestamp):
# Timedelta
try:
return Timedelta(self.value - other.value)
except (OverflowError, OutOfBoundsDatetime) as err:
except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err:
if isinstance(other, _Timestamp):
if both_timestamps:
raise OutOfBoundsDatetime(
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ def _from_sequence_not_strict(
if dtype:
_validate_td64_dtype(dtype)

assert unit not in ["Y", "y", "M"] # caller is responsible for checking

explicit_none = freq is None
freq = freq if freq is not lib.no_default else None

Expand Down Expand Up @@ -923,6 +925,8 @@ def sequence_to_td64ns(
errors to be ignored; they are caught and subsequently ignored at a
higher level.
"""
assert unit not in ["Y", "y", "M"] # caller is responsible for checking

inferred_freq = None
if unit is not None:
unit = parse_timedelta_unit(unit)
Expand Down Expand Up @@ -954,7 +958,7 @@ def sequence_to_td64ns(
# Convert whatever we have into timedelta64[ns] dtype
if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
# no need to make a copy, need to convert if string-dtyped
data = objects_to_td64ns(data, unit=unit, errors=errors)
data = _objects_to_td64ns(data, unit=unit, errors=errors)
copy = False

elif is_integer_dtype(data.dtype):
Expand Down Expand Up @@ -1032,7 +1036,7 @@ def ints_to_td64ns(data, unit="ns"):
return data, copy_made


def objects_to_td64ns(data, unit=None, errors="raise"):
def _objects_to_td64ns(data, unit=None, errors="raise"):
"""
Convert a object-dtyped or string-dtyped array into an
timedelta64[ns]-dtyped array.
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def test_td_add_datetimelike_scalar(self, op):
assert result is NaT

def test_td_add_timestamp_overflow(self):
msg = "int too (large|big) to convert"
with pytest.raises(OverflowError, match=msg):
msg = "Cannot cast 259987 from D to 'ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
Expand Down
17 changes: 13 additions & 4 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
)


def test_construct_with_weeks_unit_overflow():
# GH#47268 don't silently wrap around
with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
Timedelta(1000000000000000000, unit="W")

with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
Timedelta(1000000000000000000.0, unit="W")


def test_construct_from_td64_with_unit():
# ignore the unit, as it may cause silently overflows leading to incorrect
# results, and in non-overflow cases is irrelevant GH#46827
Expand Down Expand Up @@ -204,15 +213,15 @@ def test_td_from_repr_roundtrip(val):


def test_overflow_on_construction():
msg = "int too (large|big) to convert"

# GH#3374
value = Timedelta("1day").value * 20169940
with pytest.raises(OverflowError, match=msg):
msg = "Cannot cast 1742682816000000000000 from ns to 'ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(value)

# xref GH#17637
with pytest.raises(OverflowError, match=msg):
msg = "Cannot cast 139993 from D to 'ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(7 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,10 +744,12 @@ def test_implementation_limits(self):
td = Timedelta(min_td.value - 1, "ns")
assert td is NaT

with pytest.raises(OverflowError, match=msg):
msg = "Cannot cast -9223372036854775809 from ns to 'ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(min_td.value - 2, "ns")

with pytest.raises(OverflowError, match=msg):
msg = "Cannot cast 9223372036854775808 from ns to 'ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(max_td.value + 1, "ns")

def test_total_seconds_precision(self):
Expand Down