PERF: datetimelike addition (#56373)

jbrockmendel · web-flow · commit a6c0ae48fe4d · 2023-12-09T10:36:28.000-08:00
* PERF: datetimelike addition

* re-add import

* remove no-longer-used

* mypy fixup

* troubleshoot 32bit builds
diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
@@ -12,7 +12,6 @@
     date_range,
     to_timedelta,
 )
-from pandas.core.algorithms import checked_add_with_arr
 
 from .pandas_vb_common import numeric_dtypes
 
@@ -389,42 +388,6 @@ def time_add_timedeltas(self, df):
         df["timedelta"] + df["timedelta"]
 
 
-class AddOverflowScalar:
-    params = [1, -1, 0]
-    param_names = ["scalar"]
-
-    def setup(self, scalar):
-        N = 10**6
-        self.arr = np.arange(N)
-
-    def time_add_overflow_scalar(self, scalar):
-        checked_add_with_arr(self.arr, scalar)
-
-
-class AddOverflowArray:
-    def setup(self):
-        N = 10**6
-        self.arr = np.arange(N)
-        self.arr_rev = np.arange(-N, 0)
-        self.arr_mixed = np.array([1, -1]).repeat(N / 2)
-        self.arr_nan_1 = np.random.choice([True, False], size=N)
-        self.arr_nan_2 = np.random.choice([True, False], size=N)
-
-    def time_add_overflow_arr_rev(self):
-        checked_add_with_arr(self.arr, self.arr_rev)
-
-    def time_add_overflow_arr_mask_nan(self):
-        checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)
-
-    def time_add_overflow_b_mask_nan(self):
-        checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)
-
-    def time_add_overflow_both_arg_nan(self):
-        checked_add_with_arr(
-            self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
-        )
-
-
 hcal = pd.tseries.holiday.USFederalHolidayCalendar()
 # These offsets currently raise a NotImplementedError with .apply_index()
 non_apply = [
diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py
@@ -34,6 +34,7 @@
     "npy_unit_to_abbrev",
     "get_supported_reso",
     "guess_datetime_format",
+    "add_overflowsafe",
 ]
 
 from pandas._libs.tslibs import dtypes  # pylint: disable=import-self
@@ -55,6 +56,7 @@
 from pandas._libs.tslibs.np_datetime import (
     OutOfBoundsDatetime,
     OutOfBoundsTimedelta,
+    add_overflowsafe,
     astype_overflowsafe,
     is_unitless,
     py_get_unit_from_dtype as get_unit_from_dtype,
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
@@ -118,3 +118,5 @@ cdef int64_t convert_reso(
     NPY_DATETIMEUNIT to_reso,
     bint round_ok,
 ) except? -1
+
+cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right)
diff --git a/pandas/_libs/tslibs/np_datetime.pyi b/pandas/_libs/tslibs/np_datetime.pyi
@@ -19,3 +19,7 @@ def is_unitless(dtype: np.dtype) -> bool: ...
 def compare_mismatched_resolutions(
     left: np.ndarray, right: np.ndarray, op
 ) -> npt.NDArray[np.bool_]: ...
+def add_overflowsafe(
+    left: npt.NDArray[np.int64],
+    right: npt.NDArray[np.int64],
+) -> npt.NDArray[np.int64]: ...
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
@@ -1,3 +1,4 @@
+cimport cython
 from cpython.datetime cimport (
     PyDateTime_CheckExact,
     PyDateTime_DATE_GET_HOUR,
@@ -678,3 +679,43 @@ cdef int64_t _convert_reso_with_dtstruct(
         raise OutOfBoundsDatetime from err
 
     return result
+
+
+@cython.overflowcheck(True)
+cpdef cnp.ndarray add_overflowsafe(cnp.ndarray left, cnp.ndarray right):
+    """
+    Overflow-safe addition for datetime64/timedelta64 dtypes.
+
+    `right` may either be zero-dim or of the same shape as `left`.
+    """
+    cdef:
+        Py_ssize_t N = left.size
+        int64_t lval, rval, res_value
+        ndarray iresult = cnp.PyArray_EMPTY(
+            left.ndim, left.shape, cnp.NPY_INT64, 0
+        )
+        cnp.broadcast mi = cnp.PyArray_MultiIterNew3(iresult, left, right)
+
+    # Note: doing this try/except outside the loop improves performance over
+    #  doing it inside the loop.
+    try:
+        for i in range(N):
+            # Analogous to: lval = lvalues[i]
+            lval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
+            # Analogous to: rval = rvalues[i]
+            rval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 2))[0]
+
+            if lval == NPY_DATETIME_NAT or rval == NPY_DATETIME_NAT:
+                res_value = NPY_DATETIME_NAT
+            else:
+                res_value = lval + rval
+
+            # Analogous to: result[i] = res_value
+            (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_value
+
+            cnp.PyArray_MultiIter_NEXT(mi)
+    except OverflowError as err:
+        raise OverflowError("Overflow in int64 addition") from err
+
+    return iresult
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1119,98 +1119,6 @@ def rank(
     return ranks
 
 
-def checked_add_with_arr(
-    arr: npt.NDArray[np.int64],
-    b: int | npt.NDArray[np.int64],
-    arr_mask: npt.NDArray[np.bool_] | None = None,
-    b_mask: npt.NDArray[np.bool_] | None = None,
-) -> npt.NDArray[np.int64]:
-    """
-    Perform array addition that checks for underflow and overflow.
-
-    Performs the addition of an int64 array and an int64 integer (or array)
-    but checks that they do not result in overflow first. For elements that
-    are indicated to be NaN, whether or not there is overflow for that element
-    is automatically ignored.
-
-    Parameters
-    ----------
-    arr : np.ndarray[int64] addend.
-    b : array or scalar addend.
-    arr_mask : np.ndarray[bool] or None, default None
-        array indicating which elements to exclude from checking
-    b_mask : np.ndarray[bool] or None, default None
-        array or scalar indicating which element(s) to exclude from checking
-
-    Returns
-    -------
-    sum : An array for elements x + b for each element x in arr if b is
-          a scalar or an array for elements x + y for each element pair
-          (x, y) in (arr, b).
-
-    Raises
-    ------
-    OverflowError if any x + y exceeds the maximum or minimum int64 value.
-    """
-    # For performance reasons, we broadcast 'b' to the new array 'b2'
-    # so that it has the same size as 'arr'.
-    b2 = np.broadcast_to(b, arr.shape)
-    if b_mask is not None:
-        # We do the same broadcasting for b_mask as well.
-        b2_mask = np.broadcast_to(b_mask, arr.shape)
-    else:
-        b2_mask = None
-
-    # For elements that are NaN, regardless of their value, we should
-    # ignore whether they overflow or not when doing the checked add.
-    if arr_mask is not None and b2_mask is not None:
-        not_nan = np.logical_not(arr_mask | b2_mask)
-    elif arr_mask is not None:
-        not_nan = np.logical_not(arr_mask)
-    elif b_mask is not None:
-        # error: Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has
-        # incompatible type "Optional[ndarray[Any, dtype[bool_]]]";
-        # expected "Union[_SupportsArray[dtype[Any]], _NestedSequence
-        # [_SupportsArray[dtype[Any]]], bool, int, float, complex, str
-        # , bytes, _NestedSequence[Union[bool, int, float, complex, str
-        # , bytes]]]"
-        not_nan = np.logical_not(b2_mask)  # type: ignore[arg-type]
-    else:
-        not_nan = np.empty(arr.shape, dtype=bool)
-        not_nan.fill(True)
-
-    # gh-14324: For each element in 'arr' and its corresponding element
-    # in 'b2', we check the sign of the element in 'b2'. If it is positive,
-    # we then check whether its sum with the element in 'arr' exceeds
-    # np.iinfo(np.int64).max. If so, we have an overflow error. If it
-    # it is negative, we then check whether its sum with the element in
-    # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow
-    # error as well.
-    i8max = lib.i8max
-    i8min = iNaT
-
-    mask1 = b2 > 0
-    mask2 = b2 < 0
-
-    if not mask1.any():
-        to_raise = ((i8min - b2 > arr) & not_nan).any()
-    elif not mask2.any():
-        to_raise = ((i8max - b2 < arr) & not_nan).any()
-    else:
-        to_raise = ((i8max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or (
-            (i8min - b2[mask2] > arr[mask2]) & not_nan[mask2]
-        ).any()
-
-    if to_raise:
-        raise OverflowError("Overflow in int64 addition")
-
-    result = arr + b
-    if arr_mask is not None or b2_mask is not None:
-        np.putmask(result, ~not_nan, iNaT)
-
-    return result
-
-
 # ---- #
 # take #
 # ---- #
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -35,6 +35,7 @@
     Tick,
     Timedelta,
     Timestamp,
+    add_overflowsafe,
     astype_overflowsafe,
     get_unit_from_dtype,
     iNaT,
@@ -112,7 +113,6 @@
     ops,
 )
 from pandas.core.algorithms import (
-    checked_add_with_arr,
     isin,
     map_array,
     unique1d,
@@ -1038,7 +1038,7 @@ def _get_i8_values_and_mask(
         self, other
     ) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
         """
-        Get the int64 values and b_mask to pass to checked_add_with_arr.
+        Get the int64 values and b_mask to pass to add_overflowsafe.
         """
         if isinstance(other, Period):
             i8values = other.ordinal
@@ -1094,9 +1094,7 @@ def _add_datetimelike_scalar(self, other) -> DatetimeArray:
         self = cast("TimedeltaArray", self)
 
         other_i8, o_mask = self._get_i8_values_and_mask(other)
-        result = checked_add_with_arr(
-            self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
-        )
+        result = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
         res_values = result.view(f"M8[{self.unit}]")
 
         dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
@@ -1159,9 +1157,7 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
             raise type(err)(new_message) from err
 
         other_i8, o_mask = self._get_i8_values_and_mask(other)
-        res_values = checked_add_with_arr(
-            self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
-        )
+        res_values = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
         res_m8 = res_values.view(f"timedelta64[{self.unit}]")
 
         new_freq = self._get_arithmetic_result_freq(other)
@@ -1227,9 +1223,7 @@ def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
         self = cast("DatetimeArray | TimedeltaArray", self)
 
         other_i8, o_mask = self._get_i8_values_and_mask(other)
-        new_values = checked_add_with_arr(
-            self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
-        )
+        new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
         res_values = new_values.view(self._ndarray.dtype)
 
         new_freq = self._get_arithmetic_result_freq(other)
@@ -1297,9 +1291,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
         self._check_compatible_with(other)
 
         other_i8, o_mask = self._get_i8_values_and_mask(other)
-        new_i8_data = checked_add_with_arr(
-            self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
-        )
+        new_i8_data = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
         new_data = np.array([self.freq.base * x for x in new_i8_data])
 
         if o_mask is None:
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -25,6 +25,7 @@
     NaT,
     NaTType,
     Timedelta,
+    add_overflowsafe,
     astype_overflowsafe,
     dt64arr_to_periodarr as c_dt64arr_to_periodarr,
     get_unit_from_dtype,
@@ -72,7 +73,6 @@
 )
 from pandas.core.dtypes.missing import isna
 
-import pandas.core.algorithms as algos
 from pandas.core.arrays import datetimelike as dtl
 import pandas.core.common as com
 
@@ -855,7 +855,7 @@ def _addsub_int_array_or_scalar(
         assert op in [operator.add, operator.sub]
         if op is operator.sub:
             other = -other
-        res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
+        res_values = add_overflowsafe(self.asi8, np.asarray(other, dtype="i8"))
         return type(self)(res_values, dtype=self.dtype)
 
     def _add_offset(self, other: BaseOffset):
@@ -920,12 +920,7 @@ def _add_timedelta_arraylike(
                 "not an integer multiple of the PeriodArray's freq."
             ) from err
 
-        b_mask = np.isnat(delta)
-
-        res_values = algos.checked_add_with_arr(
-            self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
-        )
-        np.putmask(res_values, self._isnan | b_mask, iNaT)
+        res_values = add_overflowsafe(self.asi8, np.asarray(delta.view("i8")))
         return type(self)(res_values, dtype=self.dtype)
 
     def _check_timedeltalike_freq_compat(self, other):
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -1837,57 +1837,6 @@ def test_pct_max_many_rows(self):
         assert result == 1
 
 
-def test_int64_add_overflow():
-    # see gh-14068
-    msg = "Overflow in int64 addition"
-    m = np.iinfo(np.int64).max
-    n = np.iinfo(np.int64).min
-
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), m)
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]))
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([n, n]), n)
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([n, n]), np.array([n, n]))
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, n]), np.array([n, n]))
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(
-            np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])
-        )
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(
-            np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])
-        )
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(
-            np.array([m, m]),
-            np.array([m, m]),
-            arr_mask=np.array([False, True]),
-            b_mask=np.array([False, True]),
-        )
-    with pytest.raises(OverflowError, match=msg):
-        algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m]))
-
-    # Check that the nan boolean arrays override whether or not
-    # the addition overflows. We don't check the result but just
-    # the fact that an OverflowError is not raised.
-    algos.checked_add_with_arr(
-        np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])
-    )
-    algos.checked_add_with_arr(
-        np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])
-    )
-    algos.checked_add_with_arr(
-        np.array([m, m]),
-        np.array([m, m]),
-        arr_mask=np.array([True, False]),
-        b_mask=np.array([False, True]),
-    )
-
-
 class TestMode:
     def test_no_mode(self):
         exp = Series([], dtype=np.float64, index=Index([], dtype=int))
diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py