pandas-dev · jreback · Oct 1, 2018 · Sep 18, 2018 · Sep 9, 2018 · Sep 22, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -646,6 +646,7 @@ Datetimelike
 - Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)
 - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
 - Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
+- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
 
 Timedelta
 ^^^^^^^^^

diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -22,6 +22,7 @@ cimport ccalendar
 from conversion import tz_localize_to_utc, normalize_i8_timestamps
 from conversion cimport (tz_convert_single, _TSObject,
                          convert_to_tsobject, convert_datetime_to_tsobject)
+import enum
 from fields import get_start_end_field, get_date_name_field
 from nattype import NaT
 from nattype cimport NPY_NAT
@@ -57,50 +58,114 @@ cdef inline object create_timestamp_from_ts(int64_t value,
     return ts_base
 
 
-def round_ns(values, rounder, freq):
+@enum.unique
+class RoundTo(enum.Enum):
     """
-    Applies rounding function at given frequency
+    enumeration defining the available rounding modes
+
+    Attributes
+    ----------
+    MINUS_INFTY
+        round towards -∞, or floor [2]_
+    PLUS_INFTY
+        round towards +∞, or ceil [3]_
+    NEAREST_HALF_EVEN
+        round to nearest, tie-break half to even [6]_
+    NEAREST_HALF_MINUS_INFTY
+        round to nearest, tie-break half to -∞ [5]_
+    NEAREST_HALF_PLUS_INFTY
+        round to nearest, tie-break half to +∞ [4]_
+
+
+    References
+    ----------
+    .. [1] "Rounding - Wikipedia"
+           https://en.wikipedia.org/wiki/Rounding
+    .. [2] "Rounding down"
+           https://en.wikipedia.org/wiki/Rounding#Rounding_down
+    .. [3] "Rounding up"
+           https://en.wikipedia.org/wiki/Rounding#Rounding_up
+    .. [4] "Round half up"
+           https://en.wikipedia.org/wiki/Rounding#Round_half_up
+    .. [5] "Round half down"
+           https://en.wikipedia.org/wiki/Rounding#Round_half_down
+    .. [6] "Round half to even"
+           https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
+    """
+    MINUS_INFTY = 0
+    PLUS_INFTY = 1
+    NEAREST_HALF_EVEN = 2
+    NEAREST_HALF_PLUS_INFTY = 3
+    NEAREST_HALF_MINUS_INFTY = 4
+
+
+cdef inline _npdivmod(x1, x2):
+    """implement divmod for numpy < 1.13"""
+    return np.floor_divide(x1, x2), np.remainder(x1, x2)
+
+
+try:
+    from numpy import divmod as npdivmod
+except ImportError:
+    npdivmod = _npdivmod
+
+
+cdef inline _floor_int64(values, unit):
+    return values - np.remainder(values, unit)
+
+cdef inline _ceil_int64(values, unit):
+    return values + np.remainder(-values, unit)
+
+cdef inline _rounddown_int64(values, unit):
+    return _ceil_int64(values - unit//2, unit)
+
+cdef inline _roundup_int64(values, unit):
+    return _floor_int64(values + unit//2, unit)
+
+
+def round_nsint64(values, mode, freq):
+    """
+    Applies rounding mode at given frequency
 
     Parameters
     ----------
     values : :obj:`ndarray`
-    rounder : function, eg. 'ceil', 'floor', 'round'
+    mode : instance of `RoundTo` enumeration
     freq : str, obj
 
     Returns
     -------
     :obj:`ndarray`
     """
+
+    if not isinstance(mode, RoundTo):
+        raise ValueError('mode should be a RoundTo member')
+
     unit = to_offset(freq).nanos
 
-    # GH21262 If the Timestamp is multiple of the freq str
-    # don't apply any rounding
-    mask = values % unit == 0
-    if mask.all():
-        return values
-    r = values.copy()
-
-    if unit < 1000:
-        # for nano rounding, work with the last 6 digits separately
-        # due to float precision
-        buff = 1000000
-        r[~mask] = (buff * (values[~mask] // buff) +
-                    unit * (rounder((values[~mask] % buff) *
-                            (1 / float(unit)))).astype('i8'))
-    else:
-        if unit % 1000 != 0:
-            msg = 'Precision will be lost using frequency: {}'
-            warnings.warn(msg.format(freq))
-        # GH19206
-        # to deal with round-off when unit is large
-        if unit >= 1e9:
-            divisor = 10 ** int(np.log10(unit / 1e7))
-        else:
-            divisor = 10
-        r[~mask] = (unit * rounder((values[~mask] *
-                    (divisor / float(unit))) / divisor)
-                    .astype('i8'))
-    return r
+    if mode is RoundTo.MINUS_INFTY:
+        return _floor_int64(values, unit)
+    elif mode is RoundTo.PLUS_INFTY:
+        return _ceil_int64(values, unit)
+    elif mode is RoundTo.NEAREST_HALF_MINUS_INFTY:
+        return _rounddown_int64(values, unit)
+    elif mode is RoundTo.NEAREST_HALF_PLUS_INFTY:
+        return _roundup_int64(values, unit)
+    elif mode is RoundTo.NEAREST_HALF_EVEN:
+        # for odd unit there is no need of a tie break
+        if unit % 2:
+            return _rounddown_int64(values, unit)
+        quotient, remainder = npdivmod(values, unit)
+        mask = np.logical_or(
+            remainder > (unit // 2),
+            np.logical_and(remainder == (unit // 2), quotient % 2)
+        )
+        quotient[mask] += 1
+        return quotient * unit
+
+    # if/elif above should catch all rounding modes defined in enum 'RoundTo':
+    # if flow of control arrives here, it is a bug
+    assert False, "round_nsint64 called with an unrecognized rounding mode"
 
 
 # This is PITA. Because we inherit from datetime, which has very specific
@@ -656,7 +721,7 @@ class Timestamp(_Timestamp):
 
         return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
 
-    def _round(self, freq, rounder, ambiguous='raise'):
+    def _round(self, freq, mode, ambiguous='raise'):
         if self.tz is not None:
             value = self.tz_localize(None).value
         else:
@@ -665,7 +730,7 @@ class Timestamp(_Timestamp):
         value = np.array([value], dtype=np.int64)
 
         # Will only ever contain 1 element for timestamp
-        r = round_ns(value, rounder, freq)[0]
+        r = round_nsint64(value, mode, freq)[0]
         result = Timestamp(r, unit='ns')
         if self.tz is not None:
             result = result.tz_localize(self.tz, ambiguous=ambiguous)
@@ -694,7 +759,7 @@ class Timestamp(_Timestamp):
         ------
         ValueError if the freq cannot be converted
         """
-        return self._round(freq, np.round, ambiguous)
+        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
 
     def floor(self, freq, ambiguous='raise'):
         """
@@ -715,7 +780,7 @@ class Timestamp(_Timestamp):
         ------
         ValueError if the freq cannot be converted
         """
-        return self._round(freq, np.floor, ambiguous)
+        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
 
     def ceil(self, freq, ambiguous='raise'):
         """
@@ -736,7 +801,7 @@ class Timestamp(_Timestamp):
         ------
         ValueError if the freq cannot be converted
         """
-        return self._round(freq, np.ceil, ambiguous)
+        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
 
     @property
     def tz(self):

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -11,7 +11,7 @@
 import numpy as np
 
 from pandas._libs import lib, iNaT, NaT
-from pandas._libs.tslibs.timestamps import round_ns
+from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo
 
 from pandas.core.dtypes.common import (
     ensure_int64,
@@ -180,10 +180,10 @@ class TimelikeOps(object):
         """
     )
 
-    def _round(self, freq, rounder, ambiguous):
+    def _round(self, freq, mode, ambiguous):
         # round the local times
         values = _ensure_datetimelike_to_i8(self)
-        result = round_ns(values, rounder, freq)
+        result = round_nsint64(values, mode, freq)
         result = self._maybe_mask_results(result, fill_value=NaT)
 
         attribs = self._get_attributes_dict()
@@ -197,15 +197,15 @@ def _round(self, freq, rounder, ambiguous):
 
     @Appender((_round_doc + _round_example).format(op="round"))
     def round(self, freq, ambiguous='raise'):
-        return self._round(freq, np.round, ambiguous)
+        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
 
     @Appender((_round_doc + _floor_example).format(op="floor"))
     def floor(self, freq, ambiguous='raise'):
-        return self._round(freq, np.floor, ambiguous)
+        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
 
     @Appender((_round_doc + _ceil_example).format(op="ceil"))
     def ceil(self, freq, ambiguous='raise'):
-        return self._round(freq, np.ceil, ambiguous)
+        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
 
 
 class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):

diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py
@@ -11,6 +11,7 @@
 import pandas as pd
 
 from pandas import date_range, Timestamp, DatetimeIndex
+from pandas.tseries.frequencies import to_offset
 
 
 class TestDatetimeIndexOps(object):
@@ -124,7 +125,7 @@ def test_round(self, tz_naive_fixture):
         expected = DatetimeIndex(['2016-10-17 12:00:00.001501030'])
         tm.assert_index_equal(result, expected)
 
-        with tm.assert_produces_warning():
+        with tm.assert_produces_warning(False):
             ts = '2016-10-17 12:00:00.001501031'
             DatetimeIndex([ts]).round('1010ns')
 
@@ -169,6 +170,46 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
         expected = DatetimeIndex(list(expected))
         assert expected.equals(result)
 
+    @pytest.mark.parametrize('start, index_freq, periods', [
+        ('2018-01-01', '12H', 25),
+        ('2018-01-01 0:0:0.124999', '1ns', 1000),
+    ])
+    @pytest.mark.parametrize('round_freq', [
+        '2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
+        '250ns', '500ns', '750ns',
+        '1us', '19us', '250us', '500us', '750us',
+        '1s', '2s', '3s',
+        '12H', '1D',
+    ])
+    def test_round_int64(self, start, index_freq, periods, round_freq):
+        dt = DatetimeIndex(start=start, freq=index_freq, periods=periods)
+        unit = to_offset(round_freq).nanos
+
+        # test floor
+        result = dt.floor(round_freq)
+        diff = dt.asi8 - result.asi8
+        mod = result.asi8 % unit
+        assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
+        assert (0 <= diff).all() and (diff < unit).all(), "floor error"
+
+        # test ceil
+        result = dt.ceil(round_freq)
+        diff = result.asi8 - dt.asi8
+        mod = result.asi8 % unit
+        assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
+        assert (0 <= diff).all() and (diff < unit).all(), "ceil error"
+
+        # test round
+        result = dt.round(round_freq)
+        diff = abs(result.asi8 - dt.asi8)
+        mod = result.asi8 % unit
+        assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
+        assert (diff <= unit // 2).all(), "round error"
+        if unit % 2 == 0:
+            assert (
+                result.asi8[diff == unit // 2] % 2 == 0
+            ).all(), "round half to even error"
+
     # ----------------------------------------------------------------
     # DatetimeIndex.normalize
 

diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py
@@ -13,6 +13,7 @@
 from pandas._libs.tslibs import conversion
 from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG
 from pandas import Timestamp, NaT
+from pandas.tseries.frequencies import to_offset
 
 
 class TestTimestampUnaryOps(object):
@@ -70,7 +71,7 @@ def test_round_subsecond(self):
         assert result == expected
 
     def test_round_nonstandard_freq(self):
-        with tm.assert_produces_warning():
+        with tm.assert_produces_warning(False):
             Timestamp('2016-10-17 12:00:00.001501031').round('1010ns')
 
     def test_round_invalid_arg(self):
@@ -154,6 +155,46 @@ def test_round_dst_border(self, method):
         with pytest.raises(pytz.AmbiguousTimeError):
             getattr(ts, method)('H', ambiguous='raise')
 
+    @pytest.mark.parametrize('timestamp', [
+        '2018-01-01 0:0:0.124999360',
+        '2018-01-01 0:0:0.125000367',
+        '2018-01-01 0:0:0.125500',
+        '2018-01-01 0:0:0.126500',
+        '2018-01-01 12:00:00',
+        '2019-01-01 12:00:00',
+    ])
+    @pytest.mark.parametrize('freq', [
+        '2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
+        '250ns', '500ns', '750ns',
+        '1us', '19us', '250us', '500us', '750us',
+        '1s', '2s', '3s',
+        '1D',
+    ])
+    def test_round_int64(self, timestamp, freq):
+        """check that all rounding modes are accurate to int64 precision
+           see GH#22591
+        """
+        dt = Timestamp(timestamp)
+        unit = to_offset(freq).nanos
+
+        # test floor
+        result = dt.floor(freq)
+        assert result.value % unit == 0, "floor not a {} multiple".format(freq)
+        assert 0 <= dt.value - result.value < unit, "floor error"
+
+        # test ceil
+        result = dt.ceil(freq)
+        assert result.value % unit == 0, "ceil not a {} multiple".format(freq)
+        assert 0 <= result.value - dt.value < unit, "ceil error"
+
+        # test round
+        result = dt.round(freq)
+        assert result.value % unit == 0, "round not a {} multiple".format(freq)
+        assert abs(result.value - dt.value) <= unit // 2, "round error"
+        if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2:
+            # round half to even
+            assert result.value // unit % 2 == 0, "round half to even error"
+
     # --------------------------------------------------------------
     # Timestamp.replace