Skip to content

Commit f5361ec

Browse files
committed
add Timedelta floordiv ops
provide out-of-bounds for julian dates
1 parent bc4826f commit f5361ec

File tree

7 files changed

+163
-43
lines changed

7 files changed

+163
-43
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files.
5959
to_datetime has gained an origin parameter
6060
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6161

62-
``pd.to_datetime`` has gained a new parameter, ``origin``, to define an offset
62+
``pd.to_datetime`` has gained a new parameter, ``origin``, to define a reference date
6363
from where to compute the resulting ``DatetimeIndex``. (:issue:`11276`, :issue:`11745`)
6464

6565
Start with 1960-01-01 as the starting date
@@ -336,7 +336,7 @@ Other Enhancements
336336
- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`)
337337
- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`)
338338
- Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`)
339-
339+
- Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`)
340340
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
341341
- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`)
342342
- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`)

pandas/_libs/tslib.pyx

+35-4
Original file line numberDiff line numberDiff line change
@@ -3073,6 +3073,7 @@ class Timedelta(_Timedelta):
30733073
return np.timedelta64(self.value, 'ns')
30743074

30753075
def _validate_ops_compat(self, other):
3076+
30763077
# return True if `other` is compatible with Timedelta arithmetic
30773078
if _checknull_with_nat(other):
30783079
return True
@@ -3179,11 +3180,41 @@ class Timedelta(_Timedelta):
31793180
__div__ = __truediv__
31803181
__rdiv__ = __rtruediv__
31813182

3182-
def _not_implemented(self, *args, **kwargs):
3183-
return NotImplemented
3183+
def __floordiv__(self, other):
3184+
3185+
if hasattr(other, 'dtype'):
3186+
3187+
# work with i8
3188+
other = other.astype('m8[ns]').astype('i8')
3189+
3190+
return self.value // other
31843191

3185-
__floordiv__ = _not_implemented
3186-
__rfloordiv__ = _not_implemented
3192+
# integers only
3193+
if is_integer_object(other):
3194+
return Timedelta(self.value // other, unit='ns')
3195+
3196+
if not self._validate_ops_compat(other):
3197+
return NotImplemented
3198+
3199+
other = Timedelta(other)
3200+
if other is NaT:
3201+
return np.nan
3202+
return self.value // other.value
3203+
3204+
def __rfloordiv__(self, other):
3205+
if hasattr(other, 'dtype'):
3206+
3207+
# work with i8
3208+
other = other.astype('m8[ns]').astype('i8')
3209+
return other // self.value
3210+
3211+
if not self._validate_ops_compat(other):
3212+
return NotImplemented
3213+
3214+
other = Timedelta(other)
3215+
if other is NaT:
3216+
return NaT
3217+
return other.value // self.value
31873218

31883219
def _op_unary_method(func, name):
31893220

pandas/tests/indexes/datetimes/test_tools.py

+39-5
Original file line numberDiff line numberDiff line change
@@ -1572,29 +1572,63 @@ def test_to_basic(self, julian_dates):
15721572
Timestamp('1970-01-03')])
15731573
assert_series_equal(result, expected)
15741574

1575+
def test_julian_round_trip(self):
1576+
result = pd.to_datetime(2456658, origin='julian', unit='D')
1577+
assert result.to_julian_date() == 2456658
1578+
1579+
# out-of-bounds
1580+
with pytest.raises(ValueError):
1581+
pd.to_datetime(1, origin="julian", unit='D')
1582+
15751583
def test_invalid_unit(self, units, julian_dates):
15761584

15771585
# checking for invalid combination of origin='julian' and unit != D
15781586
if units != 'D':
15791587
with pytest.raises(ValueError):
15801588
pd.to_datetime(julian_dates, unit=units, origin='julian')
15811589

1590+
def test_invalid_origin(self):
1591+
1592+
# need to have a numeric specified
1593+
with pytest.raises(ValueError):
1594+
pd.to_datetime("2005-01-01", origin="1960-01-01")
1595+
1596+
with pytest.raises(ValueError):
1597+
pd.to_datetime("2005-01-01", origin="1960-01-01", unit='D')
1598+
15821599
def test_epoch(self, units, epochs, epoch_1960, units_from_epochs):
15831600

15841601
expected = Series(
15851602
[pd.Timedelta(x, unit=units) +
15861603
epoch_1960 for x in units_from_epochs])
1604+
15871605
result = Series(pd.to_datetime(
15881606
units_from_epochs, unit=units, origin=epochs))
15891607
assert_series_equal(result, expected)
15901608

15911609
@pytest.mark.parametrize("origin, exc",
1592-
[('random_string', 'cannot be converted'),
1593-
('epoch', 'cannot be converted'),
1594-
('13-24-1990', 'cannot be converted'),
1595-
('0001-01-01', 'Out of Bounds')])
1610+
[('random_string', ValueError),
1611+
('epoch', ValueError),
1612+
('13-24-1990', ValueError),
1613+
('0001-01-01', tslib.OutOfBoundsDatetime)])
15961614
def test_invalid_origins(self, origin, exc, units, units_from_epochs):
15971615

1598-
with tm.assertRaisesRegexp(ValueError, exc):
1616+
with pytest.raises(exc):
15991617
pd.to_datetime(units_from_epochs, unit=units,
16001618
origin=origin)
1619+
1620+
def test_processing_order(self):
1621+
# make sure we handle out-of-bounds *before*
1622+
# constructing the dates
1623+
1624+
result = pd.to_datetime(200 * 365, unit='D')
1625+
expected = Timestamp('2169-11-13 00:00:00')
1626+
assert result == expected
1627+
1628+
result = pd.to_datetime(200 * 365, unit='D', origin='1870-01-01')
1629+
expected = Timestamp('2069-11-13 00:00:00')
1630+
assert result == expected
1631+
1632+
result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01')
1633+
expected = Timestamp('2169-10-20 00:00:00')
1634+
assert result == expected

pandas/tests/indexes/timedeltas/test_ops.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,12 @@ def test_ops_compat(self):
284284
result = rng / offset
285285
tm.assert_index_equal(result, expected, exact=False)
286286

287+
# floor divide
288+
expected = Int64Index((np.arange(10) + 1) * 12, name='foo')
289+
for offset in offsets:
290+
result = rng // offset
291+
tm.assert_index_equal(result, expected, exact=False)
292+
287293
# divide with nats
288294
rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo')
289295
expected = Float64Index([12, np.nan, 24], name='foo')
@@ -867,20 +873,19 @@ def test_ops(self):
867873
self.assertEqual(td * 2, Timedelta(20, unit='d'))
868874
self.assertTrue((td * pd.NaT) is pd.NaT)
869875
self.assertEqual(td / 2, Timedelta(5, unit='d'))
876+
self.assertEqual(td // 2, Timedelta(5, unit='d'))
870877
self.assertEqual(abs(td), td)
871878
self.assertEqual(abs(-td), td)
872879
self.assertEqual(td / td, 1)
873880
self.assertTrue((td / pd.NaT) is np.nan)
881+
self.assertTrue((td // pd.NaT) is np.nan)
874882

875883
# invert
876884
self.assertEqual(-td, Timedelta('-10d'))
877885
self.assertEqual(td * -1, Timedelta('-10d'))
878886
self.assertEqual(-1 * td, Timedelta('-10d'))
879887
self.assertEqual(abs(-td), Timedelta('10d'))
880888

881-
# invalid
882-
self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2)
883-
884889
# invalid multiply with another timedelta
885890
self.assertRaises(TypeError, lambda: td * td)
886891

@@ -991,7 +996,7 @@ class Other:
991996
self.assertTrue(td.__sub__(other) is NotImplemented)
992997
self.assertTrue(td.__truediv__(other) is NotImplemented)
993998
self.assertTrue(td.__mul__(other) is NotImplemented)
994-
self.assertTrue(td.__floordiv__(td) is NotImplemented)
999+
self.assertTrue(td.__floordiv__(other) is NotImplemented)
9951000

9961001
def test_ops_error_str(self):
9971002
# GH 13624

pandas/tests/scalar/test_timedelta.py

+10
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ def test_conversion(self):
216216

217217
def test_freq_conversion(self):
218218

219+
# truediv
219220
td = Timedelta('1 days 2 hours 3 ns')
220221
result = td / np.timedelta64(1, 'D')
221222
self.assertEqual(result, td.value / float(86400 * 1e9))
@@ -224,6 +225,15 @@ def test_freq_conversion(self):
224225
result = td / np.timedelta64(1, 'ns')
225226
self.assertEqual(result, td.value)
226227

228+
# floordiv
229+
td = Timedelta('1 days 2 hours 3 ns')
230+
result = td // np.timedelta64(1, 'D')
231+
self.assertEqual(result, 1)
232+
result = td // np.timedelta64(1, 's')
233+
self.assertEqual(result, 93600)
234+
result = td // np.timedelta64(1, 'ns')
235+
self.assertEqual(result, td.value)
236+
227237
def test_fields(self):
228238
def check(value):
229239
# that we are int/long like

pandas/tseries/tdi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,15 @@ def _add_delta(self, delta):
326326
def _evaluate_with_timedelta_like(self, other, op, opstr):
327327

328328
# allow division by a timedelta
329-
if opstr in ['__div__', '__truediv__']:
329+
if opstr in ['__div__', '__truediv__', '__floordiv__']:
330330
if _is_convertible_to_td(other):
331331
other = Timedelta(other)
332332
if isnull(other):
333333
raise NotImplementedError(
334334
"division by pd.NaT not implemented")
335335

336336
i8 = self.asi8
337-
result = i8 / float(other.value)
337+
result = op(i8, other.value)
338338
result = self._maybe_mask_results(result, convert='float64')
339339
return Index(result, name=self.name, copy=False)
340340

pandas/tseries/tools.py

+66-26
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
is_datetime64_dtype,
1010
is_datetime64tz_dtype,
1111
is_integer_dtype,
12-
is_list_like)
12+
is_integer,
13+
is_float,
14+
is_list_like,
15+
is_scalar,
16+
is_numeric_dtype)
1317
from pandas.types.generic import (ABCIndexClass, ABCSeries,
1418
ABCDataFrame)
1519
from pandas.types.missing import notnull
@@ -238,10 +242,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
238242
datetime strings, and if it can be inferred, switch to a faster
239243
method of parsing them. In some cases this can increase the parsing
240244
speed by ~5-10x.
241-
origin : scalar convertible to Timestamp / string ('julian', 'unix'),
242-
default 'unix'.
243-
Define reference date. The numeric values would be parsed as number
244-
of units (defined by `unit`) since this reference date.
245+
origin : scalar, default is 'unix'
246+
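Reference date, convertible to Timestamp / string ('julian', 'unix'); numeric values are parsed as the number of units (defined by `unit`) since this date.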
245247
246248
- If 'unix' (or POSIX) time; origin is set to 1970-01-01.
247249
- If 'julian', unit must be 'D', and origin is set to beginning of
@@ -312,8 +314,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
312314
>>> %timeit pd.to_datetime(s,infer_datetime_format=False)
313315
1 loop, best of 3: 471 ms per loop
314316
315-
Using non-epoch origins to parse date
316-
>>> pd.to_datetime([1,2,3], unit='D', origin=pd.Timestamp('1960-01-01'))
317+
Using a non-unix epoch origin
318+
319+
>>> pd.to_datetime([1, 2, 3], unit='D',
320+
origin=pd.Timestamp('1960-01-01'))
317321
0 1960-01-02
318322
1 1960-01-03
319323
2 1960-01-04
@@ -429,18 +433,68 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
429433
except (ValueError, TypeError):
430434
raise e
431435

432-
# perform the conversion
436+
if arg is None:
437+
return None
438+
439+
# handle origin
433440
if origin == 'julian':
441+
442+
original = arg
443+
j0 = tslib.Timestamp(0).to_julian_date()
434444
if unit != 'D':
435445
raise ValueError("unit must be 'D' for origin='julian'")
436446
try:
437-
arg = arg - tslib.Timestamp(0).to_julian_date()
447+
arg = arg - j0
438448
except:
439449
raise ValueError("incompatible 'arg' type for given "
440450
"'origin'='julian'")
441-
if arg is None:
442-
result = arg
443-
elif isinstance(arg, tslib.Timestamp):
451+
452+
# preemptively check the bounds so we can raise a clear out-of-bounds error
453+
j_max = tslib.Timestamp.max.to_julian_date() - j0
454+
j_min = tslib.Timestamp.min.to_julian_date() - j0
455+
if np.any(arg > j_max) or np.any(arg < j_min):
456+
raise tslib.OutOfBoundsDatetime(
457+
"{original} is Out of Bounds for "
458+
"origin='julian'".format(original=original))
459+
460+
elif origin not in ['unix', 'julian']:
461+
462+
# arg must be a numeric
463+
original = arg
464+
if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
465+
is_numeric_dtype(np.asarray(arg))):
466+
raise ValueError(
467+
"'{arg}' is not compatible with origin='{origin}'; "
468+
"it must be numeric with a unit specified ".format(
469+
arg=arg,
470+
origin=origin))
471+
472+
# we are going to offset back to unix / epoch time
473+
try:
474+
offset = tslib.Timestamp(origin) - tslib.Timestamp(0)
475+
except tslib.OutOfBoundsDatetime:
476+
raise tslib.OutOfBoundsDatetime(
477+
"origin {} is Out of Bounds".format(origin))
478+
except ValueError:
479+
raise ValueError("origin {} cannot be converted "
480+
"to a Timestamp".format(origin))
481+
482+
# convert the offset to the unit of the arg
483+
# this should be lossless in terms of precision
484+
offset = offset // tslib.Timedelta(1, unit=unit)
485+
486+
arg = np.asarray(arg)
487+
arg = arg + offset
488+
489+
# convert back to the type of the original arg (scalar / Series / Index)
490+
if is_scalar(original):
491+
arg = arg.item()
492+
elif isinstance(original, ABCSeries):
493+
arg = type(original)(arg, index=original.index, name=original.name)
494+
elif isinstance(original, ABCIndexClass):
495+
arg = type(original)(arg)
496+
497+
if isinstance(arg, tslib.Timestamp):
444498
result = arg
445499
elif isinstance(arg, ABCSeries):
446500
from pandas import Series
@@ -455,20 +509,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
455509
else:
456510
result = _convert_listlike(np.array([arg]), box, format)[0]
457511

458-
# handle origin
459-
if origin not in ['unix', 'julian']:
460-
try:
461-
offset = tslib.Timestamp(origin) - tslib.Timestamp(0)
462-
except tslib.OutOfBoundsDatetime:
463-
raise ValueError(
464-
"origin {} is Out of Bounds".format(origin))
465-
except ValueError:
466-
raise ValueError("origin {} cannot be converted "
467-
"to a Timestamp".format(origin))
468-
469-
if offset is not None:
470-
result = result + offset
471-
472512
return result
473513

474514

0 commit comments

Comments
 (0)