Skip to content

Commit f08ced1

Browse files
sumitbinnani and jreback
authored and committed
BUG: Series creation with datetime64 with non-ns unit as object dtype
closes pandas-dev#11275 closes pandas-dev#11745
1 parent 34c6bd0 commit f08ced1

File tree

3 files changed

+115
-19
lines changed

3 files changed

+115
-19
lines changed

Diff for: doc/source/whatsnew/v0.20.0.txt

+13-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations
2727
New features
2828
~~~~~~~~~~~~
2929

30-
3130
.. _whatsnew_0200.enhancements.dataio_dtype:
3231

3332
``dtype`` keyword for data IO
@@ -55,6 +54,19 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files.
5554
pd.read_fwf(StringIO(data)).dtypes
5655
pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes
5756

57+
.. _whatsnew_0200.enhancements.datetime_origin:
58+
59+
``to_datetime`` has gained an ``origin`` parameter
60+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
61+
``pd.to_datetime`` has gained a new parameter, ``origin``, to define a reference date
62+
from which numeric values are counted (in units of ``unit``) when computing the resulting ``DatetimeIndex``. (:issue:`11276`, :issue:`11745`)
63+
64+
.. ipython:: python
65+
66+
to_datetime([1,2,3], unit='D', origin=pd.Timestamp('1960-01-01'))
67+
68+
The above code returns the dates that lie 1, 2 and 3 days after the timestamp given by ``origin``, i.e. 1960-01-02, 1960-01-03 and 1960-01-04.
69+
5870
.. _whatsnew_0200.enhancements.groupby_access:
5971

6072
Groupby Enhancements

Diff for: pandas/tests/indexes/datetimes/test_tools.py

+44
Original file line numberDiff line numberDiff line change
@@ -1515,3 +1515,47 @@ def test_normalize_date():
15151515

15161516
result = normalize_date(value)
15171517
assert (result == datetime(2012, 9, 7))
1518+
1519+
1520+
def test_to_datetime_origin():
    """Exercise the ``origin`` keyword of ``pd.to_datetime``.

    Checks three things (gh-11276, gh-11745):

    * ``origin='julian'`` with ``unit='D'`` matches shifting the Julian day
      numbers by the Julian date of ``Timestamp(0)``.
    * ``origin='julian'`` combined with any unit other than ``'D'`` raises
      ``ValueError``.
    * Timestamp-convertible origins (Timestamp, ``datetime``,
      ``numpy.datetime64``, string) offset the result from that origin for
      every supported unit, while invalid origins raise ``ValueError``.
    """
    units = ['D', 's', 'ms', 'us', 'ns']

    # for origin as julian
    julian_dates = pd.date_range(
        '2014-1-1', periods=10).to_julian_date().values
    result = Series(pd.to_datetime(
        julian_dates, unit='D', origin='julian'))
    expected = Series(pd.to_datetime(
        julian_dates - pd.Timestamp(0).to_julian_date(), unit='D'))
    assert_series_equal(result, expected)

    # checking for invalid combination of origin='julian' and unit != D
    for unit in units:
        if unit == 'D':
            continue
        with pytest.raises(ValueError):
            pd.to_datetime(julian_dates, unit=unit, origin='julian')

    # for origin as 1960-01-01
    epoch_1960 = pd.Timestamp('1960-01-01')
    # ``Timestamp.to_datetime`` is deprecated; ``to_pydatetime`` is the
    # supported way to obtain the equivalent ``datetime.datetime``.
    epoch_timestamp_convertible = [epoch_1960, epoch_1960.to_pydatetime(),
                                   epoch_1960.to_datetime64(),
                                   str(epoch_1960)]
    invalid_origins = ['random_string', '13-24-1990', '0001-01-01']
    units_from_epoch = [0, 1, 2, 3, 4]

    for unit in units:
        for epoch in epoch_timestamp_convertible:
            expected = Series(
                [pd.Timedelta(x, unit=unit) +
                 epoch_1960 for x in units_from_epoch])
            result = Series(pd.to_datetime(
                units_from_epoch, unit=unit, origin=epoch))
            assert_series_equal(result, expected)

        # check for invalid origins
        for origin in invalid_origins:
            with pytest.raises(ValueError):
                pd.to_datetime(units_from_epoch, unit=unit,
                               origin=origin)

Diff for: pandas/tseries/tools.py

+58-18
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ def _guess_datetime_format_for_array(arr, **kwargs):
176176

177177

178178
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
179-
utc=None, box=True, format=None, exact=True,
180-
unit=None, infer_datetime_format=False):
179+
utc=None, box=True, format=None, exact=True, coerce=None,
180+
unit=None, infer_datetime_format=False, origin='epoch'):
181181
"""
182182
Convert argument to datetime.
183183
@@ -236,6 +236,19 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
236236
datetime strings, and if it can be inferred, switch to a faster
237237
method of parsing them. In some cases this can increase the parsing
238238
speed by ~5-10x.
239+
origin : scalar convertible to Timestamp / string ('julian', 'epoch'),
240+
default 'epoch'.
241+
Define reference date. The numeric values would be parsed as number
242+
of units (defined by `unit`) since this reference date.
243+
244+
- If 'epoch', origin is set to 1970-01-01.
245+
- If 'julian', unit must be 'D', and origin is set to beginning of
246+
Julian Calendar. Julian day number 0 is assigned to the day starting
247+
at noon on January 1, 4713 BC.
248+
- If Timestamp convertible, origin is set to Timestamp identified by
249+
origin.
250+
251+
.. versionadded:: 0.20.0
239252
240253
Returns
241254
-------
@@ -297,8 +310,14 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
297310
>>> %timeit pd.to_datetime(s,infer_datetime_format=False)
298311
1 loop, best of 3: 471 ms per loop
299312
300-
"""
313+
Using non-epoch origins to parse date
314+
315+
>>> pd.to_datetime([1,2,3], unit='D', origin=pd.Timestamp('1960-01-01'))
316+
0 1960-01-02
317+
1 1960-01-03
318+
2 1960-01-04
301319
320+
"""
302321
from pandas.tseries.index import DatetimeIndex
303322

304323
tz = 'utc' if utc else None
@@ -409,22 +428,43 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
409428
except (ValueError, TypeError):
410429
raise e
411430

412-
if arg is None:
413-
return arg
414-
elif isinstance(arg, tslib.Timestamp):
415-
return arg
416-
elif isinstance(arg, ABCSeries):
417-
from pandas import Series
418-
values = _convert_listlike(arg._values, False, format)
419-
return Series(values, index=arg.index, name=arg.name)
420-
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
421-
return _assemble_from_unit_mappings(arg, errors=errors)
422-
elif isinstance(arg, ABCIndexClass):
423-
return _convert_listlike(arg, box, format, name=arg.name)
424-
elif is_list_like(arg):
425-
return _convert_listlike(arg, box, format)
431+
def intermediate_result(arg):
432+
if origin == 'julian':
433+
if unit != 'D':
434+
raise ValueError("unit must be 'D' for origin='julian'")
435+
try:
436+
arg = arg - tslib.Timestamp(0).to_julian_date()
437+
except:
438+
raise ValueError("incompatible 'arg' type for given "
439+
"'origin'='julian'")
440+
if arg is None:
441+
return arg
442+
elif isinstance(arg, tslib.Timestamp):
443+
return arg
444+
elif isinstance(arg, ABCSeries):
445+
from pandas import Series
446+
values = _convert_listlike(arg._values, False, format)
447+
return Series(values, index=arg.index, name=arg.name)
448+
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
449+
return _assemble_from_unit_mappings(arg, errors=errors)
450+
elif isinstance(arg, ABCIndexClass):
451+
return _convert_listlike(arg, box, format, name=arg.name)
452+
elif is_list_like(arg):
453+
return _convert_listlike(arg, box, format)
454+
return _convert_listlike(np.array([arg]), box, format)[0]
455+
456+
result = intermediate_result(arg)
457+
458+
offset = None
459+
if origin not in ['epoch', 'julian']:
460+
try:
461+
offset = tslib.Timestamp(origin) - tslib.Timestamp(0)
462+
except ValueError:
463+
raise ValueError("Invalid 'origin' or 'origin' Out of Bound")
426464

427-
return _convert_listlike(np.array([arg]), box, format)[0]
465+
if offset is not None:
466+
result = result + offset
467+
return result
428468

429469

430470
# mappings for assembling units

0 commit comments

Comments
 (0)