diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 6fd0a224b81b2..1127d02f0a822 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1043,6 +1043,7 @@ Deprecations `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) - The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`). - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ae366149ab899..41c28c5ee3b93 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,9 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype, - is_object_dtype) + _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, + is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, + is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -1421,6 +1422,83 @@ def to_julian_date(self): DatetimeArrayMixin._add_datetimelike_methods() +# ------------------------------------------------------------------- +# Constructor Helpers + +def maybe_infer_tz(tz, inferred_tz): + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError('data is already tz-aware {inferred_tz}, unable to ' + 'set specified tz: {tz}' + .format(inferred_tz=inferred_tz, tz=tz)) + return tz + + +def maybe_convert_dtype(data, copy): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. + Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + Raises + ------ + TypeError : PeriodDType data is passed + """ + if is_float_dtype(data): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: deprecate this behavior to instead treat symmetrically + # with integer dtypes. See discussion in GH#23675 + + elif is_timedelta64_dtype(data): + warnings.warn("Passing timedelta64-dtype data is deprecated, will " + "raise a TypeError in a future version", + FutureWarning, stacklevel=3) + data = data.view(_NS_DTYPE) + + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriondIndex + # test_setops.test_join_does_not_recur fails + raise TypeError("Passing PeriodDtype data is invalid. " + "Use `data.to_timestamp()` instead") + + elif is_extension_type(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + def _generate_regular_range(cls, start, end, periods, freq): """ Generate a range of dates with the spans between dates described by diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1ba14ffce383b..72bfefaf33151 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -16,16 +16,17 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, - is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, - is_string_like, pandas_dtype) + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_float, is_integer, is_integer_dtype, is_list_like, + is_period_dtype, is_scalar, is_string_like, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8) + DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype, + maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs @@ -246,50 +247,49 @@ def __new__(cls, data=None, name = data.name freq, freq_infer = dtl.maybe_infer_freq(freq) + if freq is None and hasattr(data, "freq"): + # i.e. DatetimeArray/Index + freq = data.freq + verify_integrity = False # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): - # other iterable of some kind - if not isinstance(data, (list, tuple)): + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator data = list(data) - data = np.asarray(data, dtype='O') + data = np.asarray(data) + copy = False elif isinstance(data, ABCSeries): data = data._values - # data must be Index or np.ndarray here + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy) + if not (is_datetime64_dtype(data) or is_datetimetz(data) or is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArray): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if not timezones.tz_compare(tz, data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) - + if is_datetime64tz_dtype(data): + tz = maybe_infer_tz(tz, data.tz) subarr = data._data - if freq is None: - freq = data.freq - verify_integrity = False - elif issubclass(data.dtype.type, np.datetime64): + elif is_datetime64_dtype(data): + # tz-naive DatetimeArray/Index or ndarray[datetime64] + data = getattr(data, "_data", data) if data.dtype != _NS_DTYPE: data = conversion.ensure_datetime64ns(data) + if tz is not None: # Convert tz-naive to UTC tz = timezones.maybe_get_tz(tz) data = conversion.tz_localize_to_utc(data.view('i8'), tz, ambiguous=ambiguous) subarr = data.view(_NS_DTYPE) + else: # must be integer dtype otherwise # assume this data are epoch timestamps diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2c6fdb3eaf03c..ee44a64514f4f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -171,6 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex + from pandas.core.arrays.datetimes import maybe_convert_dtype + if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -208,6 +210,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + orig_arg = arg + arg, _ = maybe_convert_dtype(arg, copy=False) + arg = ensure_object(arg) require_iso8601 = False @@ -231,7 +238,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, errors=errors) + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 02755c7e58a1d..6651a27098630 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -14,11 +14,51 @@ from pandas import ( DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, to_datetime) +from pandas.core.arrays import period_array import pandas.util.testing as tm class TestDatetimeIndex(object): + def test_dti_with_period_data_raises(self): + # GH#23675 + data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(period_array(data)) + + def test_dti_with_timedelta64_data_deprecation(self): + # GH#23675 + data = np.array([0], dtype='m8[ns]') + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3),