
REF/DEPR: DatetimeIndex constructor #23675

Merged Dec 3, 2018
40 commits (changes shown from 24 commits)
f8efbef
start untangling DatetimeIndex constructor; deprecate passing of time…
jbrockmendel Nov 13, 2018
9d20bc9
add GH references
jbrockmendel Nov 13, 2018
aef3f4c
Fix incorrect usage of DatetimeIndex
jbrockmendel Nov 13, 2018
66ae42b
dummy commit to force CI
jbrockmendel Nov 14, 2018
d0e8ee3
more explicit name, test with TimedeltaIndex
jbrockmendel Nov 14, 2018
e1f4e17
remove comment
jbrockmendel Nov 14, 2018
d18e0df
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 14, 2018
a4c8c77
make exception catching less specific
jbrockmendel Nov 14, 2018
5dc5980
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 14, 2018
7e5587e
Merge remote-tracking branch 'upstream/master' into jbrockmendel-pre-…
TomAugspurger Nov 14, 2018
e94e826
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 15, 2018
7464d15
checks in both to_datetime and DatetimeIndex.__new__
jbrockmendel Nov 15, 2018
80b5dbe
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 15, 2018
3c822f1
name and docstring
jbrockmendel Nov 15, 2018
ba7e5e8
isort and flake8 fixup
jbrockmendel Nov 15, 2018
3ba9da7
move freq check earlier
jbrockmendel Nov 15, 2018
49c11e1
Merge branch 'pre-fixes4' of https://github.com/jbrockmendel/pandas i…
jbrockmendel Nov 15, 2018
f1d3fd8
improve exc message
jbrockmendel Nov 16, 2018
d44055e
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 16, 2018
1471a2b
tests for to_datetime and PeriodDtype
jbrockmendel Nov 16, 2018
11b5f6c
use objects_to_datetime64ns in to_datetime
jbrockmendel Nov 16, 2018
9f56d23
isort fixup
jbrockmendel Nov 17, 2018
1c3a5aa
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 18, 2018
be4d472
requested edits, name changes
jbrockmendel Nov 18, 2018
145772d
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 19, 2018
7c99105
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 19, 2018
6b60da2
comments, remove has_format
jbrockmendel Nov 19, 2018
a7038bb
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 20, 2018
14d923b
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 22, 2018
c9dbf24
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 25, 2018
ce9914d
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 26, 2018
b3d5bb7
dummy commit to force CI
jbrockmendel Nov 26, 2018
09c88fc
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 27, 2018
0367d6f
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 27, 2018
7cc8577
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 27, 2018
b3a096b
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Nov 29, 2018
fd5af18
Flesh out comment
jbrockmendel Dec 2, 2018
2cdd215
comment
jbrockmendel Dec 3, 2018
782ca81
Merge branch 'master' of https://github.com/pandas-dev/pandas into pr…
jbrockmendel Dec 3, 2018
03d5b35
comment more
jbrockmendel Dec 3, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
@@ -1034,6 +1034,7 @@ Deprecations
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
- Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`)
- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`)
- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of
:meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`).
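The DatetimeIndex deprecation above can be exercised directly. A minimal sketch, not part of the PR: under 0.24 the constructor warns and reinterprets the values as epoch datetimes, while later pandas versions raise the announced TypeError, so the example guards for both outcomes.

```python
import warnings

import numpy as np
import pandas as pd

td = np.array([0], dtype="m8[ns]")  # timedelta64-dtype data

with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    try:
        result = pd.DatetimeIndex(td)    # FutureWarning in 0.24.0
    except (TypeError, ValueError):
        result = None                    # later versions raise instead

# under 0.24, the i8 values are reinterpreted as epoch datetimes
assert result is None or result[0] == pd.Timestamp("1970-01-01")
```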

4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
@@ -858,8 +858,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
- bool if True, treat all vals as DST. If False, treat them as non-DST
- 'NaT' will return NaT where there are ambiguous times
nonexistent : str
If arraylike, must have the same length as vals
nonexistent : {None, "NaT", "shift", "raise"}
How to handle non-existent times when converting wall times to UTC
.. versionadded:: 0.24.0
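The `ambiguous` handling documented above surfaces through the public `tz_localize` API as well; a minimal illustration (not part of this diff) using the DST fall-back transition:

```python
import pandas as pd

# DST fall-back in US/Eastern: the wall clock reads 01:30 twice on 2018-11-04
ts = pd.Timestamp("2018-11-04 01:30:00")

early = ts.tz_localize("US/Eastern", ambiguous=True)   # first pass, still DST
late = ts.tz_localize("US/Eastern", ambiguous=False)   # second pass, standard time

print(early)  # 2018-11-04 01:30:00-04:00
print(late)   # 2018-11-04 01:30:00-05:00
```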
158 changes: 157 additions & 1 deletion pandas/core/arrays/datetimes.py
@@ -18,6 +18,10 @@

from pandas.core.dtypes.common import (
_NS_DTYPE,
is_float_dtype,
is_timedelta64_dtype,
is_period_dtype,
is_extension_type,
is_object_dtype,
is_int64_dtype,
is_datetime64tz_dtype,
@@ -186,7 +190,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin):
_freq = None

@classmethod
def _simple_new(cls, values, freq=None, tz=None, **kwargs):
def _simple_new(cls, values, freq=None, tz=None):
"""
we require that we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
@@ -1411,6 +1415,158 @@ def to_julian_date(self):
DatetimeArrayMixin._add_datetimelike_methods()


# -------------------------------------------------------------------
# Constructor Helpers

def maybe_infer_tz(tz, inferred_tz):
"""
If a timezone is inferred from data, check that it is compatible with
the user-provided timezone, if any.
Parameters
----------
tz : tzinfo or None
inferred_tz : tzinfo or None
Returns
-------
tz : tzinfo or None
Raises
------
TypeError : if both timezones are present but do not match
"""
if tz is None:
tz = inferred_tz
elif inferred_tz is None:
pass
elif not timezones.tz_compare(tz, inferred_tz):
raise TypeError('data is already tz-aware {inferred_tz}, unable to '
'set specified tz: {tz}'
.format(inferred_tz=inferred_tz, tz=tz))
return tz


def maybe_convert_dtype(data, copy, has_format=False):
"""
Convert data based on dtype conventions, issuing deprecation warnings
or errors where appropriate.
Parameters
----------
data : np.ndarray or pd.Index
copy : bool
has_format : bool, default False
Indicates if the data will be passed to a parsing function with a
`format` kwarg.
Returns
-------
data : np.ndarray or pd.Index
copy : bool
Raises
------
TypeError : PeriodDType data is passed
"""

if is_float_dtype(data) and not has_format:
# Note: we must cast to datetime64[ns] here in order to treat these
# as wall-times instead of UTC timestamps.
data = data.astype(_NS_DTYPE)
copy = False
# TODO: Why do we treat this differently from integer dtypes?
Member:

Which difference are you pointing at here? The wall-time vs. UTC one?

Member Author:

DatetimeIndex(data=int64data, tz=tz) treats the data as UTC times. DatetimeIndex(data=floatdata, tz=tz) treats the data as wall-times. I would have expected that float data would be treated like int64 data.

Contributor:

yeah i agree this seems a little odd, these should just be cast to integer i think.

Member Author:

@jreback i think all comments except this one have been addressed (a few minutes away from green if all goes well). Changing this would be a breaking change, albeit a small one. Should we change now, deprecate now, or leave for a later PR?

Contributor:

ok for this pr

yeah should fix / deprecate whatever is needed later
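The wall-time vs. UTC distinction the thread keeps returning to can be made concrete with `tz_localize` vs. `tz_convert` (an illustration, not code from the PR):

```python
import pandas as pd

naive = pd.DatetimeIndex(["1970-01-01"])

# wall time: the clock reads midnight *in* Eastern
as_wall = naive.tz_localize("US/Eastern")

# UTC instant: midnight UTC, merely displayed in Eastern
as_utc = naive.tz_localize("UTC").tz_convert("US/Eastern")

print(as_wall[0])  # 1970-01-01 00:00:00-05:00
print(as_utc[0])   # 1969-12-31 19:00:00-05:00
```

`DatetimeIndex(int64data, tz=tz)` follows the `as_utc` path; the float branch above follows the `as_wall` path, which is the inconsistency under discussion.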


elif is_timedelta64_dtype(data):
warnings.warn("Passing timedelta64-dtype data is deprecated, will "
"raise a TypeError in a future version",
FutureWarning, stacklevel=3)
data = data.view(_NS_DTYPE)

elif is_period_dtype(data):
# Note: without explicitly raising here, PeriodIndex
# test_setops.test_join_does_not_recur fails
raise TypeError("Passing PeriodDtype data is invalid. "
"Use `data.to_timestamp()` instead")

elif is_extension_type(data) and not is_datetime64tz_dtype(data):
# Includes categorical
# TODO: We have no tests for these
data = np.array(data, dtype=np.object_)
copy = False

return data, copy
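The PeriodDtype branch raises and points users at `to_timestamp()`; the supported conversion (illustration only) looks like this:

```python
import pandas as pd

pi = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

# DatetimeIndex(pi) raises TypeError under this PR; convert explicitly instead.
dti = pi.to_timestamp()   # start of each period by default

print(dti[0])  # 2016-01-01 00:00:00
print(dti[1])  # 2016-04-01 00:00:00
```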


def objects_to_datetime64ns(data, dayfirst, yearfirst,
utc=False, errors="raise",
require_iso8601=False, allow_object=False):
"""
Convert data to array of timestamps.
Parameters
----------
data : np.ndarray[object]
dayfirst : bool
yearfirst : bool
utc : bool, default False
Whether to convert timezone-aware timestamps to UTC
errors : {'raise', 'ignore', 'coerce'}
allow_object : bool
Returns
-------
result : ndarray
np.int64 dtype if returned values represent UTC timestamps
Contributor:

Just to verify: you'll get an np.int64-dtype result if and only if inferred_tz is not None?

Member Author:

correct

np.datetime64[ns] if returned values represent wall times
object if mixed timezones
inferred_tz : tzinfo or None
Raises
------
ValueError : if data cannot be converted to datetimes
"""
assert errors in ["raise", "ignore", "coerce"]

# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)

try:
result, tz_parsed = tslib.array_to_datetime(
data,
errors=errors,
utc=utc,
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)
except ValueError as e:
try:
values, tz_parsed = conversion.datetime_to_datetime64(data)
# If tzaware, these values represent unix timestamps, so we
# return them as i8 to distinguish from wall times
return values.view('i8'), tz_parsed
except (ValueError, TypeError):
Contributor:

why this additional level of try/except here? wouldn't this just raise anyhow?

Member Author:

Presumably to re-raise the original exception if the fallback fails. This is the existing behavior.

I think @mroeschke and I are vaguely planning to revisit this before long and combine datetime_to_datetime64 into array_to_datetime, fixing the many idiosyncrasies of these calls.

Contributor:

maybe i wasn't clear. I think you can simply remove the try/except and it will work the way it is now. (and same below).

Member Author:

If the claim you're making is that it will raise under the same conditions, I agree. If the claim is that it will raise the same exception, I don't. i.e. if the conversion.datetime_to_datetime64 were not inside its own try/except, then we could end up getting a TypeError in cases where we currently get a ValueError

Contributor:

really? i don't think that is actually possible. the original exception is re-raised here.

Member Author:

@jreback did this resolve the miscommunication?

Contributor:

again, if datetime_to_datetime64 raises, won't it just bubble up, which is the same thing as a re-raise here?

Member Author:

That sounds like a "no" on the resolving miscommunication. Did I at least accurately summarize your suggested change?

Contributor:

@jbrockmendel is correct here.

side-note, we could clarify all this with python-3 style

except (ValueError, TypeError) as e2:
    raise e2 from e

or six's https://pythonhosted.org/six/#six.raise_from, but it's probably just easier to wait until next month.

Contributor:

That sounds like a "no" on the resolving miscommunication. Did I at least accurately summarize your suggested change?

not resolved, but I see from @TomAugspurger what I was getting at. I guess ok for now.

raise e

if tz_parsed is not None:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
# Return i8 values to denote unix timestamps
return result.view('i8'), tz_parsed
elif is_datetime64_dtype(result):
# returning M8[ns] denotes wall-times; since tz is None
# the distinction is a thin one
return result, tz_parsed
elif is_object_dtype(result):
if allow_object:
# allowed by to_datetime, not by DatetimeIndex constructor
Contributor:

what is this difference from? why is this allowed

Member Author:

Because to_datetime will return an Index of Timestamps in cases where there are mixed-tz or mixed tz-aware/naive entries. DatetimeIndex can't do that.

Contributor:

right, and that is the correct behavior; actually DTI should just return an Index in that case, otherwise this is very confusing. I believe you have an issue talking about mixed timestamps. These should always just return an Index of objects. No conversion / inference should happen at all.

return result, tz_parsed
raise TypeError(result)
Contributor:

maybe just remove the else and do the raise TypeError(result) at the end

Member Author:

I think its worth distinguishing between hittable TypeError and defensive TypeError

Contributor:

can you comment more then.

Member Author:

sure

else: # pragma: no cover
raise TypeError(result)
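The utc=True path through objects_to_datetime64ns is reachable from the public to_datetime; tz-aware inputs with different offsets all come back as a single UTC-based result (an illustration, not part of the diff):

```python
import pandas as pd

result = pd.to_datetime(
    ["2018-01-01 00:00:00+05:00", "2018-01-01 00:00:00-05:00"],
    utc=True,
)

print(result[0])  # 2017-12-31 19:00:00+00:00
print(result[1])  # 2018-01-01 05:00:00+00:00
```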


def _generate_regular_range(cls, start, end, periods, freq):
"""
Generate a range of dates with the spans between dates described by
74 changes: 41 additions & 33 deletions pandas/core/indexes/datetimes.py
@@ -17,16 +17,17 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype,
is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float,
is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar,
is_string_like, pandas_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_float,
is_integer, is_list_like, is_object_dtype, is_period_dtype, is_scalar,
is_string_dtype, is_string_like, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.datetimes import (
DatetimeArrayMixin as DatetimeArray, _to_m8)
DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype,
maybe_infer_tz, objects_to_datetime64ns)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
@@ -248,50 +249,59 @@ def __new__(cls, data=None,
name = data.name

freq, freq_infer = dtl.maybe_infer_freq(freq)
if freq is None and hasattr(data, "freq"):
# i.e. DatetimeArray/Index
# TODO: Should this be the stronger condition of `freq_infer`?
freq = data.freq
verify_integrity = False

# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)):
# other iterable of some kind
if not isinstance(data, (list, tuple)):
if not hasattr(data, "dtype"):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data, dtype='O')
data = np.asarray(data)
copy = False
elif isinstance(data, ABCSeries):
data = data._values

# data must be Index or np.ndarray here
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)

if isinstance(data, DatetimeArray):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if not timezones.tz_compare(tz, data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))
# By this point we are assured to have either a numpy array or Index

data, copy = maybe_convert_dtype(data, copy)

if is_object_dtype(data) or is_string_dtype(data):
# TODO: We do not have tests specific to string-dtypes,
Contributor:

you could just write this as

try:
      data = datas.astype(np.int64)
except:
     ...

might be more clear

Member Author:

The issue with that is np.array(['20160405']) becomes np.array([20160405]) instead of 2016-04-05.

Contributor:

ok sure
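The '20160405' example from this exchange is easy to check: string data is parsed as a date, while a raw integer of the same digits would mean nanoseconds since the epoch (illustration only):

```python
import numpy as np
import pandas as pd

arr = np.array(["20160405"])

parsed = pd.to_datetime(arr)[0]   # parsed as a %Y%m%d date string
print(parsed)                     # 2016-04-05 00:00:00

as_int = pd.Timestamp(20160405)   # integer: nanoseconds since the epoch
print(as_int)                     # 1970-01-01 00:00:00.020160405
```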

# also complex or categorical or other extension
copy = False
if lib.infer_dtype(data) == 'integer':
data = data.astype(np.int64)
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
data, inferred_tz = objects_to_datetime64ns(
data, dayfirst=dayfirst, yearfirst=yearfirst)
tz = maybe_infer_tz(tz, inferred_tz)

if is_datetime64tz_dtype(data):
tz = maybe_infer_tz(tz, data.tz)
subarr = data._data

if freq is None:
freq = data.freq
verify_integrity = False
elif issubclass(data.dtype.type, np.datetime64):
elif is_datetime64_dtype(data):
# DatetimeIndex or ndarray[datetime64]
data = getattr(data, "_data", data)
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)

if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
subarr = data.view(_NS_DTYPE)

else:
# must be integer dtype otherwise
# assume this data are epoch timestamps
@@ -319,17 +329,15 @@ def __new__(cls, data=None,
return subarr._deepcopy_if_needed(ref_to_data, copy)

@classmethod
def _simple_new(cls, values, name=None, freq=None, tz=None,
dtype=None, **kwargs):
def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
"""
we require that we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
"""
# DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
assert isinstance(values, np.ndarray), type(values)

result = super(DatetimeIndex, cls)._simple_new(values, freq, tz,
**kwargs)
result = super(DatetimeIndex, cls)._simple_new(values, freq, tz)
result.name = name
result._reset_identity()
return result
86 changes: 46 additions & 40 deletions pandas/core/tools/datetimes.py
@@ -183,6 +183,9 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays.datetimes import (
maybe_convert_dtype, objects_to_datetime64ns)

if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

@@ -220,6 +223,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
raise TypeError('arg must be a string, datetime, list, tuple, '
'1-d array, or Series')

# warn if passing timedelta64, raise for PeriodDtype
# NB: this must come after unit transformation
arg, _ = maybe_convert_dtype(arg, copy=False,
has_format=format is not None)

arg = ensure_object(arg)
require_iso8601 = False

@@ -236,10 +244,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
require_iso8601 = not infer_datetime_format
format = None

try:
result = None
result = None
tz_parsed = None

if format is not None:
if format is not None:
try:
# shortcut formatting here
if format == '%Y%m%d':
try:
@@ -268,45 +277,42 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
if errors == 'raise':
raise
result = arg

if result is None and (format is None or infer_datetime_format):
result, tz_parsed = tslib.array_to_datetime(
arg,
errors=errors,
utc=tz == 'utc',
dayfirst=dayfirst,
yearfirst=yearfirst,
require_iso8601=require_iso8601
)
if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
for ts in result]
return np.array(result, dtype=object)

except ValueError as e:
try:
Contributor:

same here, why the extra try/except?

Contributor:

can you be slightly more specific on 'fallback', e.g. when would this be triggered?

Member Author:

I'll flesh out the comment. The gist of it is that if tz-aware datetime objects are passed and utc=True is not passed, then array_to_datetime will raise a ValueError and datetime_to_datetime64 will handle this case.

It's a mess and is a big reason why I've been making array_to_datetime PRs: #24006 is an attempt to fix it, but there are some design questions @mroeschke and I are still batting around.

values, tz = conversion.datetime_to_datetime64(arg)
return DatetimeIndex._simple_new(values, name=name, tz=tz)
except (ValueError, TypeError):
raise e

if result is None:
assert format is None or infer_datetime_format
utc = tz == 'utc'
Contributor:

why is this lowercase?

Member Author:

This is how it is implemented in master. We'll improve this in the upcoming passes specific to to_datetime. Changing this behavior here is out of scope.

result, tz_parsed = objects_to_datetime64ns(
arg, dayfirst=dayfirst, yearfirst=yearfirst,
utc=utc, errors=errors, require_iso8601=require_iso8601,
allow_object=True)
Contributor:

i find this allow_object pretty obtuse

Member Author:

The alternative is to raise unconditionally in objects_to_datetime64ns, include the result in the exception args, then catch and extract it here. That was actually what I did in the first pass, but this is much more straightforward.


if tz_parsed is not None:
if box:
# Ensure we return an Index in all cases where box=True
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
# e.g. an Index of datetime objects
from pandas import Index
return Index(result, name=name)
return result
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = DatetimeIndex(result, tz=tz_parsed).to_pydatetime()
return result

except ValueError as e:
try:
values, tz = conversion.datetime_to_datetime64(arg)
return DatetimeIndex._simple_new(values, name=name, tz=tz)
except (ValueError, TypeError):
raise e
if box:
# Ensure we return an Index in all cases where box=True
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
# e.g. an Index of datetime objects
from pandas import Index
return Index(result, name=name)
return result


def _adjust_to_origin(arg, origin, unit):
40 changes: 40 additions & 0 deletions pandas/tests/indexes/datetimes/test_construction.py
@@ -13,11 +13,51 @@
from pandas import (
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets,
to_datetime)
from pandas.core.arrays import period_array
import pandas.util.testing as tm


class TestDatetimeIndex(object):

def test_dti_with_period_data_raises(self):
# GH#23675
data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q')

with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(data)

with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(data)

with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(period_array(data))

with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(period_array(data))

def test_dti_with_timedelta64_data_deprecation(self):
# GH#23675
data = np.array([0], dtype='m8[ns]')
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(data)

assert result[0] == Timestamp('1970-01-01')

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(data)

assert result[0] == Timestamp('1970-01-01')

with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(pd.TimedeltaIndex(data))

assert result[0] == Timestamp('1970-01-01')

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(pd.TimedeltaIndex(data))

assert result[0] == Timestamp('1970-01-01')

def test_construction_caching(self):

df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3),
2 changes: 1 addition & 1 deletion pandas/tests/scalar/timedelta/test_timedelta.py
@@ -548,7 +548,7 @@ def test_overflow(self):

# mean
result = (s - s.min()).mean()
expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s)
expected = pd.Timedelta((pd.TimedeltaIndex((s - s.min())).asi8 / len(s)
).sum())

# the computation is converted to float so