From 1ca7fa4fe611ccee04c657eae836049caef18f6c Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Wed, 28 Nov 2018 17:04:40 -0600 Subject: [PATCH 01/14] REF/API: DatetimeTZDtype * Remove magic constructor from string * Remove Caching The remaining changes in the DatetimeArray PR will be to 1. Inherit from ExtensionDtype 2. Implement construct_array_type 3. Register --- pandas/core/arrays/datetimelike.py | 25 +++--- pandas/core/dtypes/common.py | 2 +- pandas/core/dtypes/dtypes.py | 130 +++++++++++++++------------- pandas/tests/dtypes/test_common.py | 7 +- pandas/tests/dtypes/test_dtypes.py | 54 ++++++------ pandas/tests/dtypes/test_missing.py | 2 +- 6 files changed, 119 insertions(+), 101 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 83ee335aa5465..cf95824dc1d16 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -978,16 +978,21 @@ def validate_tz_from_dtype(dtype, tz): ValueError : on tzinfo mismatch """ if dtype is not None: - try: - dtype = DatetimeTZDtype.construct_from_string(dtype) - dtz = getattr(dtype, 'tz', None) - if dtz is not None: - if tz is not None and not timezones.tz_compare(tz, dtz): - raise ValueError("cannot supply both a tz and a dtype" - " with a tz") - tz = dtz - except TypeError: - pass + if isinstance(dtype, compat.string_types): + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + # Things like `datetime64[ns]`, which is OK for the + # constructors, but also nonsense, which should be validated + # but not by us. 
We *do* allow non-existent tz errors to + # go through + pass + dtz = getattr(dtype, 'tz', None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype" + " with a tz") + tz = dtz return tz diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 51b8488313e99..5a6a49360e1df 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1789,7 +1789,7 @@ def _coerce_to_dtype(dtype): ordered = getattr(dtype, 'ordered', False) dtype = CategoricalDtype(categories=categories, ordered=ordered) elif is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) + dtype = DatetimeTZDtype.construct_from_string(dtype) elif is_period_dtype(dtype): dtype = PeriodDtype(dtype) elif is_interval_dtype(dtype): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index fee983f969221..538125db17705 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,8 +1,8 @@ """ define extension dtypes """ - import re import numpy as np +import pytz from pandas._libs.interval import Interval from pandas._libs.tslibs import NaT, Period, Timestamp, timezones @@ -483,99 +483,103 @@ class DatetimeTZDtype(PandasExtensionDtype): str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') + na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache = {} + # TODO: restore caching? who cares though? It seems needlessly complex. + # np.dtype('datetime64[ns]') isn't a singleton - def __new__(cls, unit=None, tz=None): - """ Create a new unit if needed, otherwise return from the cache + def __init__(self, unit="ns", tz=None): + """ + An ExtensionDtype for timezone-aware datetime data. Parameters ---------- - unit : string unit that this represents, currently must be 'ns' - tz : string tz that this represents - """ + unit : str, default "ns" + The precision of the datetime data. 
Currently limited + to ``"ns"``. + tz : str, int, or datetime.tzinfo + The timezone. + + Raises + ------ + pytz.UnknownTimeZoneError + When the requested timezone cannot be found. + Examples + -------- + >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC') + datetime64[ns, UTC] + + >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central') + datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] + """ if isinstance(unit, DatetimeTZDtype): unit, tz = unit.unit, unit.tz - elif unit is None: - # we are called as an empty constructor - # generally for pickle compat - return object.__new__(cls) + if unit != 'ns': + raise ValueError("DatetimeTZDtype only supports ns units") + if tz: + tz = timezones.maybe_get_tz(tz) + elif tz is not None: + raise pytz.UnknownTimeZoneError(tz) elif tz is None: + raise TypeError("A 'tz' is required.") - # we were passed a string that we can construct - try: - m = cls._match.search(unit) - if m is not None: - unit = m.groupdict()['unit'] - tz = timezones.maybe_get_tz(m.groupdict()['tz']) - except TypeError: - raise ValueError("could not construct DatetimeTZDtype") - - elif isinstance(unit, compat.string_types): - - if unit != 'ns': - raise ValueError("DatetimeTZDtype only supports ns units") + self._unit = unit + self._tz = tz - unit = unit - tz = tz - - if tz is None: - raise ValueError("DatetimeTZDtype constructor must have a tz " - "supplied") - - # hash with the actual tz if we can - # some cannot be hashed, so stringfy - try: - key = (unit, tz) - hash(key) - except TypeError: - key = (unit, str(tz)) + @property + def unit(self): + """The precision of the datetime data.""" + return self._unit - # set/retrieve from cache - try: - return cls._cache[key] - except KeyError: - u = object.__new__(cls) - u.unit = unit - u.tz = tz - cls._cache[key] = u - return u + @property + def tz(self): + """The timezone.""" + return self._tz @classmethod - def construct_array_type(cls): - """Return the array type associated with this dtype - - 
Returns - ------- - type + def construct_from_string(cls, string): """ - from pandas import DatetimeIndex - return DatetimeIndex + Construct a DatetimeTZDtype from a string. - @classmethod - def construct_from_string(cls, string): - """ attempt to construct this type from a string, raise a TypeError if - it's not possible + Parameters + ---------- + string : str + The string alias for this DatetimeTZDtype. + Should be formatted like ``datetime64[ns, <tz>]``, + where ``<tz>`` is the timezone name. + + Examples + -------- + >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + datetime64[ns, UTC] """ + msg = "could not construct DatetimeTZDtype""" try: - return cls(unit=string) + match = cls._match.match(string) + if match: + d = match.groupdict() + return cls(unit=d['unit'], tz=d['tz']) + else: + raise TypeError(msg) except ValueError: - raise TypeError("could not construct DatetimeTZDtype") + raise TypeError(msg) def __unicode__(self): - # format the tz return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) @property def name(self): + """A string representation of the dtype.""" return str(self) def __hash__(self): # make myself hashable + # TODO: update this. return hash(str(self)) def __eq__(self, other): @@ -586,6 +590,10 @@ def __eq__(self, other): self.unit == other.unit and str(self.tz) == str(other.tz)) + def __getstate__(self): + # for pickle compat. 
+ return self.__dict__ + class PeriodDtype(ExtensionDtype, PandasExtensionDtype): """ diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a7390e0cffbbf..42c3c49d79dcb 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -42,9 +42,12 @@ def test_numpy_string_dtype(self): 'datetime64[ns, US/Eastern]', 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']) + @pytest.mark.xfail(reason="dtype-caching", strict=True) def test_datetimetz_dtype(self, dtype): - assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype) - assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype) + assert (com.pandas_dtype(dtype) is + DatetimeTZDtype.construct_from_string(dtype)) + assert (com.pandas_dtype(dtype) == + DatetimeTZDtype.construct_from_string(dtype)) assert com.pandas_dtype(dtype) == dtype def test_categorical_dtype(self): diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 4048e98142a7f..fb9fbe0f5ee1d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -155,19 +155,20 @@ def test_hash_vs_equality(self): assert dtype == dtype2 assert dtype2 == dtype assert dtype3 == dtype - assert dtype is dtype2 - assert dtype2 is dtype - assert dtype3 is dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) + dtype4 = DatetimeTZDtype("ns", "US/Central") + assert dtype2 != dtype4 + assert hash(dtype2) != hash(dtype4) + def test_construction(self): pytest.raises(ValueError, lambda: DatetimeTZDtype('ms', 'US/Eastern')) def test_subclass(self): - a = DatetimeTZDtype('datetime64[ns, US/Eastern]') - b = DatetimeTZDtype('datetime64[ns, CET]') + a = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]') + b = DatetimeTZDtype.construct_from_string('datetime64[ns, CET]') assert issubclass(type(a), type(a)) assert issubclass(type(a), type(b)) @@ -189,8 +190,6 @@ def test_compat(self): assert not 
is_datetime64_dtype('datetime64[ns, US/Eastern]') def test_construction_from_string(self): - result = DatetimeTZDtype('datetime64[ns, US/Eastern]') - assert is_dtype_equal(self.dtype, result) result = DatetimeTZDtype.construct_from_string( 'datetime64[ns, US/Eastern]') assert is_dtype_equal(self.dtype, result) @@ -255,14 +254,13 @@ def test_dst(self): def test_parser(self, tz, constructor): # pr #11245 dtz_str = '{con}[ns, {tz}]'.format(con=constructor, tz=tz) - result = DatetimeTZDtype(dtz_str) + result = DatetimeTZDtype.construct_from_string(dtz_str) expected = DatetimeTZDtype('ns', tz) assert result == expected def test_empty(self): - dt = DatetimeTZDtype() - with pytest.raises(AttributeError): - str(dt) + with pytest.raises(TypeError, match="A 'tz' is required."): + DatetimeTZDtype() class TestPeriodDtype(Base): @@ -795,34 +793,38 @@ def test_update_dtype_errors(self, bad_dtype): dtype.update_dtype(bad_dtype) -@pytest.mark.parametrize( - 'dtype', - [CategoricalDtype, IntervalDtype]) +@pytest.mark.parametrize('dtype', [ + CategoricalDtype, + IntervalDtype, +]) def test_registry(dtype): assert dtype in registry.dtypes -@pytest.mark.parametrize('dtype', [DatetimeTZDtype, PeriodDtype]) +@pytest.mark.parametrize('dtype', [ + PeriodDtype, + DatetimeTZDtype, +]) def test_pandas_registry(dtype): assert dtype not in registry.dtypes assert dtype in _pandas_registry.dtypes -@pytest.mark.parametrize( - 'dtype, expected', - [('int64', None), - ('interval', IntervalDtype()), - ('interval[int64]', IntervalDtype()), - ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), - ('category', CategoricalDtype())]) +@pytest.mark.parametrize('dtype, expected', [ + ('int64', None), + ('interval', IntervalDtype()), + ('interval[int64]', IntervalDtype()), + ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), + ('category', CategoricalDtype()), +]) def test_registry_find(dtype, expected): assert registry.find(dtype) == expected -@pytest.mark.parametrize( - 'dtype, 
expected', - [('period[D]', PeriodDtype('D')), - ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) +@pytest.mark.parametrize('dtype, expected', [ + ('period[D]', PeriodDtype('D')), + ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')), +]) def test_pandas_registry_find(dtype, expected): assert _pandas_registry.find(dtype) == expected diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 8f82db69a9213..cb3f5933c885f 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -322,7 +322,7 @@ def test_array_equivalent_str(): # Datetime-like (np.dtype("M8[ns]"), NaT), (np.dtype("m8[ns]"), NaT), - (DatetimeTZDtype('datetime64[ns, US/Eastern]'), NaT), + (DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]'), NaT), (PeriodDtype("M"), NaT), # Integer ('u1', 0), ('u2', 0), ('u4', 0), ('u8', 0), From 2fa4bb0c753a566a12be28655b71125cf8ea8833 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Wed, 28 Nov 2018 21:14:10 -0600 Subject: [PATCH 02/14] unxfail test, remove caching bit --- pandas/tests/dtypes/test_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 42c3c49d79dcb..e176d273b916c 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -42,10 +42,7 @@ def test_numpy_string_dtype(self): 'datetime64[ns, US/Eastern]', 'datetime64[ns, Asia/Tokyo]', 'datetime64[ns, UTC]']) - @pytest.mark.xfail(reason="dtype-caching", strict=True) def test_datetimetz_dtype(self, dtype): - assert (com.pandas_dtype(dtype) is - DatetimeTZDtype.construct_from_string(dtype)) assert (com.pandas_dtype(dtype) == DatetimeTZDtype.construct_from_string(dtype)) assert com.pandas_dtype(dtype) == dtype From 7e6d8ea9aa5889da388cfccba971de1792186fd1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Wed, 28 
Nov 2018 21:45:21 -0600 Subject: [PATCH 03/14] Restore construct_array_type --- pandas/core/dtypes/dtypes.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 538125db17705..7654cb4b2345b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -541,6 +541,17 @@ def tz(self): """The timezone.""" return self._tz + @classmethod + def construct_array_type(cls): + """Return the array type associated with this dtype + + Returns + ------- + type + """ + from pandas import DatetimeIndex + return DatetimeIndex + @classmethod def construct_from_string(cls, string): """ From 9e4faf8d78571c70c019e4c802f50039516d4121 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 06:14:05 -0600 Subject: [PATCH 04/14] cache readonly --- pandas/core/dtypes/dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7654cb4b2345b..9072e3265a660 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -6,6 +6,7 @@ from pandas._libs.interval import Interval from pandas._libs.tslibs import NaT, Period, Timestamp, timezones +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCIndexClass @@ -531,12 +532,12 @@ def __init__(self, unit="ns", tz=None): self._unit = unit self._tz = tz - @property + @cache_readonly def unit(self): """The precision of the datetime data.""" return self._unit - @property + @cache_readonly def tz(self): """The timezone.""" return self._tz From e0b7b771c1e9d9e744e308fb0f474738e320f9e2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 07:05:04 -0600 Subject: [PATCH 05/14] Updates * Use pandas_dtype * removed cache_readonly --- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/dtypes/dtypes.py | 5 ++--- 
pandas/core/dtypes/missing.py | 2 +- pandas/core/internals/blocks.py | 3 +-- pandas/tests/frame/test_dtypes.py | 4 ++-- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4d3caaacca1c1..60086d2d3f532 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ timezones) import pandas.compat as compat from pandas.errors import PerformanceWarning -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, @@ -332,7 +332,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, def _box_func(self): return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) - @cache_readonly + @property def dtype(self): if self.tz is None: return _NS_DTYPE diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9072e3265a660..7654cb4b2345b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -6,7 +6,6 @@ from pandas._libs.interval import Interval from pandas._libs.tslibs import NaT, Period, Timestamp, timezones -from pandas.util._decorators import cache_readonly from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCIndexClass @@ -532,12 +531,12 @@ def __init__(self, unit="ns", tz=None): self._unit = unit self._tz = tz - @cache_readonly + @property def unit(self): """The precision of the datetime data.""" return self._unit - @cache_readonly + @property def tz(self): """The timezone.""" return self._tz diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fa60c326a19ea..809dcbd054ea0 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -209,7 +209,7 @@ def _isna_ndarraylike(obj): vec = libmissing.isnaobj(values.ravel()) result[...] 
= vec.reshape(shape) - elif needs_i8_conversion(obj): + elif needs_i8_conversion(dtype): # this is the NaT pattern result = values.view('i8') == iNaT else: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4ae7a812e014d..d1b809c2b424b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2672,11 +2672,10 @@ def _astype(self, dtype, **kwargs): these automatically copy, so copy=True has no effect raise on an except if raise == True """ + dtype = pandas_dtype(dtype) # if we are passed a datetime64[ns, tz] if is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) - values = self.values if getattr(values, 'tz', None) is None: values = DatetimeIndex(values).tz_localize('UTC') diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2ad6da084e451..06a0d4aa5f111 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -91,8 +91,8 @@ def test_datetime_with_tz_dtypes(self): tzframe.iloc[1, 2] = pd.NaT result = tzframe.dtypes.sort_index() expected = Series([np.dtype('datetime64[ns]'), - DatetimeTZDtype('datetime64[ns, US/Eastern]'), - DatetimeTZDtype('datetime64[ns, CET]')], + DatetimeTZDtype('ns', 'US/Eastern]'), + DatetimeTZDtype('ns', 'CET')], ['A', 'B', 'C']) assert_series_equal(result, expected) From 6cc9ce58456ab4dc824789c319e0b20d805b0d6a Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 08:54:35 -0600 Subject: [PATCH 06/14] Fixed tz name --- pandas/tests/frame/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 06a0d4aa5f111..2bfd3445f2a20 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -91,7 +91,7 @@ def test_datetime_with_tz_dtypes(self): tzframe.iloc[1, 2] = pd.NaT result = tzframe.dtypes.sort_index() expected = 
Series([np.dtype('datetime64[ns]'), - DatetimeTZDtype('ns', 'US/Eastern]'), + DatetimeTZDtype('ns', 'US/Eastern'), DatetimeTZDtype('ns', 'CET')], ['A', 'B', 'C']) From 7ab2a74c304c6791f8f28edc01b7f43b2cbd2fab Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 10:16:32 -0600 Subject: [PATCH 07/14] Remove _coerce_to_dtype --- pandas/core/dtypes/common.py | 32 ------------------------------ pandas/tests/dtypes/test_dtypes.py | 15 -------------- 2 files changed, 47 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5a6a49360e1df..e1141c6b6b3a8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype): return issubclass(tipo, np.complexfloating) -def _coerce_to_dtype(dtype): - """ - Coerce a string or np.dtype to a pandas or numpy - dtype if possible. - - If we cannot convert to a pandas dtype initially, - we convert to a numpy dtype. - - Parameters - ---------- - dtype : The dtype that we want to coerce. - - Returns - ------- - pd_or_np_dtype : The coerced dtype. 
- """ - - if is_categorical_dtype(dtype): - categories = getattr(dtype, 'categories', None) - ordered = getattr(dtype, 'ordered', False) - dtype = CategoricalDtype(categories=categories, ordered=ordered) - elif is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype.construct_from_string(dtype) - elif is_period_dtype(dtype): - dtype = PeriodDtype(dtype) - elif is_interval_dtype(dtype): - dtype = IntervalDtype(dtype) - else: - dtype = np.dtype(dtype) - return dtype - - def _get_dtype(arr_or_dtype): """ Get the dtype instance associated with an array diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index fb9fbe0f5ee1d..888f4bdb73291 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -17,7 +17,6 @@ is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, - _coerce_to_dtype, is_bool_dtype, ) from pandas.core.sparse.api import SparseDtype @@ -173,12 +172,6 @@ def test_subclass(self): assert issubclass(type(a), type(a)) assert issubclass(type(a), type(b)) - def test_coerce_to_dtype(self): - assert (_coerce_to_dtype('datetime64[ns, US/Eastern]') == - DatetimeTZDtype('ns', 'US/Eastern')) - assert (_coerce_to_dtype('datetime64[ns, Asia/Tokyo]') == - DatetimeTZDtype('ns', 'Asia/Tokyo')) - def test_compat(self): assert is_datetime64tz_dtype(self.dtype) assert is_datetime64tz_dtype('datetime64[ns, US/Eastern]') @@ -319,10 +312,6 @@ def test_identity(self): assert PeriodDtype('period[1S1U]') == PeriodDtype('period[1000001U]') assert PeriodDtype('period[1S1U]') is PeriodDtype('period[1000001U]') - def test_coerce_to_dtype(self): - assert _coerce_to_dtype('period[D]') == PeriodDtype('period[D]') - assert _coerce_to_dtype('period[3M]') == PeriodDtype('period[3M]') - def test_compat(self): assert not is_datetime64_ns_dtype(self.dtype) assert not is_datetime64_ns_dtype('period[D]') @@ -517,10 +506,6 @@ def test_is_dtype(self): assert not 
IntervalDtype.is_dtype(np.int64) assert not IntervalDtype.is_dtype(np.float64) - def test_coerce_to_dtype(self): - assert (_coerce_to_dtype('interval[int64]') == - IntervalDtype('interval[int64]')) - def test_equality(self): assert is_dtype_equal(self.dtype, 'interval[int64]') assert is_dtype_equal(self.dtype, IntervalDtype('int64')) From c14b45fe3d9abe2e8cb5d295955a13129b40d693 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 10:19:29 -0600 Subject: [PATCH 08/14] fix unpickling --- pandas/core/dtypes/dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7654cb4b2345b..1a9c112f9024f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -601,9 +601,10 @@ def __eq__(self, other): self.unit == other.unit and str(self.tz) == str(other.tz)) - def __getstate__(self): + def __setstate__(self, state): # for pickle compat. - return self.__dict__ + self._tz = state['tz'] + self._unit = state['unit'] class PeriodDtype(ExtensionDtype, PandasExtensionDtype): From 10d2c8adef01f57d252e23c060ff2a299ac13f05 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 10:30:29 -0600 Subject: [PATCH 09/14] refactor construct_from_string --- pandas/core/dtypes/dtypes.py | 10 +++++----- pandas/tests/dtypes/test_dtypes.py | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1a9c112f9024f..e44738a1ce803 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -569,16 +569,16 @@ def construct_from_string(cls, string): >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') datetime64[ns, UTC] """ - msg = "could not construct DatetimeTZDtype""" + msg = "Could not construct DatetimeTZDtype from {}" try: match = cls._match.match(string) if match: d = match.groupdict() return 
cls(unit=d['unit'], tz=d['tz']) - else: - raise TypeError(msg) - except ValueError: - raise TypeError(msg) + except Exception: + # TODO(py3): Change this pass to `raise TypeError(msg) from e` + pass + raise TypeError(msg.format(string)) def __unicode__(self): return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 888f4bdb73291..f05affb8a32d1 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -189,6 +189,10 @@ def test_construction_from_string(self): pytest.raises(TypeError, lambda: DatetimeTZDtype.construct_from_string('foo')) + def test_construct_from_string_raises(self): + with pytest.raises(TypeError, match="notatz"): + DatetimeTZDtype.construct_from_string('datetime64[ns, notatz]') + def test_is_dtype(self): assert not DatetimeTZDtype.is_dtype(None) assert DatetimeTZDtype.is_dtype(self.dtype) From 50e1aeb0fee705ab9cbe139cdb5dfbd3a2d22f25 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Thu, 29 Nov 2018 13:00:35 -0600 Subject: [PATCH 10/14] PeriodDtype needs freq --- pandas/tests/io/json/test_json_table_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 0b4ff2c34297a..94abedf688912 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -150,7 +150,7 @@ def test_as_json_table_type_bool_dtypes(self, bool_dtype): assert as_json_table_type(bool_dtype) == 'boolean' @pytest.mark.parametrize('date_dtype', [ - np.datetime64, np.dtype("<M8[ns]"), PeriodDtype(), + np.datetime64, np.dtype("<M8[ns]"), PeriodDtype('D'), DatetimeTZDtype('ns', 'US/Central')]) def test_as_json_table_type_date_dtypes(self, date_dtype): # TODO: datedate.date? datetime.time? 
From 6d626e6266962ff69ef354c2a4dc97b9aa68a5a3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Sun, 2 Dec 2018 07:23:14 -0600 Subject: [PATCH 11/14] remove stale comment --- pandas/core/dtypes/dtypes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e44738a1ce803..782733aaf2967 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -487,8 +487,6 @@ class DatetimeTZDtype(PandasExtensionDtype): _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache = {} - # TODO: restore caching? who cares though? It seems needlessly complex. - # np.dtype('datetime64[ns]') isn't a singleton def __init__(self, unit="ns", tz=None): """ From 22699f1d993896adfc72788ba7eece21a842dd01 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Mon, 3 Dec 2018 07:09:32 -0600 Subject: [PATCH 12/14] Deprecate passing alias to unit --- pandas/core/dtypes/dtypes.py | 19 ++++++++++++++++--- pandas/tests/dtypes/test_dtypes.py | 13 +++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 782733aaf2967..db3e38c925449 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,5 +1,6 @@ """ define extension dtypes """ import re +import warnings import numpy as np import pytz @@ -483,7 +484,6 @@ class DatetimeTZDtype(PandasExtensionDtype): str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') - na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache = {} @@ -517,7 +517,20 @@ def __init__(self, unit="ns", tz=None): unit, tz = unit.unit, unit.tz if unit != 'ns': - raise ValueError("DatetimeTZDtype only supports ns units") + if isinstance(unit, compat.string_types) and tz is None: + # maybe a string like datetime64[ns, tz], which we support for + 
# now. + result = type(self).construct_from_string(unit) + unit = result.unit + tz = result.tz + msg = ( + "Passing a dtype alias like 'datetime64[ns, {tz}]' " + "to DatetimeTZDtype is deprecated. Use " + "'DatetimeTZDtype.construct_from_string()' instead." + ) + warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2) + else: + raise ValueError("DatetimeTZDtype only supports ns units") if tz: tz = timezones.maybe_get_tz(tz) @@ -567,7 +580,7 @@ def construct_from_string(cls, string): >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') datetime64[ns, UTC] """ - msg = "Could not construct DatetimeTZDtype from {}" + msg = "Could not construct DatetimeTZDtype from '{}'" try: match = cls._match.match(string) if match: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index f05affb8a32d1..81d08ac71bf6d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -146,6 +146,19 @@ class TestDatetimeTZDtype(Base): def create(self): return DatetimeTZDtype('ns', 'US/Eastern') + def test_alias_to_unit_raises(self): + # 23990 + with tm.assert_produces_warning(FutureWarning): + DatetimeTZDtype('datetime64[ns, US/Central]') + + def test_alias_to_unit_bad_alias_raises(self): + # 23990 + with pytest.raises(TypeError, match=''): + DatetimeTZDtype('this is a bad string') + + with pytest.raises(TypeError, match=''): + DatetimeTZDtype('datetime64[ns, US/NotATZ]') + def test_hash_vs_equality(self): # make sure that we satisfy is semantics dtype = self.dtype From d89a6cc118d0b7fec98e6eafa1528b2bd4a0a618 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Mon, 3 Dec 2018 07:29:14 -0600 Subject: [PATCH 13/14] Added release note for DatetimeTZDtype. 
--- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7617ad5b428a2..93ac9caa42e3e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1125,6 +1125,7 @@ Deprecations - :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`) - :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz` (:issue:`23917`) - Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`) +- Passing a string alias like ``'datetime64[ns, UTC]'`` as the `unit` parameter to :class:`DatetimeTZDtype` is deprecated. Use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). .. _whatsnew_0240.deprecations.datetimelike_int_ops: From 5cde369f6e87c093a592766226f3a3db4c7cd846 Mon Sep 17 00:00:00 2001 From: Tom Augspurger <tom.w.augspurger@gmail.com> Date: Mon, 3 Dec 2018 16:24:57 -0600 Subject: [PATCH 14/14] try ci