Skip to content

Commit 931d885

Browse files
TomAugspurger and Pingviinituutti
authored and committed
REF/API: DatetimeTZDtype (pandas-dev#23990)
1 parent 6c6305b commit 931d885

File tree

12 files changed

+152
-146
lines changed

12 files changed

+152
-146
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1125,6 +1125,7 @@ Deprecations
11251125
- :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`)
11261126
- :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz` (:issue:`23917`)
11271127
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
1128+
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).
11281129

11291130
.. _whatsnew_0240.deprecations.datetimelike_int_ops:
11301131

pandas/core/arrays/datetimelike.py

+15-10
Original file line number | Diff line number | Diff line change
@@ -1174,16 +1174,21 @@ def validate_tz_from_dtype(dtype, tz):
11741174
ValueError : on tzinfo mismatch
11751175
"""
11761176
if dtype is not None:
1177-
try:
1178-
dtype = DatetimeTZDtype.construct_from_string(dtype)
1179-
dtz = getattr(dtype, 'tz', None)
1180-
if dtz is not None:
1181-
if tz is not None and not timezones.tz_compare(tz, dtz):
1182-
raise ValueError("cannot supply both a tz and a dtype"
1183-
" with a tz")
1184-
tz = dtz
1185-
except TypeError:
1186-
pass
1177+
if isinstance(dtype, compat.string_types):
1178+
try:
1179+
dtype = DatetimeTZDtype.construct_from_string(dtype)
1180+
except TypeError:
1181+
# Things like `datetime64[ns]`, which is OK for the
1182+
# constructors, but also nonsense, which should be validated
1183+
# but not by us. We *do* allow non-existent tz errors to
1184+
# go through
1185+
pass
1186+
dtz = getattr(dtype, 'tz', None)
1187+
if dtz is not None:
1188+
if tz is not None and not timezones.tz_compare(tz, dtz):
1189+
raise ValueError("cannot supply both a tz and a dtype"
1190+
" with a tz")
1191+
tz = dtz
11871192
return tz
11881193

11891194

pandas/core/arrays/datetimes.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
resolution as libresolution, timezones)
1212
import pandas.compat as compat
1313
from pandas.errors import PerformanceWarning
14-
from pandas.util._decorators import Appender, cache_readonly
14+
from pandas.util._decorators import Appender
1515

1616
from pandas.core.dtypes.common import (
1717
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
@@ -333,7 +333,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
333333
def _box_func(self):
334334
return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)
335335

336-
@cache_readonly
336+
@property
337337
def dtype(self):
338338
if self.tz is None:
339339
return _NS_DTYPE

pandas/core/dtypes/common.py

-32
Original file line number | Diff line number | Diff line change
@@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype):
17671767
return issubclass(tipo, np.complexfloating)
17681768

17691769

1770-
def _coerce_to_dtype(dtype):
1771-
"""
1772-
Coerce a string or np.dtype to a pandas or numpy
1773-
dtype if possible.
1774-
1775-
If we cannot convert to a pandas dtype initially,
1776-
we convert to a numpy dtype.
1777-
1778-
Parameters
1779-
----------
1780-
dtype : The dtype that we want to coerce.
1781-
1782-
Returns
1783-
-------
1784-
pd_or_np_dtype : The coerced dtype.
1785-
"""
1786-
1787-
if is_categorical_dtype(dtype):
1788-
categories = getattr(dtype, 'categories', None)
1789-
ordered = getattr(dtype, 'ordered', False)
1790-
dtype = CategoricalDtype(categories=categories, ordered=ordered)
1791-
elif is_datetime64tz_dtype(dtype):
1792-
dtype = DatetimeTZDtype(dtype)
1793-
elif is_period_dtype(dtype):
1794-
dtype = PeriodDtype(dtype)
1795-
elif is_interval_dtype(dtype):
1796-
dtype = IntervalDtype(dtype)
1797-
else:
1798-
dtype = np.dtype(dtype)
1799-
return dtype
1800-
1801-
18021770
def _get_dtype(arr_or_dtype):
18031771
"""
18041772
Get the dtype instance associated with an array

pandas/core/dtypes/dtypes.py

+81-52
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
""" define extension dtypes """
2-
32
import re
3+
import warnings
44

55
import numpy as np
6+
import pytz
67

78
from pandas._libs.interval import Interval
89
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
@@ -491,64 +492,69 @@ class DatetimeTZDtype(PandasExtensionDtype):
491492
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
492493
_cache = {}
493494

494-
def __new__(cls, unit=None, tz=None):
495+
def __init__(self, unit="ns", tz=None):
495496
"""
496-
Create a new unit if needed, otherwise return from the cache
497+
An ExtensionDtype for timezone-aware datetime data.
497498
498499
Parameters
499500
----------
500-
unit : string unit that this represents, currently must be 'ns'
501-
tz : string tz that this represents
502-
"""
503-
504-
if isinstance(unit, DatetimeTZDtype):
505-
unit, tz = unit.unit, unit.tz
506-
507-
elif unit is None:
508-
# we are called as an empty constructor
509-
# generally for pickle compat
510-
return object.__new__(cls)
501+
unit : str, default "ns"
502+
The precision of the datetime data. Currently limited
503+
to ``"ns"``.
504+
tz : str, int, or datetime.tzinfo
505+
The timezone.
511506
512-
elif tz is None:
507+
Raises
508+
------
509+
pytz.UnknownTimeZoneError
510+
When the requested timezone cannot be found.
513511
514-
# we were passed a string that we can construct
515-
try:
516-
m = cls._match.search(unit)
517-
if m is not None:
518-
unit = m.groupdict()['unit']
519-
tz = timezones.maybe_get_tz(m.groupdict()['tz'])
520-
except TypeError:
521-
raise ValueError("could not construct DatetimeTZDtype")
512+
Examples
513+
--------
514+
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC')
515+
datetime64[ns, UTC]
522516
523-
elif isinstance(unit, compat.string_types):
517+
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central')
518+
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
519+
"""
520+
if isinstance(unit, DatetimeTZDtype):
521+
unit, tz = unit.unit, unit.tz
524522

525-
if unit != 'ns':
523+
if unit != 'ns':
524+
if isinstance(unit, compat.string_types) and tz is None:
525+
# maybe a string like datetime64[ns, tz], which we support for
526+
# now.
527+
result = type(self).construct_from_string(unit)
528+
unit = result.unit
529+
tz = result.tz
530+
msg = (
531+
"Passing a dtype alias like 'datetime64[ns, {tz}]' "
532+
"to DatetimeTZDtype is deprecated. Use "
533+
"'DatetimeTZDtype.construct_from_string()' instead."
534+
)
535+
warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2)
536+
else:
526537
raise ValueError("DatetimeTZDtype only supports ns units")
527538

528-
unit = unit
529-
tz = tz
539+
if tz:
540+
tz = timezones.maybe_get_tz(tz)
541+
elif tz is not None:
542+
raise pytz.UnknownTimeZoneError(tz)
543+
elif tz is None:
544+
raise TypeError("A 'tz' is required.")
530545

531-
if tz is None:
532-
raise ValueError("DatetimeTZDtype constructor must have a tz "
533-
"supplied")
546+
self._unit = unit
547+
self._tz = tz
534548

535-
# hash with the actual tz if we can
536-
# some cannot be hashed, so stringfy
537-
try:
538-
key = (unit, tz)
539-
hash(key)
540-
except TypeError:
541-
key = (unit, str(tz))
549+
@property
550+
def unit(self):
551+
"""The precision of the datetime data."""
552+
return self._unit
542553

543-
# set/retrieve from cache
544-
try:
545-
return cls._cache[key]
546-
except KeyError:
547-
u = object.__new__(cls)
548-
u.unit = unit
549-
u.tz = tz
550-
cls._cache[key] = u
551-
return u
554+
@property
555+
def tz(self):
556+
"""The timezone."""
557+
return self._tz
552558

553559
@classmethod
554560
def construct_array_type(cls):
@@ -565,24 +571,42 @@ def construct_array_type(cls):
565571
@classmethod
566572
def construct_from_string(cls, string):
567573
"""
568-
attempt to construct this type from a string, raise a TypeError if
569-
it's not possible
574+
Construct a DatetimeTZDtype from a string.
575+
576+
Parameters
577+
----------
578+
string : str
579+
The string alias for this DatetimeTZDtype.
580+
Should be formatted like ``datetime64[ns, <tz>]``,
581+
where ``<tz>`` is the timezone name.
582+
583+
Examples
584+
--------
585+
>>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
586+
datetime64[ns, UTC]
570587
"""
588+
msg = "Could not construct DatetimeTZDtype from '{}'"
571589
try:
572-
return cls(unit=string)
573-
except ValueError:
574-
raise TypeError("could not construct DatetimeTZDtype")
590+
match = cls._match.match(string)
591+
if match:
592+
d = match.groupdict()
593+
return cls(unit=d['unit'], tz=d['tz'])
594+
except Exception:
595+
# TODO(py3): Change this pass to `raise TypeError(msg) from e`
596+
pass
597+
raise TypeError(msg.format(string))
575598

576599
def __unicode__(self):
577-
# format the tz
578600
return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)
579601

580602
@property
581603
def name(self):
604+
"""A string representation of the dtype."""
582605
return str(self)
583606

584607
def __hash__(self):
585608
# make myself hashable
609+
# TODO: update this.
586610
return hash(str(self))
587611

588612
def __eq__(self, other):
@@ -593,6 +617,11 @@ def __eq__(self, other):
593617
self.unit == other.unit and
594618
str(self.tz) == str(other.tz))
595619

620+
def __setstate__(self, state):
621+
# for pickle compat.
622+
self._tz = state['tz']
623+
self._unit = state['unit']
624+
596625

597626
class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
598627
"""

pandas/core/dtypes/missing.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ def _isna_ndarraylike(obj):
209209
vec = libmissing.isnaobj(values.ravel())
210210
result[...] = vec.reshape(shape)
211211

212-
elif needs_i8_conversion(obj):
212+
elif needs_i8_conversion(dtype):
213213
# this is the NaT pattern
214214
result = values.view('i8') == iNaT
215215
else:

pandas/core/internals/blocks.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -2669,11 +2669,10 @@ def _astype(self, dtype, **kwargs):
26692669
these automatically copy, so copy=True has no effect
26702670
raise on an except if raise == True
26712671
"""
2672+
dtype = pandas_dtype(dtype)
26722673

26732674
# if we are passed a datetime64[ns, tz]
26742675
if is_datetime64tz_dtype(dtype):
2675-
dtype = DatetimeTZDtype(dtype)
2676-
26772676
values = self.values
26782677
if getattr(values, 'tz', None) is None:
26792678
values = DatetimeIndex(values).tz_localize('UTC')

pandas/tests/dtypes/test_common.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,8 @@ def test_numpy_string_dtype(self):
4343
'datetime64[ns, Asia/Tokyo]',
4444
'datetime64[ns, UTC]'])
4545
def test_datetimetz_dtype(self, dtype):
46-
assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype)
47-
assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype)
46+
assert (com.pandas_dtype(dtype) ==
47+
DatetimeTZDtype.construct_from_string(dtype))
4848
assert com.pandas_dtype(dtype) == dtype
4949

5050
def test_categorical_dtype(self):

0 commit comments

Comments
 (0)