Skip to content

Commit cb862e4

Browse files
jbrockmendel authored and jreback committed
BUG: fix mutation of DTI backing Series/DataFrame (#24096)
1 parent aead29b commit cb862e4

File tree

5 files changed

+75
-2
lines changed

5 files changed

+75
-2
lines changed

pandas/core/internals/blocks.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2923,7 +2923,9 @@ def _try_coerce_result(self, result):
29232923
# allow passing of > 1dim if its trivial
29242924
if result.ndim > 1:
29252925
result = result.reshape(np.prod(result.shape))
2926-
result = self.values._shallow_copy(result)
2926+
2927+
# GH#24096 new values invalidate the frequency
2928+
result = self.values._shallow_copy(result, freq=None)
29272929

29282930
return result
29292931

pandas/core/internals/construction.py

+8
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ def init_dict(data, index, columns, dtype=None):
196196
arrays.loc[missing] = [v] * missing.sum()
197197

198198
else:
199+
200+
for key in data:
201+
if (isinstance(data[key], ABCDatetimeIndex) and
202+
data[key].tz is not None):
203+
# GH#24096 need copy to be deep for datetime64tz case
204+
# TODO: See if we can avoid these copies
205+
data[key] = data[key].copy(deep=True)
206+
199207
keys = com.dict_keys_to_ordered_list(data)
200208
columns = data_names = Index(keys)
201209
arrays = [data[k] for k in keys]

pandas/core/series.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
2222
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
2323
from pandas.core.dtypes.generic import (
24-
ABCDataFrame, ABCSeries, ABCSparseArray, ABCSparseSeries)
24+
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
2525
from pandas.core.dtypes.missing import (
2626
isna, na_value_for_dtype, notna, remove_na_arraylike)
2727

@@ -182,6 +182,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
182182
else:
183183
# need to copy to avoid aliasing issues
184184
data = data._values.copy()
185+
if (isinstance(data, ABCDatetimeIndex) and
186+
data.tz is not None):
187+
# GH#24096 need copy to be deep for datetime64tz case
188+
# TODO: See if we can avoid these copies
189+
data = data._values.copy(deep=True)
185190
copy = False
186191

187192
elif isinstance(data, np.ndarray):

pandas/tests/frame/test_block_internals.py

+16
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@
2828

2929

3030
class TestDataFrameBlockInternals():
31+
def test_setitem_invalidates_datetime_index_freq(self):
32+
# GH#24096 altering a datetime64tz column inplace invalidates the
33+
# `freq` attribute on the underlying DatetimeIndex
34+
35+
dti = date_range('20130101', periods=3, tz='US/Eastern')
36+
ts = dti[1]
37+
38+
df = DataFrame({'B': dti})
39+
assert df['B']._values.freq == 'D'
40+
41+
df.iloc[1, 0] = pd.NaT
42+
assert df['B']._values.freq is None
43+
44+
# check that the DatetimeIndex was not altered in place
45+
assert dti.freq == 'D'
46+
assert dti[1] == ts
3147

3248
def test_cast_internals(self, float_frame):
3349
casted = DataFrame(float_frame._data, dtype=int)
pandas/tests/series/test_block_internals.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import pandas as pd
4+
5+
# Segregated collection of methods that require the BlockManager internal data
6+
# structure
7+
8+
9+
class TestSeriesBlockInternals(object):
10+
11+
def test_setitem_invalidates_datetime_index_freq(self):
12+
# GH#24096 altering a datetime64tz Series inplace invalidates the
13+
# `freq` attribute on the underlying DatetimeIndex
14+
15+
dti = pd.date_range('20130101', periods=3, tz='US/Eastern')
16+
ts = dti[1]
17+
ser = pd.Series(dti)
18+
assert ser._values is not dti
19+
assert ser._values._data.base is not dti._data.base
20+
assert dti.freq == 'D'
21+
ser.iloc[1] = pd.NaT
22+
assert ser._values.freq is None
23+
24+
# check that the DatetimeIndex was not altered in place
25+
assert ser._values is not dti
26+
assert ser._values._data.base is not dti._data.base
27+
assert dti[1] == ts
28+
assert dti.freq == 'D'
29+
30+
def test_dt64tz_setitem_does_not_mutate_dti(self):
31+
# GH#21907, GH#24096
32+
dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific')
33+
ts = dti[0]
34+
ser = pd.Series(dti)
35+
assert ser._values is not dti
36+
assert ser._values._data.base is not dti._data.base
37+
assert ser._data.blocks[0].values is not dti
38+
assert ser._data.blocks[0].values._data.base is not dti._data.base
39+
40+
ser[::3] = pd.NaT
41+
assert ser[0] is pd.NaT
42+
assert dti[0] == ts

0 commit comments

Comments
 (0)