Skip to content

Commit abf0824

Browse files
ahcub authored and TomAugspurger committed on Jan 29, 2019
fix for BUG: grouping with tz-aware: Values falls after last bin (#24973)
1 parent 1fc88c7 commit abf0824

File tree

3 files changed

+31
-18
lines changed

3 files changed

+31
-18
lines changed
 

‎doc/source/whatsnew/v0.24.1.rst

+1 -2
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ Bug Fixes
7272

7373
**Reshaping**
7474

75-
-
76-
-
75+
- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`)
7776

7877
**Visualization**
7978

‎pandas/core/resample.py

+15 -16
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@
3030
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
3131

3232
from pandas.tseries.frequencies import to_offset
33-
from pandas.tseries.offsets import (
34-
DateOffset, Day, Nano, Tick, delta_to_nanoseconds)
33+
from pandas.tseries.offsets import DateOffset, Day, Nano, Tick
3534

3635
_shared_docs_kwargs = dict()
3736

@@ -1613,20 +1612,20 @@ def _get_timestamp_range_edges(first, last, offset, closed='left', base=0):
16131612
A tuple of length 2, containing the adjusted pd.Timestamp objects.
16141613
"""
16151614
if isinstance(offset, Tick):
1616-
is_day = isinstance(offset, Day)
1617-
day_nanos = delta_to_nanoseconds(timedelta(1))
1618-
1619-
# #1165 and #24127
1620-
if (is_day and not offset.nanos % day_nanos) or not is_day:
1621-
first, last = _adjust_dates_anchored(first, last, offset,
1622-
closed=closed, base=base)
1623-
if is_day and first.tz is not None:
1624-
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
1625-
# might contain a DST transition (23H, 24H, or 25H).
1626-
# Ensure first/last snap to midnight.
1627-
first = first.normalize()
1628-
last = last.normalize()
1629-
return first, last
1615+
if isinstance(offset, Day):
1616+
# _adjust_dates_anchored assumes 'D' means 24H, but first/last
1617+
# might contain a DST transition (23H, 24H, or 25H).
1618+
# So "pretend" the dates are naive when adjusting the endpoints
1619+
tz = first.tz
1620+
first = first.tz_localize(None)
1621+
last = last.tz_localize(None)
1622+
1623+
first, last = _adjust_dates_anchored(first, last, offset,
1624+
closed=closed, base=base)
1625+
if isinstance(offset, Day):
1626+
first = first.tz_localize(tz)
1627+
last = last.tz_localize(tz)
1628+
return first, last
16301629

16311630
else:
16321631
first = first.normalize()

‎pandas/tests/resample/test_datetime_index.py

+15
Original file line numberDiff line numberDiff line change
@@ -1278,6 +1278,21 @@ def test_resample_across_dst():
12781278
assert_frame_equal(result, expected)
12791279

12801280

1281+
def test_groupby_with_dst_time_change():
1282+
# GH 24972
1283+
index = pd.DatetimeIndex([1478064900001000000, 1480037118776792000],
1284+
tz='UTC').tz_convert('America/Chicago')
1285+
1286+
df = pd.DataFrame([1, 2], index=index)
1287+
result = df.groupby(pd.Grouper(freq='1d')).last()
1288+
expected_index_values = pd.date_range('2016-11-02', '2016-11-24',
1289+
freq='d', tz='America/Chicago')
1290+
1291+
index = pd.DatetimeIndex(expected_index_values)
1292+
expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index)
1293+
assert_frame_equal(result, expected)
1294+
1295+
12811296
def test_resample_dst_anchor():
12821297
# 5172
12831298
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')

0 commit comments

Comments
 (0)