Skip to content

Commit 6361ed8

Browse files
WillAyd authored and
ChiragSehra committed
Cythonized GroupBy pct_change (pandas-dev#19919)
1 parent 334df48 commit 6361ed8

File tree

4 files changed

+112
-55
lines changed

4 files changed

+112
-55
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,7 @@ Performance Improvements
795795
- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
796796
- Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
797797
- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
798+
- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
798799

799800
.. _whatsnew_0230.docs:
800801

pandas/core/groupby.py

+24
Original file line numberDiff line numberDiff line change
@@ -2044,6 +2044,23 @@ def shift(self, periods=1, freq=None, axis=0):
20442044
result_is_index=True,
20452045
periods=periods)
20462046

2047+
@Substitution(name='groupby')
2048+
@Appender(_doc_template)
2049+
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
2050+
axis=0):
2051+
"""Calculate percent change of each value to previous entry in group"""
2052+
if freq is not None or axis != 0:
2053+
return self.apply(lambda x: x.pct_change(periods=periods,
2054+
fill_method=fill_method,
2055+
limit=limit, freq=freq,
2056+
axis=axis))
2057+
2058+
filled = getattr(self, fill_method)(limit=limit).drop(
2059+
self.grouper.names, axis=1)
2060+
shifted = filled.shift(periods=periods, freq=freq)
2061+
2062+
return (filled / shifted) - 1
2063+
20472064
@Substitution(name='groupby')
20482065
@Appender(_doc_template)
20492066
def head(self, n=5):
@@ -3884,6 +3901,13 @@ def _apply_to_column_groupbys(self, func):
38843901
""" return a pass thru """
38853902
return func(self)
38863903

3904+
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None):
3905+
"""Calculate percent change of each value to previous entry in group"""
3906+
filled = getattr(self, fill_method)(limit=limit)
3907+
shifted = filled.shift(periods=periods, freq=freq)
3908+
3909+
return (filled / shifted) - 1
3910+
38873911

38883912
class NDFrameGroupBy(GroupBy):
38893913

pandas/tests/groupby/test_groupby.py

-55
Original file line numberDiff line numberDiff line change
@@ -2062,61 +2062,6 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
20622062
ascending=ascending,
20632063
na_option=na_option, pct=pct)
20642064

2065-
@pytest.mark.parametrize("mix_groupings", [True, False])
2066-
@pytest.mark.parametrize("as_series", [True, False])
2067-
@pytest.mark.parametrize("val1,val2", [
2068-
('foo', 'bar'), (1, 2), (1., 2.)])
2069-
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
2070-
("ffill", None,
2071-
[np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
2072-
("ffill", 1,
2073-
[np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
2074-
("bfill", None,
2075-
['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
2076-
("bfill", 1,
2077-
[np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
2078-
])
2079-
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
2080-
fill_method, limit, exp_vals):
2081-
vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
2082-
_exp_vals = list(exp_vals)
2083-
# Overwrite placeholder values
2084-
for index, exp_val in enumerate(_exp_vals):
2085-
if exp_val == 'val1':
2086-
_exp_vals[index] = val1
2087-
elif exp_val == 'val2':
2088-
_exp_vals[index] = val2
2089-
2090-
# Need to modify values and expectations depending on the
2091-
# Series / DataFrame that we ultimately want to generate
2092-
if mix_groupings: # ['a', 'b', 'a, 'b', ...]
2093-
keys = ['a', 'b'] * len(vals)
2094-
2095-
def interweave(list_obj):
2096-
temp = list()
2097-
for x in list_obj:
2098-
temp.extend([x, x])
2099-
2100-
return temp
2101-
2102-
_exp_vals = interweave(_exp_vals)
2103-
vals = interweave(vals)
2104-
else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
2105-
keys = ['a'] * len(vals) + ['b'] * len(vals)
2106-
_exp_vals = _exp_vals * 2
2107-
vals = vals * 2
2108-
2109-
df = DataFrame({'key': keys, 'val': vals})
2110-
if as_series:
2111-
result = getattr(
2112-
df.groupby('key')['val'], fill_method)(limit=limit)
2113-
exp = Series(_exp_vals, name='val')
2114-
assert_series_equal(result, exp)
2115-
else:
2116-
result = getattr(df.groupby('key'), fill_method)(limit=limit)
2117-
exp = DataFrame({'key': keys, 'val': _exp_vals})
2118-
assert_frame_equal(result, exp)
2119-
21202065
@pytest.mark.parametrize("agg_func", ['any', 'all'])
21212066
@pytest.mark.parametrize("skipna", [True, False])
21222067
@pytest.mark.parametrize("vals", [

pandas/tests/groupby/test_transform.py

+87
Original file line numberDiff line numberDiff line change
@@ -636,3 +636,90 @@ def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func):
636636
exp = exp.astype('float')
637637

638638
comp_func(result, exp)
639+
640+
@pytest.mark.parametrize("mix_groupings", [True, False])
641+
@pytest.mark.parametrize("as_series", [True, False])
642+
@pytest.mark.parametrize("val1,val2", [
643+
('foo', 'bar'), (1, 2), (1., 2.)])
644+
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
645+
("ffill", None,
646+
[np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
647+
("ffill", 1,
648+
[np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
649+
("bfill", None,
650+
['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
651+
("bfill", 1,
652+
[np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
653+
])
654+
def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
655+
fill_method, limit, exp_vals):
656+
vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
657+
_exp_vals = list(exp_vals)
658+
# Overwrite placeholder values
659+
for index, exp_val in enumerate(_exp_vals):
660+
if exp_val == 'val1':
661+
_exp_vals[index] = val1
662+
elif exp_val == 'val2':
663+
_exp_vals[index] = val2
664+
665+
# Need to modify values and expectations depending on the
666+
# Series / DataFrame that we ultimately want to generate
667+
if mix_groupings: # ['a', 'b', 'a', 'b', ...]
668+
keys = ['a', 'b'] * len(vals)
669+
670+
def interweave(list_obj):
671+
temp = list()
672+
for x in list_obj:
673+
temp.extend([x, x])
674+
675+
return temp
676+
677+
_exp_vals = interweave(_exp_vals)
678+
vals = interweave(vals)
679+
else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
680+
keys = ['a'] * len(vals) + ['b'] * len(vals)
681+
_exp_vals = _exp_vals * 2
682+
vals = vals * 2
683+
684+
df = DataFrame({'key': keys, 'val': vals})
685+
if as_series:
686+
result = getattr(
687+
df.groupby('key')['val'], fill_method)(limit=limit)
688+
exp = Series(_exp_vals, name='val')
689+
assert_series_equal(result, exp)
690+
else:
691+
result = getattr(df.groupby('key'), fill_method)(limit=limit)
692+
exp = DataFrame({'key': keys, 'val': _exp_vals})
693+
assert_frame_equal(result, exp)
694+
695+
@pytest.mark.parametrize("test_series", [True, False])
696+
@pytest.mark.parametrize("periods,fill_method,limit", [
697+
(1, 'ffill', None), (1, 'ffill', 1),
698+
(1, 'bfill', None), (1, 'bfill', 1),
699+
(-1, 'ffill', None), (-1, 'ffill', 1),
700+
(-1, 'bfill', None), (-1, 'bfill', 1)])
701+
def test_pct_change(self, test_series, periods, fill_method, limit):
702+
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
703+
exp_vals = Series(vals).pct_change(periods=periods,
704+
fill_method=fill_method,
705+
limit=limit).tolist()
706+
707+
df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
708+
'vals': vals * 2})
709+
grp = df.groupby('key')
710+
711+
def get_result(grp_obj):
712+
return grp_obj.pct_change(periods=periods,
713+
fill_method=fill_method,
714+
limit=limit)
715+
716+
if test_series:
717+
exp = pd.Series(exp_vals * 2)
718+
exp.name = 'vals'
719+
grp = grp['vals']
720+
result = get_result(grp)
721+
tm.assert_series_equal(result, exp)
722+
else:
723+
exp = DataFrame({'vals': exp_vals * 2})
724+
result = get_result(grp)
725+
tm.assert_frame_equal(result, exp)

0 commit comments

Comments
 (0)