Skip to content

Commit 5f271eb

Browse files
shangyian and jreback
authored and committed
BUG: Adding skipna as an option to groupby cumsum and cumprod (#19914)
1 parent 072545d commit 5f271eb

File tree

4 files changed

+44
-4
lines changed

4 files changed

+44
-4
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,7 @@ Groupby/Resample/Rolling
927927
- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
928928
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
929929
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
930+
- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`)
930931

931932
Sparse
932933
^^^^^^

pandas/_libs/groupby.pyx

+14 -2
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
139139
def group_cumprod_float64(float64_t[:, :] out,
140140
float64_t[:, :] values,
141141
int64_t[:] labels,
142-
bint is_datetimelike):
142+
bint is_datetimelike,
143+
bint skipna=True):
143144
"""
144145
Only transforms on axis=0
145146
"""
@@ -163,14 +164,20 @@ def group_cumprod_float64(float64_t[:, :] out,
163164
if val == val:
164165
accum[lab, j] *= val
165166
out[i, j] = accum[lab, j]
167+
else:
168+
out[i, j] = NaN
169+
if not skipna:
170+
accum[lab, j] = NaN
171+
break
166172

167173

168174
@cython.boundscheck(False)
169175
@cython.wraparound(False)
170176
def group_cumsum(numeric[:, :] out,
171177
numeric[:, :] values,
172178
int64_t[:] labels,
173-
is_datetimelike):
179+
is_datetimelike,
180+
bint skipna=True):
174181
"""
175182
Only transforms on axis=0
176183
"""
@@ -196,6 +203,11 @@ def group_cumsum(numeric[:, :] out,
196203
if val == val:
197204
accum[lab, j] += val
198205
out[i, j] = accum[lab, j]
206+
else:
207+
out[i, j] = NaN
208+
if not skipna:
209+
accum[lab, j] = NaN
210+
break
199211
else:
200212
accum[lab, j] += val
201213
out[i, j] = accum[lab, j]

pandas/core/groupby.py

+4 -2
Original file line numberDiff line numberDiff line change
@@ -1888,7 +1888,8 @@ def rank(self, method='average', ascending=True, na_option='keep',
18881888
@Appender(_doc_template)
18891889
def cumprod(self, axis=0, *args, **kwargs):
18901890
"""Cumulative product for each group"""
1891-
nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
1891+
nv.validate_groupby_func('cumprod', args, kwargs,
1892+
['numeric_only', 'skipna'])
18921893
if axis != 0:
18931894
return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))
18941895

@@ -1898,7 +1899,8 @@ def cumprod(self, axis=0, *args, **kwargs):
18981899
@Appender(_doc_template)
18991900
def cumsum(self, axis=0, *args, **kwargs):
19001901
"""Cumulative sum for each group"""
1901-
nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
1902+
nv.validate_groupby_func('cumsum', args, kwargs,
1903+
['numeric_only', 'skipna'])
19021904
if axis != 0:
19031905
return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
19041906

pandas/tests/groupby/test_transform.py

+25
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,31 @@ def test_cython_transform_series(self, op, args, targop):
498498
tm.assert_series_equal(expected, getattr(
499499
data.groupby(labels), op)(*args))
500500

501+
@pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
502+
@pytest.mark.parametrize("skipna", [False, True])
503+
@pytest.mark.parametrize('input, exp', [
504+
# When everything is NaN
505+
({'key': ['b'] * 10, 'value': np.nan},
506+
pd.Series([np.nan] * 10, name='value')),
507+
# When there is a single NaN
508+
({'key': ['b'] * 10 + ['a'] * 2,
509+
'value': [3] * 3 + [np.nan] + [3] * 8},
510+
{('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
511+
('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
512+
2187., 6561., 19683., 3.0, 9.0],
513+
('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
514+
('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
515+
21., 24., 27., 3.0, 6.0]})])
516+
def test_groupby_cum_skipna(self, op, skipna, input, exp):
517+
df = pd.DataFrame(input)
518+
result = df.groupby('key')['value'].transform(op, skipna=skipna)
519+
if isinstance(exp, dict):
520+
expected = exp[(op, skipna)]
521+
else:
522+
expected = exp
523+
expected = pd.Series(expected, name='value')
524+
tm.assert_series_equal(expected, result)
525+
501526
@pytest.mark.parametrize(
502527
"op, args, targop",
503528
[('cumprod', (), lambda x: x.cumprod()),

0 commit comments

Comments
 (0)