Skip to content

Commit 0b6d120

Browse files
authored
CLN: Rename "add" to "sum" in groupby (pandas-dev#47892)
* CLN: Rename "add" to "sum"
* revert
1 parent 23c53bb commit 0b6d120

File tree

6 files changed

+30
-34
lines changed

6 files changed

+30
-34
lines changed

pandas/_libs/groupby.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def group_any_all(
5050
val_test: Literal["any", "all"],
5151
skipna: bool,
5252
) -> None: ...
53-
def group_add(
53+
def group_sum(
5454
out: np.ndarray, # complexfloating_t[:, ::1]
5555
counts: np.ndarray, # int64_t[::1]
5656
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]

pandas/_libs/groupby.pyx

+15-15
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def group_median_float64(
124124
ndarray[intp_t] indexer
125125
float64_t* ptr
126126

127-
assert min_count == -1, "'min_count' only used in add and prod"
127+
assert min_count == -1, "'min_count' only used in sum and prod"
128128

129129
ngroups = len(counts)
130130
N, K = (<object>values).shape
@@ -502,7 +502,7 @@ def group_any_all(
502502

503503

504504
# ----------------------------------------------------------------------
505-
# group_add, group_prod, group_var, group_mean, group_ohlc
505+
# group_sum, group_prod, group_var, group_mean, group_ohlc
506506
# ----------------------------------------------------------------------
507507

508508
ctypedef fused mean_t:
@@ -511,17 +511,17 @@ ctypedef fused mean_t:
511511
complex64_t
512512
complex128_t
513513

514-
ctypedef fused add_t:
514+
ctypedef fused sum_t:
515515
mean_t
516516
object
517517

518518

519519
@cython.wraparound(False)
520520
@cython.boundscheck(False)
521-
def group_add(
522-
add_t[:, ::1] out,
521+
def group_sum(
522+
sum_t[:, ::1] out,
523523
int64_t[::1] counts,
524-
ndarray[add_t, ndim=2] values,
524+
ndarray[sum_t, ndim=2] values,
525525
const intp_t[::1] labels,
526526
Py_ssize_t min_count=0,
527527
bint is_datetimelike=False,
@@ -531,8 +531,8 @@ def group_add(
531531
"""
532532
cdef:
533533
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
534-
add_t val, t, y
535-
add_t[:, ::1] sumx, compensation
534+
sum_t val, t, y
535+
sum_t[:, ::1] sumx, compensation
536536
int64_t[:, ::1] nobs
537537
Py_ssize_t len_values = len(values), len_labels = len(labels)
538538

@@ -546,7 +546,7 @@ def group_add(
546546

547547
N, K = (<object>values).shape
548548

549-
if add_t is object:
549+
if sum_t is object:
550550
# NB: this does not use 'compensation' like the non-object track does.
551551
for i in range(N):
552552
lab = labels[i]
@@ -588,10 +588,10 @@ def group_add(
588588

589589
# not nan
590590
# With dt64/td64 values, values have been cast to float64
591-
# instead if int64 for group_add, but the logic
591+
# instead of int64 for group_sum, but the logic
592592
# is otherwise the same as in _treat_as_na
593593
if val == val and not (
594-
add_t is float64_t
594+
sum_t is float64_t
595595
and is_datetimelike
596596
and val == <float64_t>NPY_NAT
597597
):
@@ -677,7 +677,7 @@ def group_var(
677677
int64_t[:, ::1] nobs
678678
Py_ssize_t len_values = len(values), len_labels = len(labels)
679679

680-
assert min_count == -1, "'min_count' only used in add and prod"
680+
assert min_count == -1, "'min_count' only used in sum and prod"
681681

682682
if len_values != len_labels:
683683
raise ValueError("len(index) != len(labels)")
@@ -745,7 +745,7 @@ def group_mean(
745745
Array containing unique label for each group, with its
746746
ordering matching up to the corresponding record in `values`.
747747
min_count : Py_ssize_t
748-
Only used in add and prod. Always -1.
748+
Only used in sum and prod. Always -1.
749749
is_datetimelike : bool
750750
True if `values` contains datetime-like entries.
751751
mask : ndarray[bool, ndim=2], optional
@@ -766,7 +766,7 @@ def group_mean(
766766
int64_t[:, ::1] nobs
767767
Py_ssize_t len_values = len(values), len_labels = len(labels)
768768

769-
assert min_count == -1, "'min_count' only used in add and prod"
769+
assert min_count == -1, "'min_count' only used in sum and prod"
770770

771771
if len_values != len_labels:
772772
raise ValueError("len(index) != len(labels)")
@@ -821,7 +821,7 @@ def group_ohlc(
821821
Py_ssize_t i, j, N, K, lab
822822
floating val
823823

824-
assert min_count == -1, "'min_count' only used in add and prod"
824+
assert min_count == -1, "'min_count' only used in sum and prod"
825825

826826
if len(labels) == 0:
827827
return

pandas/core/groupby/groupby.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1338,7 +1338,6 @@ def _resolve_numeric_only(
13381338

13391339
if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype):
13401340
# GH#47500
1341-
how = "sum" if how == "add" else how
13421341
warnings.warn(
13431342
f"{type(self).__name__}.{how} called with "
13441343
f"numeric_only={numeric_only} and dtype {self.obj.dtype}. This will "
@@ -1738,9 +1737,8 @@ def _cython_agg_general(
17381737
kwd_name = "numeric_only"
17391738
if how in ["any", "all"]:
17401739
kwd_name = "bool_only"
1741-
kernel = "sum" if how == "add" else how
17421740
raise NotImplementedError(
1743-
f"{type(self).__name__}.{kernel} does not implement {kwd_name}."
1741+
f"{type(self).__name__}.{how} does not implement {kwd_name}."
17441742
)
17451743
elif not is_ser:
17461744
data = data.get_numeric_data(copy=False)
@@ -2417,7 +2415,7 @@ def sum(
24172415
result = self._agg_general(
24182416
numeric_only=numeric_only,
24192417
min_count=min_count,
2420-
alias="add",
2418+
alias="sum",
24212419
npfunc=np.sum,
24222420
)
24232421

@@ -4341,8 +4339,6 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
43414339

43424340

43434341
def warn_dropping_nuisance_columns_deprecated(cls, how: str, numeric_only) -> None:
4344-
if how == "add":
4345-
how = "sum"
43464342
if numeric_only is not lib.no_default and not numeric_only:
43474343
# numeric_only was specified and falsey but still dropped nuisance columns
43484344
warnings.warn(

pandas/core/groupby/ops.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None:
121121

122122
_CYTHON_FUNCTIONS = {
123123
"aggregate": {
124-
"add": "group_add",
124+
"sum": "group_sum",
125125
"prod": "group_prod",
126126
"min": "group_min",
127127
"max": "group_max",
@@ -213,7 +213,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
213213
values = ensure_float64(values)
214214

215215
elif values.dtype.kind in ["i", "u"]:
216-
if how in ["add", "var", "prod", "mean", "ohlc"] or (
216+
if how in ["sum", "var", "prod", "mean", "ohlc"] or (
217217
self.kind == "transform" and self.has_dropped_na
218218
):
219219
# result may still include NaN, so we have to cast
@@ -241,7 +241,7 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
241241
if isinstance(dtype, CategoricalDtype):
242242
# NotImplementedError for methods that can fall back to a
243243
# non-cython implementation.
244-
if how in ["add", "prod", "cumsum", "cumprod"]:
244+
if how in ["sum", "prod", "cumsum", "cumprod"]:
245245
raise TypeError(f"{dtype} type does not support {how} operations")
246246
elif how not in ["rank"]:
247247
# only "rank" is implemented in cython
@@ -258,7 +258,7 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
258258
# TODO: same for period_dtype? no for these methods with Period
259259
# we raise NotImplemented if this is an invalid operation
260260
# entirely, e.g. adding datetimes
261-
if how in ["add", "prod", "cumsum", "cumprod"]:
261+
if how in ["sum", "prod", "cumsum", "cumprod"]:
262262
raise TypeError(f"datetime64 type does not support {how} operations")
263263
elif is_timedelta64_dtype(dtype):
264264
if how in ["prod", "cumprod"]:
@@ -311,7 +311,7 @@ def _get_result_dtype(self, dtype: np.dtype) -> np.dtype:
311311
"""
312312
how = self.how
313313

314-
if how in ["add", "cumsum", "sum", "prod"]:
314+
if how in ["sum", "cumsum", "sum", "prod"]:
315315
if dtype == np.dtype(bool):
316316
return np.dtype(np.int64)
317317
elif how in ["mean", "median", "var"]:
@@ -567,7 +567,7 @@ def _call_cython_op(
567567
result_mask=result_mask,
568568
is_datetimelike=is_datetimelike,
569569
)
570-
elif self.how in ["add"]:
570+
elif self.how in ["sum"]:
571571
# We support datetimelike
572572
func(
573573
out=result,
@@ -625,7 +625,7 @@ def _call_cython_op(
625625
# e.g. if we are int64 and need to restore to datetime64/timedelta64
626626
# "rank" is the only member of cast_blocklist we get here
627627
# Casting only needed for float16, bool, datetimelike,
628-
# and self.how in ["add", "prod", "ohlc", "cumprod"]
628+
# and self.how in ["sum", "prod", "ohlc", "cumprod"]
629629
res_dtype = self._get_result_dtype(orig_values.dtype)
630630
op_result = maybe_downcast_to_dtype(result, res_dtype)
631631
else:

pandas/tests/groupby/aggregate/test_cython.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def test_cython_fail_agg():
166166
("mean", np.mean),
167167
("median", np.median),
168168
("var", np.var),
169-
("add", np.sum),
169+
("sum", np.sum),
170170
("prod", np.prod),
171171
("min", np.min),
172172
("max", np.max),
@@ -214,7 +214,7 @@ def test_cython_agg_empty_buckets_nanops(observed):
214214
grps = range(0, 25, 5)
215215
# add / sum
216216
result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general(
217-
"add", alt=None, numeric_only=True
217+
"sum", alt=None, numeric_only=True
218218
)
219219
intervals = pd.interval_range(0, 20, freq=5, inclusive="right")
220220
expected = DataFrame(

pandas/tests/resample/test_datetime_index.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,15 @@ def test_custom_grouper(index):
6161

6262
# check all cython functions work
6363
g.ohlc() # doesn't use _cython_agg_general
64-
funcs = ["add", "mean", "prod", "min", "max", "var"]
64+
funcs = ["sum", "mean", "prod", "min", "max", "var"]
6565
for f in funcs:
6666
g._cython_agg_general(f, alt=None, numeric_only=True)
6767

6868
b = Grouper(freq=Minute(5), closed="right", label="right")
6969
g = s.groupby(b)
7070
# check all cython functions work
7171
g.ohlc() # doesn't use _cython_agg_general
72-
funcs = ["add", "mean", "prod", "min", "max", "var"]
72+
funcs = ["sum", "mean", "prod", "min", "max", "var"]
7373
for f in funcs:
7474
g._cython_agg_general(f, alt=None, numeric_only=True)
7575

@@ -414,7 +414,7 @@ def test_resample_upsampling_picked_but_not_correct():
414414
tm.assert_series_equal(result2, expected)
415415

416416

417-
@pytest.mark.parametrize("f", ["add", "mean", "prod", "min", "max", "var"])
417+
@pytest.mark.parametrize("f", ["sum", "mean", "prod", "min", "max", "var"])
418418
def test_resample_frame_basic_cy_funcs(f):
419419
df = tm.makeTimeDataFrame()
420420

0 commit comments

Comments
 (0)