Skip to content

Commit 2d47349

Browse files
qwhelan authored and jreback committed
PERF: fix some of .clip() performance regression by using numpy arrays where possible (#24735)
1 parent d716feb commit 2d47349

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

asv_bench/benchmarks/series_methods.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -140,11 +140,13 @@ def time_map(self, mapper):
140140

141141

142142
class Clip(object):
143+
params = [50, 1000, 10**5]
144+
param_names = ['n']
143145

144-
def setup(self):
145-
self.s = Series(np.random.randn(50))
146+
def setup(self, n):
147+
self.s = Series(np.random.randn(n))
146148

147-
def time_clip(self):
149+
def time_clip(self, n):
148150
self.s.clip(0, 1)
149151

150152

pandas/core/generic.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -7148,12 +7148,18 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
71487148
raise ValueError("Cannot use an NA value as a clip threshold")
71497149

71507150
result = self
7151-
if upper is not None:
7152-
subset = self.le(upper, axis=None) | isna(result)
7153-
result = result.where(subset, upper, axis=None, inplace=False)
7154-
if lower is not None:
7155-
subset = self.ge(lower, axis=None) | isna(result)
7156-
result = result.where(subset, lower, axis=None, inplace=False)
7151+
mask = isna(self.values)
7152+
7153+
with np.errstate(all='ignore'):
7154+
if upper is not None:
7155+
subset = self.to_numpy() <= upper
7156+
result = result.where(subset, upper, axis=None, inplace=False)
7157+
if lower is not None:
7158+
subset = self.to_numpy() >= lower
7159+
result = result.where(subset, lower, axis=None, inplace=False)
7160+
7161+
if np.any(mask):
7162+
result[mask] = np.nan
71577163

71587164
if inplace:
71597165
self._update_inplace(result)

0 commit comments

Comments
 (0)