Skip to content

Commit 0aa112b

Browse files
author
tp
committed
improve performance of Series.searchsorted
1 parent b975455 commit 0aa112b

File tree

3 files changed

+26
-3
lines changed

3 files changed

+26
-3
lines changed

asv_bench/benchmarks/series_methods.py

+19
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,25 @@ def time_dropna(self, dtype):
7272
self.s.dropna()
7373

7474

75+
class SearchSorted(object):
    """asv benchmark timing ``Series.searchsorted`` for several dtypes.

    Every parameter in ``params`` is a dtype name; ``setup`` builds a sorted
    Series of that dtype and ``time_searchsorted`` looks up the middle value.
    """

    goal_time = 0.2
    params = ['int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64',
              'float16', 'float32', 'float64',
              'str']
    param_names = ['dtype']

    def setup(self, dtype):
        """Store a sorted Series of 1s, 2s and 3s cast to ``dtype``."""
        run_length = 10**5
        # Three equal-length runs (1..., 2..., 3...), already in sorted order.
        sorted_values = np.repeat([1, 2, 3], run_length)
        self.s = Series(sorted_values.astype(dtype))

    def time_searchsorted(self, dtype):
        """Search for the middle value, using a string key for 'str'."""
        if dtype == 'str':
            needle = '2'
        else:
            needle = 2
        self.s.searchsorted(needle)
92+
93+
7594
class Map(object):
7695

7796
goal_time = 0.2

doc/source/whatsnew/v0.24.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,8 @@ Performance Improvements
401401
- Very large improvement in performance of slicing when the index is a :class:`CategoricalIndex`,
402402
both when indexing by label (using .loc) and by position (using .iloc).
403403
Likewise, slicing a ``CategoricalIndex`` itself (i.e. ``ci[100:200]``) shows similar speed improvements (:issue:`21659`)
404-
- Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`)
404+
- Improved performance of :func:`Series.searchsorted` (:issue:`22034`)
405+
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
405406
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
406407
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`)
407408
- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex`

pandas/core/series.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2077,8 +2077,11 @@ def __rmatmul__(self, other):
20772077
def searchsorted(self, value, side='left', sorter=None):
    # Find the position(s) at which ``value`` would be inserted into
    # ``self._values``; ``side`` and ``sorter`` are forwarded unchanged to
    # the underlying array's ``searchsorted``.
    # NOTE(review): no docstring is added here on the assumption that the
    # public documentation is attached outside this hunk -- confirm.
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    if not is_extension_type(self._values):
        own_dtype = self._values.dtype
        numeric_kinds = 'biufc'
        key = None

        if own_dtype.kind in numeric_kinds:
            probe = np.asarray(value)
            if probe.dtype.kind in numeric_kinds:
                # Handing numpy a key that already has the Series' dtype is
                # the fast path this method aims for, but a blind cast can
                # wrap (int64 256 -> int8 0) or truncate (2.5 -> 2) and
                # silently yield the wrong position. Keep the cast only when
                # it round-trips; otherwise search with the key as given.
                with np.errstate(over='ignore', invalid='ignore'):
                    narrowed = probe.astype(own_dtype)
                key = narrowed if np.array_equal(narrowed, probe) else probe

        if key is None:
            # Non-numeric data or key (e.g. object, datetime-like): coerce
            # straight to the Series' dtype.
            key = np.asarray(value, dtype=own_dtype)

        # A 0-d key is promoted to 1-d so this path always returns an array.
        if key.ndim == 0:
            key = key[np.newaxis]
        value = key

    return self._values.searchsorted(value, side=side, sorter=sorter)
20822085

20832086
# -------------------------------------------------------------------
20842087
# Combination

0 commit comments

Comments
 (0)