|
15 | 15 | from pandas import compat
|
16 | 16 | from pandas.compat import iteritems, PY36, OrderedDict
|
17 | 17 | from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
|
18 |
| -from pandas.core.dtypes.common import is_integer |
| 18 | +from pandas.core.dtypes.common import (is_integer, is_integer_dtype, |
| 19 | + is_scalar, ensure_platform_int) |
19 | 20 | from pandas.core.dtypes.inference import _iterable_not_string
|
20 | 21 | from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
|
21 | 22 | from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
@@ -430,3 +431,46 @@ def _pipe(obj, func, *args, **kwargs):
|
430 | 431 | return func(*args, **kwargs)
|
431 | 432 | else:
|
432 | 433 | return func(obj, *args, **kwargs)
|
| 434 | + |
| 435 | + |
def searchsorted_integer(arr, value, side="left", sorter=None):
    """
    Call ``arr.searchsorted`` after casting ``value`` to ``arr.dtype``.

    Integer-valued scalars (integers, and objects such as the float ``2.0``
    whose ``is_integer()`` method returns True) and integer-dtype array-likes
    are converted to ``arr.dtype`` before searching. Any other value, for
    example the float ``2.2``, is handed to ``arr.searchsorted`` unchanged.

    Parameters
    ----------
    arr : object with an integer ``dtype`` accepted by ``np.iinfo``
        Must also provide a ``searchsorted`` method.
    value : scalar or array-like
        Value(s) to locate in ``arr``.
    side : str, default "left"
        Passed through to ``arr.searchsorted``.
    sorter : array-like, optional
        Converted with ``ensure_platform_int`` and passed through to
        ``arr.searchsorted``.

    Returns
    -------
    The result of ``arr.searchsorted(value, side=side, sorter=sorter)``.

    Raises
    ------
    ValueError
        If ``value`` (or any of its elements) is smaller than the minimum or
        larger than the maximum representable by ``arr.dtype``.
    """
    dtype = arr.dtype
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    iinfo = np.iinfo(dtype)

    # float 2.0 counts as integer-valued, float 2.2 does not
    integral_scalar = is_integer(value) or (
        hasattr(value, 'is_integer') and value.is_integer())

    # check integer bounds
    if integral_scalar:
        # Compare as an exact Python int. Going through a float64 array
        # rounds the dtype limits, so e.g. float(2 ** 63) would look like it
        # fits into int64 and then overflow in the cast below.
        exact = int(value)
        out_of_bounds = exact < iinfo.min or exact > iinfo.max
    else:
        value_arr = np.array([value]) if is_scalar(value) else np.array(value)
        out_of_bounds = ((value_arr < iinfo.min).any() or
                         (value_arr > iinfo.max).any())

    if out_of_bounds:
        msg = "Value {} out of bound for dtype {}".format(value, dtype)
        raise ValueError(msg)

    # convert dtype of value for better searchsorted speed
    if integral_scalar:
        value = np.asarray(exact, dtype=dtype)
    elif is_integer_dtype(value):
        value = np.asarray(value, dtype=dtype)

    return arr.searchsorted(value, side=side, sorter=sorter)
| 457 | + |
| 458 | + |
def searchsorted(arr, value, side="left", sorter=None):
    """
    Call ``arr.searchsorted(value)`` with adjustments for integer dtypes.

    :func:`numpy.searchsorted` is only fast if value is of same dtype
    as the searched array. Else numpy recasts arr to a higher dtype, which
    causes a slowdown. Below we ensure that value has the right dtype
    for giving fast results for arr.searchsorted.

    The integer-specific handling is only applied when ``arr.dtype`` is a
    numpy dtype; any other ``arr`` is searched with its own ``searchsorted``
    method directly.

    See :meth:`Index.searchsorted` for details on parameters and return value.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    # searchsorted_integer calls np.iinfo(arr.dtype), which needs a real numpy
    # integer dtype. Objects that merely report an integer-like dtype (e.g.
    # extension arrays) therefore go straight to their own searchsorted.
    has_numpy_dtype = isinstance(getattr(arr, 'dtype', None), np.dtype)
    if has_numpy_dtype and is_integer_dtype(arr):
        return searchsorted_integer(arr, value, side=side, sorter=sorter)

    return arr.searchsorted(value, side=side, sorter=sorter)
0 commit comments