Skip to content

Commit 24e881d

Browse files
authored
REF: Implement isin on DTA instead of DTI (#38012)
1 parent 20f7ffa commit 24e881d

File tree

6 files changed

+58
-67
lines changed

6 files changed

+58
-67
lines changed

pandas/core/algorithms.py

+2 -4
Original file line number | Diff line number | Diff line change
@@ -433,10 +433,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
433433
return cast("Categorical", comps).isin(values)
434434

435435
if needs_i8_conversion(comps):
436-
# Dispatch to DatetimeLikeIndexMixin.isin
437-
from pandas import Index
438-
439-
return Index(comps).isin(values)
436+
# Dispatch to DatetimeLikeArrayMixin.isin
437+
return array(comps).isin(values)
440438

441439
comps, dtype = _ensure_data(comps)
442440
values, _ = _ensure_data(values, dtype=dtype)

pandas/core/arrays/datetimelike.py

+54 -1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@
6262
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
6363

6464
from pandas.core import nanops, ops
65-
from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
65+
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
6666
from pandas.core.arraylike import OpsMixin
6767
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
6868
import pandas.core.common as com
@@ -697,6 +697,59 @@ def map(self, mapper):
697697

698698
return Index(self).map(mapper).array
699699

700+
def isin(self, values) -> np.ndarray:
701+
"""
702+
Compute boolean array of whether each value is found in the
703+
passed set of values.
704+
705+
Parameters
706+
----------
707+
values : set or sequence of values
708+
709+
Returns
710+
-------
711+
ndarray[bool]
712+
"""
713+
if not hasattr(values, "dtype"):
714+
values = np.asarray(values)
715+
716+
if values.dtype.kind in ["f", "i", "u", "c"]:
717+
# TODO: de-duplicate with equals, validate_comparison_value
718+
return np.zeros(self.shape, dtype=bool)
719+
720+
if not isinstance(values, type(self)):
721+
inferrable = [
722+
"timedelta",
723+
"timedelta64",
724+
"datetime",
725+
"datetime64",
726+
"date",
727+
"period",
728+
]
729+
if values.dtype == object:
730+
inferred = lib.infer_dtype(values, skipna=False)
731+
if inferred not in inferrable:
732+
if inferred == "string":
733+
pass
734+
735+
elif "mixed" in inferred:
736+
return isin(self.astype(object), values)
737+
else:
738+
return np.zeros(self.shape, dtype=bool)
739+
740+
try:
741+
values = type(self)._from_sequence(values)
742+
except ValueError:
743+
return isin(self.astype(object), values)
744+
745+
try:
746+
self._check_compatible_with(values)
747+
except (TypeError, ValueError):
748+
# Includes tzawareness mismatch and IncompatibleFrequencyError
749+
return np.zeros(self.shape, dtype=bool)
750+
751+
return isin(self.asi8, values.asi8)
752+
700753
# ------------------------------------------------------------------
701754
# Null Handling
702755

pandas/core/indexes/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -5145,7 +5145,7 @@ def isin(self, values, level=None):
51455145
"""
51465146
if level is not None:
51475147
self._validate_index_level(level)
5148-
return algos.isin(self, values)
5148+
return algos.isin(self._values, values)
51495149

51505150
def _get_string_slice(self, key: str_t):
51515151
# this is for partial string indexing,

pandas/core/indexes/datetimelike.py

-53
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
from pandas.core.dtypes.concat import concat_compat
2525
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
2626

27-
from pandas.core import algorithms
2827
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
2928
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3029
import pandas.core.common as com
@@ -500,58 +499,6 @@ def _partial_date_slice(
500499
__truediv__ = make_wrapped_arith_op("__truediv__")
501500
__rtruediv__ = make_wrapped_arith_op("__rtruediv__")
502501

503-
def isin(self, values, level=None):
504-
"""
505-
Compute boolean array of whether each index value is found in the
506-
passed set of values.
507-
508-
Parameters
509-
----------
510-
values : set or sequence of values
511-
512-
Returns
513-
-------
514-
is_contained : ndarray (boolean dtype)
515-
"""
516-
if level is not None:
517-
self._validate_index_level(level)
518-
519-
if not hasattr(values, "dtype"):
520-
values = np.asarray(values)
521-
522-
if values.dtype.kind in ["f", "i", "u", "c"]:
523-
# TODO: de-duplicate with equals, validate_comparison_value
524-
return np.zeros(self.shape, dtype=bool)
525-
526-
if not isinstance(values, type(self)):
527-
inferrable = [
528-
"timedelta",
529-
"timedelta64",
530-
"datetime",
531-
"datetime64",
532-
"date",
533-
"period",
534-
]
535-
if values.dtype == object:
536-
inferred = lib.infer_dtype(values, skipna=False)
537-
if inferred not in inferrable:
538-
if "mixed" in inferred:
539-
return self.astype(object).isin(values)
540-
return np.zeros(self.shape, dtype=bool)
541-
542-
try:
543-
values = type(self)(values)
544-
except ValueError:
545-
return self.astype(object).isin(values)
546-
547-
try:
548-
self._data._check_compatible_with(values)
549-
except (TypeError, ValueError):
550-
# Includes tzawareness mismatch and IncompatibleFrequencyError
551-
return np.zeros(self.shape, dtype=bool)
552-
553-
return algorithms.isin(self.asi8, values.asi8)
554-
555502
def shift(self, periods=1, freq=None):
556503
"""
557504
Shift index by desired number of time frequency increments.

pandas/core/indexes/numeric.py

-7
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
from pandas.core.dtypes.generic import ABCSeries
2828
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
2929

30-
from pandas.core import algorithms
3130
import pandas.core.common as com
3231
from pandas.core.indexes.base import Index, maybe_extract_name
3332

@@ -434,12 +433,6 @@ def __contains__(self, other: Any) -> bool:
434433
def is_unique(self) -> bool:
435434
return super().is_unique and self._nan_idxs.size < 2
436435

437-
@doc(Index.isin)
438-
def isin(self, values, level=None):
439-
if level is not None:
440-
self._validate_index_level(level)
441-
return algorithms.isin(np.array(self), values)
442-
443436
def _can_union_without_object_cast(self, other) -> bool:
444437
# See GH#26778, further casting may occur in NumericIndex._union
445438
return is_numeric_dtype(other.dtype)

pandas/core/series.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -4691,7 +4691,7 @@ def isin(self, values) -> "Series":
46914691
5 False
46924692
Name: animal, dtype: bool
46934693
"""
4694-
result = algorithms.isin(self, values)
4694+
result = algorithms.isin(self._values, values)
46954695
return self._constructor(result, index=self.index).__finalize__(
46964696
self, method="isin"
46974697
)

0 commit comments

Comments
 (0)