Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: Implement isin on DTA instead of DTI #38012

Merged
merged 3 commits into from
Nov 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
return cast("Categorical", comps).isin(values)

if needs_i8_conversion(comps):
# Dispatch to DatetimeLikeIndexMixin.isin
from pandas import Index

return Index(comps).isin(values)
# Dispatch to DatetimeLikeArrayMixin.isin
return array(comps).isin(values)

comps, dtype = _ensure_data(comps)
values, _ = _ensure_data(values, dtype=dtype)
Expand Down
55 changes: 54 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna

from pandas.core import nanops, ops
from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
import pandas.core.common as com
Expand Down Expand Up @@ -697,6 +697,59 @@ def map(self, mapper):

return Index(self).map(mapper).array

def isin(self, values) -> np.ndarray:
    """
    Test every element of this array for membership in ``values``.

    Parameters
    ----------
    values : set or sequence of values

    Returns
    -------
    ndarray[bool]
    """
    if not hasattr(values, "dtype"):
        # Plain Python containers carry no dtype; wrap them so the dtype
        # checks below work uniformly.
        values = np.asarray(values)

    if values.dtype.kind in ("f", "i", "u", "c"):
        # TODO: de-duplicate with equals, validate_comparison_value
        # Float, integer and complex candidates are reported as all-False.
        return np.zeros(self.shape, dtype=bool)

    if not isinstance(values, type(self)):
        if values.dtype == object:
            inferred = lib.infer_dtype(values, skipna=False)
            datetimelike_kinds = (
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            )
            # Strings are passed on to the conversion attempt below, just
            # like the datetime-like kinds.
            convertible = inferred == "string" or inferred in datetimelike_kinds
            if not convertible:
                if "mixed" in inferred:
                    # Mixed content: compare element-wise as objects.
                    return isin(self.astype(object), values)
                return np.zeros(self.shape, dtype=bool)

        try:
            values = type(self)._from_sequence(values)
        except ValueError:
            # Conversion failed; fall back to an object-dtype comparison.
            return isin(self.astype(object), values)

    try:
        self._check_compatible_with(values)
    except (TypeError, ValueError):
        # Includes tzawareness mismatch and IncompatibleFrequencyError
        return np.zeros(self.shape, dtype=bool)

    # Both sides are compatible: compare the ``asi8`` values.
    return isin(self.asi8, values.asi8)

# ------------------------------------------------------------------
# Null Handling

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5145,7 +5145,7 @@ def isin(self, values, level=None):
"""
if level is not None:
self._validate_index_level(level)
return algos.isin(self, values)
return algos.isin(self._values, values)

def _get_string_slice(self, key: str_t):
# this is for partial string indexing,
Expand Down
53 changes: 0 additions & 53 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCIndex, ABCSeries

from pandas.core import algorithms
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
import pandas.core.common as com
Expand Down Expand Up @@ -500,58 +499,6 @@ def _partial_date_slice(
__truediv__ = make_wrapped_arith_op("__truediv__")
__rtruediv__ = make_wrapped_arith_op("__rtruediv__")

def isin(self, values, level=None):
    """
    Test every index value for membership in ``values``.

    Parameters
    ----------
    values : set or sequence of values
    level : optional
        When not None, it is validated with ``_validate_index_level``.

    Returns
    -------
    is_contained : ndarray (boolean dtype)
    """
    if level is not None:
        self._validate_index_level(level)

    if not hasattr(values, "dtype"):
        # Plain Python containers carry no dtype; wrap them so the dtype
        # checks below work uniformly.
        values = np.asarray(values)

    if values.dtype.kind in ("f", "i", "u", "c"):
        # TODO: de-duplicate with equals, validate_comparison_value
        # Float, integer and complex candidates are reported as all-False.
        return np.zeros(self.shape, dtype=bool)

    if not isinstance(values, type(self)):
        if values.dtype == object:
            inferred = lib.infer_dtype(values, skipna=False)
            datetimelike_kinds = (
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            )
            if inferred not in datetimelike_kinds:
                if "mixed" not in inferred:
                    return np.zeros(self.shape, dtype=bool)
                # Mixed content: compare element-wise as objects.
                return self.astype(object).isin(values)

        try:
            values = type(self)(values)
        except ValueError:
            # Conversion failed; fall back to an object-dtype comparison.
            return self.astype(object).isin(values)

    try:
        self._data._check_compatible_with(values)
    except (TypeError, ValueError):
        # Includes tzawareness mismatch and IncompatibleFrequencyError
        return np.zeros(self.shape, dtype=bool)

    # Both sides are compatible: compare the ``asi8`` values.
    return algorithms.isin(self.asi8, values.asi8)

def shift(self, periods=1, freq=None):
"""
Shift index by desired number of time frequency increments.
Expand Down
7 changes: 0 additions & 7 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna

from pandas.core import algorithms
import pandas.core.common as com
from pandas.core.indexes.base import Index, maybe_extract_name

Expand Down Expand Up @@ -434,12 +433,6 @@ def __contains__(self, other: Any) -> bool:
def is_unique(self) -> bool:
    """
    Return whether the index is unique.

    The parent class's ``is_unique`` must hold and ``_nan_idxs`` must
    contain fewer than two entries.
    """
    parent_unique = super().is_unique
    # ``and`` short-circuits: ``_nan_idxs`` is only consulted when the parent
    # check passes.
    return parent_unique and self._nan_idxs.size < 2

@doc(Index.isin)
def isin(self, values, level=None):
    # No docstring here on purpose: ``doc`` is handed ``Index.isin``,
    # presumably so its documentation is reused (confirm against ``doc``).
    level_given = level is not None
    if level_given:
        self._validate_index_level(level)
    # Convert to a NumPy array before handing off to the shared algorithm.
    as_ndarray = np.array(self)
    return algorithms.isin(as_ndarray, values)

def _can_union_without_object_cast(self, other) -> bool:
# See GH#26778, further casting may occur in NumericIndex._union
return is_numeric_dtype(other.dtype)
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4691,7 +4691,7 @@ def isin(self, values) -> "Series":
5 False
Name: animal, dtype: bool
"""
result = algorithms.isin(self, values)
result = algorithms.isin(self._values, values)
return self._constructor(result, index=self.index).__finalize__(
self, method="isin"
)
Expand Down