Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH/POC: EA.isin #38422

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ objects.
api.extensions.ExtensionArray.equals
api.extensions.ExtensionArray.factorize
api.extensions.ExtensionArray.fillna
api.extensions.ExtensionArray.isin
api.extensions.ExtensionArray.isna
api.extensions.ExtensionArray.ravel
api.extensions.ExtensionArray.repeat
Expand Down
10 changes: 3 additions & 7 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,10 +449,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:

comps = _ensure_arraylike(comps)
comps = extract_array(comps, extract_numpy=True)
if is_categorical_dtype(comps.dtype):
# TODO(extension)
# handle categoricals
return cast("Categorical", comps).isin(values)
if is_extension_array_dtype(comps.dtype):
return comps.isin(values)

if needs_i8_conversion(comps.dtype):
# Dispatch to DatetimeLikeArrayMixin.isin
Expand All @@ -464,9 +462,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
elif needs_i8_conversion(values.dtype):
return isin(comps, values.astype(object))

elif is_extension_array_dtype(comps.dtype) or is_extension_array_dtype(
values.dtype
):
elif is_extension_array_dtype(values.dtype):
return isin(np.asarray(comps), np.asarray(values))

# GH16012
Expand Down
19 changes: 18 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core import ops
from pandas.core.algorithms import factorize_array, unique
from pandas.core.algorithms import factorize_array, isin, unique
from pandas.core.missing import get_fill_func
from pandas.core.sorting import nargminmax, nargsort

Expand Down Expand Up @@ -78,6 +78,7 @@ class ExtensionArray:
factorize
fillna
equals
isin
isna
ravel
repeat
Expand Down Expand Up @@ -833,6 +834,22 @@ def equals(self, other: object) -> bool:
equal_na = self.isna() & other.isna()
return bool((equal_values | equal_na).all())

def isin(self, values) -> np.ndarray:
    """
    Check, element by element, whether each entry of the array is in `values`.

    The array is first converted with ``np.asarray`` and the membership
    test is then delegated to the module-level ``isin`` routine.

    Roughly equivalent to `np.array([x in values for x in self])`

    Parameters
    ----------
    values : Sequence
        Candidate values to test each element of the array against.

    Returns
    -------
    np.ndarray[bool]
    """
    # Convert to an ndarray before handing off to the shared implementation.
    as_ndarray = np.asarray(self)
    return isin(as_ndarray, values)

def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
"""
Return an array and missing value suitable for factorization.
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops
from pandas.core.algorithms import factorize_array, take
from pandas.core.algorithms import factorize_array, isin, take
from pandas.core.array_algos import masked_reductions
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.indexers import check_array_indexer

if TYPE_CHECKING:
from pandas import Series
from pandas.core.arrays import BooleanArray


BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
Expand Down Expand Up @@ -299,6 +300,13 @@ def take(

return type(self)(result, mask, copy=False)

def isin(self, values) -> "BooleanArray":
    """
    Check, element by element, whether each entry of the array is in `values`.

    Membership is computed on the underlying ``_data`` array, and the
    result is wrapped in a ``BooleanArray`` that carries a copy of this
    array's ``_mask``.

    Parameters
    ----------
    values : Sequence
        Candidate values to test each element of the array against.

    Returns
    -------
    BooleanArray
    """
    # Imported locally; at module level the name is only available under
    # TYPE_CHECKING.
    from pandas.core.arrays import BooleanArray

    membership = isin(self._data, values)
    # Copy the mask so the result does not share it with this array.
    mask_copy = self._mask.copy()
    return BooleanArray(membership, mask_copy, copy=False)

def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
data, mask = self._data, self._mask
data = data.copy()
Expand Down