Skip to content

Commit c748de0

Browse files
TomAugspurger authored and jreback committed
ENH: ExtensionArray.unique (#19869)
1 parent e995b0d commit c748de0

File tree

4 files changed

+35
-8
lines changed

4 files changed

+35
-8
lines changed

pandas/core/algorithms.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
maybe_promote, construct_1d_object_array_from_listlike)
1111
from pandas.core.dtypes.generic import (
1212
ABCSeries, ABCIndex,
13-
ABCIndexClass, ABCCategorical)
13+
ABCIndexClass)
1414
from pandas.core.dtypes.common import (
15+
is_array_like,
1516
is_unsigned_integer_dtype, is_signed_integer_dtype,
1617
is_integer_dtype, is_complex_dtype,
1718
is_object_dtype,
@@ -168,8 +169,7 @@ def _ensure_arraylike(values):
168169
"""
169170
ensure that we are arraylike if not already
170171
"""
171-
if not isinstance(values, (np.ndarray, ABCCategorical,
172-
ABCIndexClass, ABCSeries)):
172+
if not is_array_like(values):
173173
inferred = lib.infer_dtype(values)
174174
if inferred in ['mixed', 'string', 'unicode']:
175175
if isinstance(values, tuple):
@@ -353,11 +353,8 @@ def unique(values):
353353

354354
values = _ensure_arraylike(values)
355355

356-
# categorical is a fast-path
357-
# this will coerce Categorical, CategoricalIndex,
358-
# and category dtypes Series to same return of Category
359-
if is_categorical_dtype(values):
360-
values = getattr(values, '.values', values)
356+
if is_extension_array_dtype(values):
357+
# Dispatch to extension dtype's unique.
361358
return values.unique()
362359

363360
original = values

pandas/core/arrays/base.py

+12
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,18 @@ def isna(self):
236236
"""
237237
raise AbstractMethodError(self)
238238

239+
def unique(self):
    """Compute the ExtensionArray of unique values.

    The array is cast to object dtype, de-duplicated with the top-level
    ``pandas.unique`` function, and the result is rebuilt through
    ``_constructor_from_sequence``.

    Returns
    -------
    uniques : ExtensionArray
    """
    # Function-scope import, as in the original implementation
    # (presumably to avoid a circular import -- confirm).
    from pandas import unique as unique_values

    as_objects = self.astype(object)
    distinct = unique_values(as_objects)
    return self._constructor_from_sequence(distinct)
250+
239251
# ------------------------------------------------------------------------
240252
# Indexing methods
241253
# ------------------------------------------------------------------------

pandas/tests/extension/base/methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,14 @@ def test_count(self, data_missing):
3131
def test_apply_simple_series(self, data):
3232
result = pd.Series(data).apply(id)
3333
assert isinstance(result, pd.Series)
34+
35+
@pytest.mark.parametrize('box', [pd.Series, lambda x: x])
@pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
    """Check that a two-element array of one repeated value has one unique.

    ``box`` either wraps the array in a Series or leaves it as is;
    ``method`` is either the object's own ``.unique()`` or ``pd.unique``.
    """
    # Build an array holding the first element of ``data`` twice.
    repeated = data._constructor_from_sequence([data[0], data[0]])
    boxed = box(repeated)

    uniques = method(boxed)

    # Exactly one value survives, the array type is preserved, and the
    # surviving value equals the repeated one.
    assert len(uniques) == 1
    assert isinstance(uniques, type(data))
    assert uniques[0] == boxed[0]

pandas/tests/extension/json/array.py

+7
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,13 @@ def take(self, indexer, allow_fill=True, fill_value=None):
8888
def copy(self, deep=False):
8989
return type(self)(self.data[:])
9090

91+
def unique(self):
    """Return a new array containing each distinct dict of ``self.data`` once.

    Uniques are returned in order of first appearance. Two dicts count as
    duplicates when they hold the same key/value pairs, regardless of the
    order in which the keys were inserted.

    Returns
    -------
    uniques : same type as ``self``
        Built from a list of plain ``dict`` copies of the distinct entries.

    Raises
    ------
    TypeError
        If any dict value is unhashable, since the items are used as a
        set key.
    """
    # Parent method doesn't work since np.array will try to infer
    # a 2-dim object.
    seen = set()
    uniques = []
    for d in self.data:
        # Dicts are unhashable; a frozenset of the items is a hashable,
        # insertion-order-independent stand-in for the dict's contents.
        key = frozenset(d.items())
        if key not in seen:
            seen.add(key)
            uniques.append(dict(d))
    return type(self)(uniques)
97+
9198
@property
9299
def _na_value(self):
93100
return {}

0 commit comments

Comments
 (0)