|
8 | 8 |
|
9 | 9 | from pandas import compat, lib, tslib, _np_version_under1p8
|
10 | 10 | from pandas.types.cast import _maybe_promote
|
11 |
| -from pandas.types.generic import ABCPeriodIndex, ABCDatetimeIndex |
| 11 | +from pandas.types.generic import (ABCSeries, ABCIndex, ABCPeriodIndex, |
| 12 | + ABCDatetimeIndex) |
12 | 13 | from pandas.types.common import (is_integer_dtype,
|
13 | 14 | is_int64_dtype,
|
14 | 15 | is_categorical_dtype,
|
@@ -448,6 +449,55 @@ def _value_counts_arraylike(values, dropna=True):
|
448 | 449 | return keys, counts
|
449 | 450 |
|
450 | 451 |
|
def duplicated(values, keep='first'):
    """
    Return boolean ndarray denoting duplicate values

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : Series, Index or ndarray-like
        Values to check for duplicates. Must expose a ``dtype`` attribute.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    duplicated : ndarray
    """

    dtype = values.dtype

    # Reduce the input to a plain array. Only a boolean mask is returned,
    # so there is no need to revert to the original type afterwards.
    if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
        if isinstance(values, (ABCSeries, ABCIndex)):
            values = values.values.view(np.int64)
        else:
            values = values.view(np.int64)
    elif is_period_arraylike(values):
        # imported locally, as in the original block
        from pandas.tseries.period import PeriodIndex
        values = PeriodIndex(values).asi8
    elif is_categorical_dtype(dtype):
        values = values.values.codes
    elif isinstance(values, (ABCSeries, ABCIndex)):
        values = values.values

    # Dispatch on the dtype of the *converted* array. Testing the original
    # dtype would send the integer views/codes produced above through the
    # object hashtable instead of the int64 one.
    dtype = values.dtype

    if is_integer_dtype(dtype):
        values = _ensure_int64(values)
        mask = htable.duplicated_int64(values, keep=keep)
    elif is_float_dtype(dtype):
        values = _ensure_float64(values)
        mask = htable.duplicated_float64(values, keep=keep)
    else:
        values = _ensure_object(values)
        mask = htable.duplicated_object(values, keep=keep)

    return mask
| 499 | + |
| 500 | + |
451 | 501 | def mode(values):
|
452 | 502 | """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
|
453 | 503 | # must sort because hash order isn't necessarily defined.
|
|
0 commit comments