Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

searchsorted, repeat broken off from #24024 #24461

Merged
merged 8 commits into from
Dec 28, 2018
136 changes: 136 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
from pandas._libs.tslibs.timestamps import (
RoundTo, maybe_integer_op_deprecated, round_nsint64)
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import (
AbstractMethodError, NullFrequencyError, PerformanceWarning)
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
@@ -80,6 +81,80 @@ def _get_attributes_dict(self):
"""
return {k: getattr(self, k, None) for k in self._attributes}

@property
def _scalar_type(self):
    # type: () -> Union[type, Tuple[type]]
    """
    Scalar class (or tuple of classes) associated with this array type.

    Concrete arrays are expected to supply:

    * PeriodArray : Period
    * DatetimeArray : Timestamp
    * TimedeltaArray : Timedelta

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)

def _scalar_from_string(self, value):
    # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
    """
    Build a scalar of ``self._scalar_type`` out of its string form.

    Parameters
    ----------
    value : str
        Text to be converted into a scalar.

    Returns
    -------
    Period, Timestamp, or Timedelta, or NaT
        An instance of whatever ``self._scalar_type`` is.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Notes
    -----
    Implementations should call ``self._check_compatible_with`` before
    unboxing the result.
    """
    raise AbstractMethodError(self)

def _unbox_scalar(self, value):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
    """
    Unbox the integer value of a scalar `value`.

    Parameters
    ----------
    value : Union[Period, Timestamp, Timedelta, NaTType]
        Scalar to convert to its integer representation.

    Returns
    -------
    int

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Examples
    --------
    >>> self._unbox_scalar(Timedelta('10s'))  # doctest: +SKIP
    10000000000
    """
    # Directive names are case-sensitive for doctest, hence the lowercase
    # ``doctest: +SKIP`` in the example above.
    raise AbstractMethodError(self)

def _check_compatible_with(self, other):
# type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
# TODO: Scalar, array, or both?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO can be removed I think. We've finalized the type IMO.

But that can be done on merge if we don't have any other commits to push.

"""
Verify that `self` and `other` are compatible.

* DatetimeArray verifies that the timezones (if any) match
* PeriodArray verifies that the freq matches
* Timedelta has no verification

In each case, NaT is considered compatible.

Parameters
----------
other

Raises
------
Exception
"""
raise AbstractMethodError(self)


class DatelikeOps(object):
"""
@@ -468,6 +543,67 @@ def _values_for_factorize(self):
def _from_factorized(cls, values, original):
    """
    Rebuild an array from factorized `values`.

    Parameters
    ----------
    values
        Data passed positionally to ``cls``.
    original
        Object whose ``dtype`` is reused for the new array.

    Returns
    -------
    cls
        ``cls(values, dtype=original.dtype)``.
    """
    source_dtype = original.dtype
    return cls(values, dtype=source_dtype)

def _values_for_argsort(self):
    """Return the backing ``_data`` object, used as the argsort input."""
    backing = self._data
    return backing

# ------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
# pandas assumes they're there.

def searchsorted(self, v, side='left', sorter=None):
Copy link
Contributor

@TomAugspurger TomAugspurger Dec 28, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear, the name for the positional argument hasn't been finalized yet. We're figuring out the relative value of matching NumPy vs. matching the rest of pandas here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, as I have said multiple times, we are already using `value` in pandas, so -1 unless it's that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently Jeff has decided :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will revert

"""
Find indices where elements should be inserted to maintain order.

Find the indices into a sorted array `self` such that, if the
corresponding elements in `value` were inserted before the indices,
the order of `self` would be preserved.

Parameters
----------
v : array_like
Values to insert into `self`.
side : {'left', 'right'}, optional
If 'left', the index of the first suitable location found is given.
If 'right', return the last such index. If there is no suitable
index, return either 0 or N (where N is the length of `self`).
sorter : 1-D array_like, optional
Optional array of integer indices that sort `self` into ascending
order. They are typically the result of ``np.argsort``.

Returns
-------
indices : array of ints
Array of insertion points with the same shape as `value`.
"""
if isinstance(v, compat.string_types):
v = self._scalar_from_string(v)

if not (isinstance(v, (self._scalar_type, type(self)))
or isna(v)):
raise ValueError("Unexpected type for 'value': {valtype}"
.format(valtype=type(v)))

self._check_compatible_with(v)
if isinstance(v, type(self)):
value = v.asi8
else:
value = self._unbox_scalar(v)

return self.asi8.searchsorted(value, side=side, sorter=sorter)

def repeat(self, repeats, *args, **kwargs):
    """
    Repeat elements of an array.

    Parameters
    ----------
    repeats
        Forwarded unchanged to ``self._data.repeat``.
    *args, **kwargs
        Accepted only for numpy compatibility; they are checked with
        ``nv.validate_repeat`` and otherwise unused.

    Returns
    -------
    type(self)
        New array built from the repeated data with ``self.dtype``.

    See Also
    --------
    numpy.ndarray.repeat
    """
    # Validate the numpy-compat extras before doing any work.
    nv.validate_repeat(args, kwargs)
    constructor = type(self)
    repeated = self._data.repeat(repeats)
    return constructor(repeated, dtype=self.dtype)

# ------------------------------------------------------------------
# Null Handling

21 changes: 21 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
@@ -170,6 +170,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
_data
"""
_typ = "datetimearray"
_scalar_type = Timestamp

# define my properties & methods for delegation
_bool_ops = ['is_month_start', 'is_month_end',
@@ -346,6 +347,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

return cls._simple_new(index.asi8, freq=freq, tz=tz)

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return ``value.value`` for a Timestamp-like scalar or NaT.

    Parameters
    ----------
    value : self._scalar_type or NaT

    Returns
    -------
    The ``value`` attribute of `value`.

    Raises
    ------
    ValueError
        If `value` is neither an instance of ``self._scalar_type`` nor NaT.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timestamp.")
    # Missing values skip the compatibility check.
    if not isna(value):
        self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Parse `value` into a Timestamp, passing ``self.tz`` as its ``tz``."""
    parsed = Timestamp(value, tz=self.tz)
    return parsed

def _check_compatible_with(self, other):
    """
    Verify that the timezone of `other` matches ``self.tz``.

    NaT is always accepted.

    Raises
    ------
    ValueError
        If ``timezones.tz_compare`` reports that the timezones differ.
    """
    if other is NaT:
        return
    timezones_match = timezones.tz_compare(self.tz, other.tz)
    if timezones_match:
        return
    raise ValueError("Timezones don't match. '{own} != {other}'"
                     .format(own=self.tz, other=other.tz))

# -----------------------------------------------------------------
# Descriptive Properties

22 changes: 22 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
@@ -139,6 +139,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin,
__array_priority__ = 1000
_attributes = ["freq"]
_typ = "periodarray" # ABCPeriodArray
_scalar_type = Period

# Names others delegate to us
_other_ops = []
@@ -242,7 +243,28 @@ def _generate_range(cls, start, end, periods, freq, fields):

return subarr, freq

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    # type: (Union[Period, NaTType]) -> int
    """
    Return the integer stored behind a Period-like scalar or NaT.

    Parameters
    ----------
    value : self._scalar_type or NaT

    Returns
    -------
    ``value.value`` when `value` is NaT, otherwise ``value.ordinal``.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if value is NaT:
        return value.value
    if not isinstance(value, self._scalar_type):
        raise ValueError("'value' should be a Period. Got '{val}' instead."
                         .format(val=value))
    # Only non-missing scalars go through the compatibility check.
    if not isna(value):
        self._check_compatible_with(value)
    return value.ordinal

def _scalar_from_string(self, value):
    # type: (str) -> Period
    """Parse `value` into a Period, passing ``self.freq`` as its ``freq``."""
    parsed = Period(value, freq=self.freq)
    return parsed

def _check_compatible_with(self, other):
    """
    Verify that `other` has the same ``freqstr`` as `self`.

    NaT is always accepted. On a mismatch the handling is delegated to
    ``_raise_on_incompatible(self, other)``.
    """
    if other is not NaT and self.freqstr != other.freqstr:
        _raise_on_incompatible(self, other)

23 changes: 20 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
@@ -115,6 +115,7 @@ def wrapper(self, other):

class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
_scalar_type = Timedelta
__array_priority__ = 1000
# define my properties & methods for delegation
_other_ops = []
@@ -220,6 +221,22 @@ def _generate_range(cls, start, end, periods, freq, closed=None):

return cls._simple_new(index, freq=freq)

# ----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return ``value.value`` for a Timedelta-like scalar or NaT.

    Parameters
    ----------
    value : self._scalar_type or NaT

    Returns
    -------
    The ``value`` attribute of `value`.

    Raises
    ------
    ValueError
        If `value` is neither an instance of ``self._scalar_type`` nor NaT.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timedelta.")
    self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Parse `value` into a Timedelta."""
    parsed = Timedelta(value)
    return parsed

def _check_compatible_with(self, other):
    """Accept any `other`; there is nothing to validate here."""
    return None

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

@@ -237,13 +254,13 @@ def _validate_fill_value(self, fill_value):
# ----------------------------------------------------------------
# Rendering Methods

def _format_native_types(self):
    """Return ``self`` cast to ``object`` via ``self.astype``."""
    as_objects = self.astype(object)
    return as_objects

def _formatter(self, boxed=False):
    """
    Return the timedelta64 formatter for this array.

    The `boxed` argument is accepted but not consulted here; the formatter
    is always requested with ``box=True``.
    """
    # Imported inside the method, as in the original.
    from pandas.io.formats.format import _get_format_timedelta64
    formatter = _get_format_timedelta64(self, box=True)
    return formatter

def _format_native_types(self):
    """Return ``self`` cast to ``object`` via ``self.astype``."""
    as_objects = self.astype(object)
    return as_objects

# ----------------------------------------------------------------
# Arithmetic Methods

1 change: 1 addition & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -478,6 +478,7 @@ def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
freq = self.freq if is_period_dtype(self) else None
return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
# TODO: dispatch to _eadata

@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
def where(self, cond, other=None):
53 changes: 53 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,8 @@
import numpy as np
import pytest

import pandas.compat as compat

import pandas as pd
from pandas.core.arrays import (
DatetimeArrayMixin as DatetimeArray, PeriodArray,
@@ -129,6 +131,57 @@ def test_concat_same_type(self):

tm.assert_index_equal(self.index_cls(result), expected)

def test_unbox_scalar(self):
    """``_unbox_scalar`` gives an integer for scalars/NaT, rejects str."""
    data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(data, freq='D')
    integer_types = (int, compat.long)

    # Both a real element and NaT must unbox to an integer.
    for scalar in (arr[0], pd.NaT):
        unboxed = arr._unbox_scalar(scalar)
        assert isinstance(unboxed, integer_types)

    with pytest.raises(ValueError):
        arr._unbox_scalar('foo')

def test_check_compatible_with(self):
    """``_check_compatible_with`` accepts an element, a slice and NaT."""
    data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(data, freq='D')

    # None of these should raise.
    for candidate in (arr[0], arr[:1], pd.NaT):
        arr._check_compatible_with(candidate)

def test_scalar_from_string(self):
    """Round-trip the first element through ``str`` and back."""
    data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(data, freq='D')
    first_as_text = str(arr[0])
    parsed = arr._scalar_from_string(first_as_text)
    assert parsed == arr[0]

def test_searchsorted(self):
    """Check ``searchsorted`` for a scalar, an own-type array and NaT."""
    data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
    arr = self.array_cls(data, freq='D')

    # Scalar needle: default (left) side, then the right side.
    position = arr.searchsorted(arr[1])
    assert position == 1

    position = arr.searchsorted(arr[2], side="right")
    assert position == 3

    # Needle of the array's own type: one insertion point per element.
    needles = arr[1:3]
    positions = arr.searchsorted(needles)
    tm.assert_numpy_array_equal(positions, np.array([1, 2], dtype=np.int64))

    positions = arr.searchsorted(arr[1:3], side="right")
    tm.assert_numpy_array_equal(positions, np.array([2, 3], dtype=np.int64))

    # Following numpy convention, NaT goes at the beginning
    # (unlike NaN which goes at the end)
    position = arr.searchsorted(pd.NaT)
    assert position == 0


class TestDatetimeArray(SharedTests):
index_cls = pd.DatetimeIndex