Skip to content

Commit 34da8be

Browse files
fdroessler authored and jreback committed
[ENH] Move intersection functions for DatetimeIndex and TimedeltaIndex to Datetimelike and added new tests (#25913)
1 parent 66d6023 commit 34da8be

File tree

6 files changed

+192
-103
lines changed

6 files changed

+192
-103
lines changed

doc/source/whatsnew/v0.25.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Other Enhancements
3333
- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
3434
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
3535
- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
36+
- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`)
3637
- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
3738
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
3839
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
@@ -275,9 +276,9 @@ Datetimelike
275276
Timedelta
276277
^^^^^^^^^
277278

279+
- Bug in :meth:`TimedeltaIndex.intersection` where, for non-monotonic indices, an empty ``Index`` was sometimes returned even though an intersection existed (:issue:`25913`)
278280
- Bug with comparisons between :class:`Timedelta` and ``NaT`` raising ``TypeError`` (:issue:`26039`)
279281
-
280-
-
281282

282283
Timezones
283284
^^^^^^^^^

pandas/core/indexes/datetimelike.py

+57
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas.core.tools.timedeltas import to_timedelta
2828

2929
import pandas.io.formats.printing as printing
30+
from pandas.tseries.frequencies import to_offset
3031

3132
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3233

@@ -529,6 +530,62 @@ def isin(self, values):
529530

530531
return algorithms.isin(self.asi8, values.asi8)
531532

533+
def intersection(self, other, sort=False):
534+
self._validate_sort_keyword(sort)
535+
self._assert_can_do_setop(other)
536+
537+
if self.equals(other):
538+
return self._get_reconciled_name_object(other)
539+
540+
if len(self) == 0:
541+
return self.copy()
542+
if len(other) == 0:
543+
return other.copy()
544+
545+
if not isinstance(other, type(self)):
546+
result = Index.intersection(self, other, sort=sort)
547+
if isinstance(result, type(self)):
548+
if result.freq is None:
549+
result.freq = to_offset(result.inferred_freq)
550+
return result
551+
552+
elif (other.freq is None or self.freq is None or
553+
other.freq != self.freq or
554+
not other.freq.isAnchored() or
555+
(not self.is_monotonic or not other.is_monotonic)):
556+
result = Index.intersection(self, other, sort=sort)
557+
558+
# Invalidate the freq of `result`, which may not be correct at
559+
# this point, depending on the values.
560+
result.freq = None
561+
if hasattr(self, 'tz'):
562+
result = self._shallow_copy(result._values, name=result.name,
563+
tz=result.tz, freq=None)
564+
else:
565+
result = self._shallow_copy(result._values, name=result.name,
566+
freq=None)
567+
if result.freq is None:
568+
result.freq = to_offset(result.inferred_freq)
569+
return result
570+
571+
# to make our life easier, "sort" the two ranges
572+
if self[0] <= other[0]:
573+
left, right = self, other
574+
else:
575+
left, right = other, self
576+
577+
# after sorting, the intersection always starts with the right index
578+
# and ends with the index whose last element is smallest
579+
end = min(left[-1], right[-1])
580+
start = right[0]
581+
582+
if end < start:
583+
return type(self)(data=[])
584+
else:
585+
lslice = slice(*left.slice_locs(start, end))
586+
left_chunk = left.values[lslice]
587+
return self._shallow_copy(left_chunk)
588+
532589
@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
533590
def repeat(self, repeats, axis=None):
534591
nv.validate_repeat(tuple(), dict(axis=axis))

pandas/core/indexes/datetimes.py

+7-56
Original file line numberDiff line numberDiff line change
@@ -607,14 +607,10 @@ def _fast_union(self, other, sort=None):
607607
else:
608608
return left
609609

610-
def _wrap_setop_result(self, other, result):
611-
name = get_op_result_name(self, other)
612-
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
613-
614610
def intersection(self, other, sort=False):
615611
"""
616-
Specialized intersection for DatetimeIndex objects. May be much faster
617-
than Index.intersection
612+
Specialized intersection for DatetimeIndex objects.
613+
May be much faster than Index.intersection
618614
619615
Parameters
620616
----------
@@ -631,58 +627,13 @@ def intersection(self, other, sort=False):
631627
632628
Returns
633629
-------
634-
y : Index or DatetimeIndex
630+
y : Index or DatetimeIndex or TimedeltaIndex
635631
"""
636-
self._validate_sort_keyword(sort)
637-
self._assert_can_do_setop(other)
638-
639-
if self.equals(other):
640-
return self._get_reconciled_name_object(other)
641-
642-
if not isinstance(other, DatetimeIndex):
643-
try:
644-
other = DatetimeIndex(other)
645-
except (TypeError, ValueError):
646-
pass
647-
result = Index.intersection(self, other, sort=sort)
648-
if isinstance(result, DatetimeIndex):
649-
if result.freq is None:
650-
result.freq = to_offset(result.inferred_freq)
651-
return result
652-
653-
elif (other.freq is None or self.freq is None or
654-
other.freq != self.freq or
655-
not other.freq.isAnchored() or
656-
(not self.is_monotonic or not other.is_monotonic)):
657-
result = Index.intersection(self, other, sort=sort)
658-
# Invalidate the freq of `result`, which may not be correct at
659-
# this point, depending on the values.
660-
result.freq = None
661-
result = self._shallow_copy(result._values, name=result.name,
662-
tz=result.tz, freq=None)
663-
if result.freq is None:
664-
result.freq = to_offset(result.inferred_freq)
665-
return result
666-
667-
if len(self) == 0:
668-
return self
669-
if len(other) == 0:
670-
return other
671-
# to make our life easier, "sort" the two ranges
672-
if self[0] <= other[0]:
673-
left, right = self, other
674-
else:
675-
left, right = other, self
676-
677-
end = min(left[-1], right[-1])
678-
start = right[0]
632+
return super(DatetimeIndex, self).intersection(other, sort=sort)
679633

680-
if end < start:
681-
return type(self)(data=[])
682-
else:
683-
lslice = slice(*left.slice_locs(start, end))
684-
left_chunk = left.values[lslice]
685-
return self._shallow_copy(left_chunk)
634+
def _wrap_setop_result(self, other, result):
635+
name = get_op_result_name(self, other)
636+
return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
686637

687638
# --------------------------------------------------------------------
688639

pandas/core/indexes/period.py

+4
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,10 @@ def join(self, other, how='left', level=None, return_indexers=False,
800800
return self._apply_meta(result), lidx, ridx
801801
return self._apply_meta(result)
802802

803+
@Appender(Index.intersection.__doc__)
804+
def intersection(self, other, sort=False):
805+
return Index.intersection(self, other, sort=sort)
806+
803807
def _assert_can_do_setop(self, other):
804808
super()._assert_can_do_setop(other)
805809

pandas/core/indexes/timedeltas.py

+28-46
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,34 @@ def join(self, other, how='left', level=None, return_indexers=False,
379379
return_indexers=return_indexers,
380380
sort=sort)
381381

382+
def intersection(self, other, sort=False):
383+
"""
384+
Specialized intersection for TimedeltaIndex objects.
385+
May be much faster than Index.intersection
386+
387+
Parameters
388+
----------
389+
other : TimedeltaIndex or array-like
390+
sort : False or None, default False
391+
Sort the resulting index if possible.
392+
393+
.. versionadded:: 0.24.0
394+
395+
.. versionchanged:: 0.24.1
396+
397+
Changed the default to ``False`` to match the behaviour
398+
from before 0.24.0.
399+
400+
.. versionchanged:: 0.25.0
401+
402+
The `sort` keyword is added
403+
404+
Returns
405+
-------
406+
y : Index or TimedeltaIndex
407+
"""
408+
return super(TimedeltaIndex, self).intersection(other, sort=sort)
409+
382410
def _wrap_joined_index(self, joined, other):
383411
name = get_op_result_name(self, other)
384412
if (isinstance(other, TimedeltaIndex) and self.freq == other.freq and
@@ -440,52 +468,6 @@ def _fast_union(self, other):
440468
else:
441469
return left
442470

443-
def intersection(self, other):
444-
"""
445-
Specialized intersection for TimedeltaIndex objects. May be much faster
446-
than Index.intersection
447-
448-
Parameters
449-
----------
450-
other : TimedeltaIndex or array-like
451-
452-
Returns
453-
-------
454-
y : Index or TimedeltaIndex
455-
"""
456-
self._assert_can_do_setop(other)
457-
458-
if self.equals(other):
459-
return self._get_reconciled_name_object(other)
460-
461-
if not isinstance(other, TimedeltaIndex):
462-
try:
463-
other = TimedeltaIndex(other)
464-
except (TypeError, ValueError):
465-
pass
466-
result = Index.intersection(self, other)
467-
return result
468-
469-
if len(self) == 0:
470-
return self
471-
if len(other) == 0:
472-
return other
473-
# to make our life easier, "sort" the two ranges
474-
if self[0] <= other[0]:
475-
left, right = self, other
476-
else:
477-
left, right = other, self
478-
479-
end = min(left[-1], right[-1])
480-
start = right[0]
481-
482-
if end < start:
483-
return type(self)(data=[])
484-
else:
485-
lslice = slice(*left.slice_locs(start, end))
486-
left_chunk = left.values[lslice]
487-
return self._shallow_copy(left_chunk)
488-
489471
def _maybe_promote(self, other):
490472
if other.inferred_type == 'timedelta':
491473
other = TimedeltaIndex(other)

pandas/tests/indexes/timedeltas/test_setops.py

+94
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import numpy as np
2+
import pytest
23

34
import pandas as pd
45
from pandas import Int64Index, TimedeltaIndex, timedelta_range
56
import pandas.util.testing as tm
67

8+
from pandas.tseries.offsets import Hour
9+
710

811
class TestTimedeltaIndex:
912

@@ -73,3 +76,94 @@ def test_intersection_bug_1708(self):
7376
result = index_1 & index_2
7477
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
7578
tm.assert_index_equal(result, expected)
79+
80+
@pytest.mark.parametrize("sort", [None, False])
81+
def test_intersection_equal(self, sort):
82+
# GH 24471 Test intersection outcome given the sort keyword
83+
# for equal indices intersection should return the original index
84+
first = timedelta_range('1 day', periods=4, freq='h')
85+
second = timedelta_range('1 day', periods=4, freq='h')
86+
intersect = first.intersection(second, sort=sort)
87+
if sort is None:
88+
tm.assert_index_equal(intersect, second.sort_values())
89+
assert tm.equalContents(intersect, second)
90+
91+
# Corner cases
92+
inter = first.intersection(first, sort=sort)
93+
assert inter is first
94+
95+
@pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)])
96+
@pytest.mark.parametrize("sort", [None, False])
97+
def test_intersection_zero_length(self, period_1, period_2, sort):
98+
# GH 24471 test that for non-overlapping indexes the intersection is zero length
99+
index_1 = timedelta_range('1 day', periods=period_1, freq='h')
100+
index_2 = timedelta_range('1 day', periods=period_2, freq='h')
101+
expected = timedelta_range('1 day', periods=0, freq='h')
102+
result = index_1.intersection(index_2, sort=sort)
103+
tm.assert_index_equal(result, expected)
104+
105+
@pytest.mark.parametrize('sort', [None, False])
106+
def test_zero_length_input_index(self, sort):
107+
# GH 24966 test that 0-len intersections are copied
108+
index_1 = timedelta_range('1 day', periods=0, freq='h')
109+
index_2 = timedelta_range('1 day', periods=3, freq='h')
110+
result = index_1.intersection(index_2, sort=sort)
111+
assert index_1 is not result
112+
assert index_2 is not result
113+
tm.assert_copy(result, index_1)
114+
115+
@pytest.mark.parametrize(
116+
"rng, expected",
117+
# if target has the same name, it is preserved
118+
[
119+
(timedelta_range('1 day', periods=5, freq='h', name='idx'),
120+
timedelta_range('1 day', periods=4, freq='h', name='idx')),
121+
# if target name is different, it will be reset
122+
(timedelta_range('1 day', periods=5, freq='h', name='other'),
123+
timedelta_range('1 day', periods=4, freq='h', name=None)),
124+
# if no overlap exists return empty index
125+
(timedelta_range('1 day', periods=10, freq='h', name='idx')[5:],
126+
TimedeltaIndex([], name='idx'))])
127+
@pytest.mark.parametrize("sort", [None, False])
128+
def test_intersection(self, rng, expected, sort):
129+
# GH 4690 (with tz)
130+
base = timedelta_range('1 day', periods=4, freq='h', name='idx')
131+
result = base.intersection(rng, sort=sort)
132+
if sort is None:
133+
expected = expected.sort_values()
134+
tm.assert_index_equal(result, expected)
135+
assert result.name == expected.name
136+
assert result.freq == expected.freq
137+
138+
@pytest.mark.parametrize(
139+
"rng, expected",
140+
# part intersection works
141+
[
142+
(TimedeltaIndex(['5 hour', '2 hour', '4 hour', '9 hour'],
143+
name='idx'),
144+
TimedeltaIndex(['2 hour', '4 hour'], name='idx')),
145+
# reordered part intersection
146+
(TimedeltaIndex(['2 hour', '5 hour', '5 hour', '1 hour'],
147+
name='other'),
148+
TimedeltaIndex(['1 hour', '2 hour'], name=None)),
149+
# reversed index
150+
(TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'],
151+
name='idx')[::-1],
152+
TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'],
153+
name='idx'))])
154+
@pytest.mark.parametrize("sort", [None, False])
155+
def test_intersection_non_monotonic(self, rng, expected, sort):
156+
# 24471 non-monotonic
157+
base = TimedeltaIndex(['1 hour', '2 hour', '4 hour', '3 hour'],
158+
name='idx')
159+
result = base.intersection(rng, sort=sort)
160+
if sort is None:
161+
expected = expected.sort_values()
162+
tm.assert_index_equal(result, expected)
163+
assert result.name == expected.name
164+
165+
# if reversed order, frequency is still the same
166+
if all(base == rng[::-1]) and sort is None:
167+
assert isinstance(result.freq, Hour)
168+
else:
169+
assert result.freq is None

0 commit comments

Comments
 (0)