Skip to content

Commit 08cd0b9

Browse files
committed
add tests for engines and add is_monotonic_uint(32|16)
1 parent f7b3487 commit 08cd0b9

File tree

6 files changed

+233
-6
lines changed

6 files changed

+233
-6
lines changed

asv_bench/benchmarks/indexing_engines.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import numpy as np
22

3-
from pandas._libs import index as li
3+
from pandas._libs import index as libindex
44

55

66
def _get_numeric_engines():
@@ -11,8 +11,9 @@ def _get_numeric_engines():
1111
('UInt16engine', np.uint16), ('UInt8Engine', np.uint8),
1212
('Float64Engine', np.float64), ('Float32Engine', np.float32),
1313
]
14-
return [(getattr(li, engine_name), dtype)
15-
for engine_name, dtype in engine_names if hasattr(li, engine_name)]
14+
return [(getattr(libindex, engine_name), dtype)
15+
for engine_name, dtype in engine_names
16+
if hasattr(libindex, engine_name)]
1617

1718

1819
class NumericEngineIndexing(object):
@@ -58,7 +59,7 @@ def setup(self, index_type):
5859
'non_monotonic': np.array(list('abc') * N, dtype=object),
5960
}[index_type]
6061

61-
self.data = li.ObjectEngine(lambda: arr, len(arr))
62+
self.data = libindex.ObjectEngine(lambda: arr, len(arr))
6263
# code below avoids populating the mapping etc. while timing.
6364
self.data.get_loc('b')
6465

pandas/_libs/algos.pyx

+15
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,8 @@ ctypedef fused algos_t:
365365
int16_t
366366
int8_t
367367
uint64_t
368+
uint32_t
369+
uint16_t
368370
uint8_t
369371

370372

@@ -462,7 +464,12 @@ pad_float32 = pad["float32_t"]
462464
pad_object = pad["object"]
463465
pad_int64 = pad["int64_t"]
464466
pad_int32 = pad["int32_t"]
467+
pad_int16 = pad["int16_t"]
468+
pad_int8 = pad["int8_t"]
465469
pad_uint64 = pad["uint64_t"]
470+
pad_uint32 = pad["uint32_t"]
471+
pad_uint16 = pad["uint16_t"]
472+
pad_uint8 = pad["uint8_t"]
466473
pad_bool = pad["uint8_t"]
467474

468475

@@ -656,7 +663,12 @@ backfill_float32 = backfill["float32_t"]
656663
backfill_object = backfill["object"]
657664
backfill_int64 = backfill["int64_t"]
658665
backfill_int32 = backfill["int32_t"]
666+
backfill_int16 = backfill["int16_t"]
667+
backfill_int8 = backfill["int8_t"]
659668
backfill_uint64 = backfill["uint64_t"]
669+
backfill_uint32 = backfill["uint32_t"]
670+
backfill_uint16 = backfill["uint16_t"]
671+
backfill_uint8 = backfill["uint8_t"]
660672
backfill_bool = backfill["uint8_t"]
661673

662674

@@ -872,6 +884,9 @@ is_monotonic_int32 = is_monotonic["int32_t"]
872884
is_monotonic_int16 = is_monotonic["int16_t"]
873885
is_monotonic_int8 = is_monotonic["int8_t"]
874886
is_monotonic_uint64 = is_monotonic["uint64_t"]
887+
is_monotonic_uint32 = is_monotonic["uint32_t"]
888+
is_monotonic_uint16 = is_monotonic["uint16_t"]
889+
is_monotonic_uint8 = is_monotonic["uint8_t"]
875890
is_monotonic_bool = is_monotonic["uint8_t"]
876891

877892

pandas/_libs/algos_common_helper.pxi.in

+3
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
133133
('int16', 'INT16', 'int16'),
134134
('int32', 'INT32', 'int32'),
135135
('int64', 'INT64', 'int64'),
136+
('uint8', 'UINT8', 'uint8'),
137+
('uint16', 'UINT16', 'uint16'),
138+
('uint32', 'UINT32', 'uint32'),
136139
('uint64', 'UINT64', 'uint64'),
137140
# ('platform_int', 'INT', 'int_'),
138141
# ('object', 'OBJECT', 'object_'),

pandas/tests/indexes/test_category.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
# -*- coding: utf-8 -*-
22

33
import pytest
4+
import numpy as np
45

56
import pandas.util.testing as tm
67
from pandas.core.indexes.api import Index, CategoricalIndex
78
from pandas.core.dtypes.dtypes import CategoricalDtype
9+
from pandas._libs import index as libindex
810
from .common import Base
911

1012
from pandas.compat import range, PY3
1113

12-
import numpy as np
13-
1414
from pandas import Categorical, IntervalIndex, compat
1515
from pandas.util.testing import assert_almost_equal
1616
import pandas.core.config as cf
@@ -1117,3 +1117,23 @@ def test_take_invalid_kwargs(self):
11171117
msg = "the 'mode' parameter is not supported"
11181118
tm.assert_raises_regex(ValueError, msg, idx.take,
11191119
indices, mode='clip')
1120+
1121+
@pytest.mark.parametrize('dtype, engine_type', [
1122+
(np.int8, libindex.Int8Engine),
1123+
(np.int16, libindex.Int16Engine),
1124+
(np.int32, libindex.Int32Engine),
1125+
(np.int64, libindex.Int64Engine),
1126+
])
1127+
def test_engine_type(self, dtype, engine_type):
1128+
if dtype != np.int64:
1129+
# num. of uniques required to push CategoricalIndex.codes to a given
1130+
# dtype (128 categories required for .codes dtype to be int16 etc.)
1131+
num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
1132+
ci = pd.CategoricalIndex(range(num_uniques))
1133+
else:
1134+
# having 2**32 - 2**31 categories would be very memory-intensive,
1135+
# so we cheat a bit with the dtype
1136+
ci = pd.CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1)
1137+
ci.values._codes = ci.values._codes.astype('int64')
1138+
assert np.issubdtype(ci.codes.dtype, dtype)
1139+
assert isinstance(ci._engine, engine_type)

pandas/tests/indexing/conftest.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas._libs import index as libindex
5+
6+
7+
@pytest.fixture(params=[
8+
(libindex.Int64Engine, np.int64),
9+
(libindex.Int32Engine, np.int32),
10+
(libindex.Int16Engine, np.int16),
11+
(libindex.Int8Engine, np.int8),
12+
(libindex.UInt64Engine, np.uint64),
13+
(libindex.UInt32Engine, np.uint32),
14+
(libindex.UInt16Engine, np.uint16),
15+
(libindex.UInt8Engine, np.uint8),
16+
(libindex.Float64Engine, np.float64),
17+
(libindex.Float32Engine, np.float32),
18+
], ids=lambda x: x[0].__name__)
19+
def numeric_indexing_engine_type_and_dtype(request):
20+
return request.param
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import numpy as np
2+
3+
import pandas.util.testing as tm
4+
from pandas import compat
5+
from pandas._libs import algos as libalgos, index as libindex
6+
7+
8+
class TestNumericEngine(object):
9+
def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
10+
engine_type, dtype = numeric_indexing_engine_type_and_dtype
11+
num = 1000
12+
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
13+
14+
# monotonic increasing
15+
engine = engine_type(lambda: arr, len(arr))
16+
assert engine.is_monotonic_increasing is True
17+
assert engine.is_monotonic_decreasing is False
18+
19+
# monotonic decreasing
20+
engine = engine_type(lambda: arr[::-1], len(arr))
21+
assert engine.is_monotonic_increasing is False
22+
assert engine.is_monotonic_decreasing is True
23+
24+
# neither monotonic increasing nor decreasing
25+
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
26+
engine = engine_type(lambda: arr[::-1], len(arr))
27+
assert engine.is_monotonic_increasing is False
28+
assert engine.is_monotonic_decreasing is False
29+
30+
def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
31+
engine_type, dtype = numeric_indexing_engine_type_and_dtype
32+
33+
# unique
34+
arr = np.array([1, 3, 2], dtype=dtype)
35+
engine = engine_type(lambda: arr, len(arr))
36+
assert engine.is_unique is True
37+
38+
# not unique
39+
arr = np.array([1, 2, 1], dtype=dtype)
40+
engine = engine_type(lambda: arr, len(arr))
41+
assert engine.is_unique is False
42+
43+
def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
44+
engine_type, dtype = numeric_indexing_engine_type_and_dtype
45+
46+
# unique
47+
arr = np.array([1, 2, 3], dtype=dtype)
48+
engine = engine_type(lambda: arr, len(arr))
49+
assert engine.get_loc(2) == 1
50+
51+
# monotonic
52+
num = 1000
53+
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
54+
engine = engine_type(lambda: arr, len(arr))
55+
assert engine.get_loc(2) == slice(1000, 2000)
56+
57+
# not monotonic
58+
arr = np.array([1, 2, 3] * num, dtype=dtype)
59+
engine = engine_type(lambda: arr, len(arr))
60+
expected = np.array([False, True, False] * num, dtype=bool)
61+
result = engine.get_loc(2)
62+
assert (result == expected).all()
63+
64+
def test_get_backfill_indexer(
65+
self, numeric_indexing_engine_type_and_dtype):
66+
engine_type, dtype = numeric_indexing_engine_type_and_dtype
67+
68+
arr = np.array([1, 5, 10], dtype=dtype)
69+
engine = engine_type(lambda: arr, len(arr))
70+
71+
new = np.array(compat.range(12), dtype=dtype)
72+
result = engine.get_backfill_indexer(new)
73+
74+
expected = libalgos.backfill(arr, new)
75+
tm.assert_numpy_array_equal(result, expected)
76+
77+
def test_get_pad_indexer(
78+
self, numeric_indexing_engine_type_and_dtype):
79+
engine_type, dtype = numeric_indexing_engine_type_and_dtype
80+
81+
arr = np.array([1, 5, 10], dtype=dtype)
82+
engine = engine_type(lambda: arr, len(arr))
83+
84+
new = np.array(compat.range(12), dtype=dtype)
85+
result = engine.get_pad_indexer(new)
86+
87+
expected = libalgos.pad(arr, new)
88+
tm.assert_numpy_array_equal(result, expected)
89+
90+
91+
class TestObjectEngine(object):
92+
engine_type = libindex.ObjectEngine
93+
dtype = np.object_
94+
values = list('abc')
95+
96+
def test_is_monotonic(self):
97+
98+
num = 1000
99+
arr = np.array(['a'] * num + ['a'] * num + ['c'] * num,
100+
dtype=self.dtype)
101+
102+
# monotonic increasing
103+
engine = self.engine_type(lambda: arr, len(arr))
104+
assert engine.is_monotonic_increasing is True
105+
assert engine.is_monotonic_decreasing is False
106+
107+
# monotonic decreasing
108+
engine = self.engine_type(lambda: arr[::-1], len(arr))
109+
assert engine.is_monotonic_increasing is False
110+
assert engine.is_monotonic_decreasing is True
111+
112+
# neither monotonic increasing nor decreasing
113+
arr = np.array(['a'] * num + ['b'] * num + ['a'] * num,
114+
dtype=self.dtype)
115+
engine = self.engine_type(lambda: arr[::-1], len(arr))
116+
assert engine.is_monotonic_increasing is False
117+
assert engine.is_monotonic_decreasing is False
118+
119+
def test_is_unique(self):
120+
# unique
121+
arr = np.array(self.values, dtype=self.dtype)
122+
engine = self.engine_type(lambda: arr, len(arr))
123+
assert engine.is_unique is True
124+
125+
# not unique
126+
arr = np.array(['a', 'b', 'a'], dtype=self.dtype)
127+
engine = self.engine_type(lambda: arr, len(arr))
128+
assert engine.is_unique is False
129+
130+
def test_get_loc(self):
131+
# unique
132+
arr = np.array(self.values, dtype=self.dtype)
133+
engine = self.engine_type(lambda: arr, len(arr))
134+
assert engine.get_loc('b') == 1
135+
136+
# monotonic
137+
num = 1000
138+
arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
139+
dtype=self.dtype)
140+
engine = self.engine_type(lambda: arr, len(arr))
141+
assert engine.get_loc('b') == slice(1000, 2000)
142+
143+
# not monotonic
144+
arr = np.array(self.values * num, dtype=self.dtype)
145+
engine = self.engine_type(lambda: arr, len(arr))
146+
expected = np.array([False, True, False] * num, dtype=bool)
147+
result = engine.get_loc('b')
148+
assert (result == expected).all()
149+
150+
def test_get_backfill_indexer(self):
151+
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
152+
engine = self.engine_type(lambda: arr, len(arr))
153+
154+
new = np.array(list('abcdefghij'), dtype=self.dtype)
155+
result = engine.get_backfill_indexer(new)
156+
157+
expected = libalgos.backfill_object(arr, new)
158+
tm.assert_numpy_array_equal(result, expected)
159+
160+
def test_get_pad_indexer(self):
161+
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
162+
engine = self.engine_type(lambda: arr, len(arr))
163+
164+
new = np.array(list('abcdefghij'), dtype=self.dtype)
165+
result = engine.get_pad_indexer(new)
166+
167+
expected = libalgos.pad_object(arr, new)
168+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)