forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindexing_engines.py
102 lines (82 loc) · 3.24 KB
/
indexing_engines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Benchmarks in this file depend exclusively on code in _libs/
If a PR does not edit anything in _libs, it is very unlikely that benchmarks
in this file will be affected.
"""
import numpy as np
from pandas._libs import index as libindex
def _get_numeric_engines():
    """
    Collect the numeric index engines available in ``pandas._libs.index``.

    Returns
    -------
    list of tuple
        ``(engine_class, numpy_dtype)`` pairs, in the order listed below.
        Names that ``libindex`` does not define are skipped instead of
        raising.
    """
    # The names must match the attributes of ``libindex`` exactly: the
    # ``hasattr`` filter below silently drops any misspelt entry.
    engine_names = [
        ("Int64Engine", np.int64),
        ("Int32Engine", np.int32),
        ("Int16Engine", np.int16),
        ("Int8Engine", np.int8),
        ("UInt64Engine", np.uint64),
        ("UInt32Engine", np.uint32),
        ("UInt16Engine", np.uint16),
        ("UInt8Engine", np.uint8),
        ("Float64Engine", np.float64),
        ("Float32Engine", np.float32),
    ]
    return [
        (getattr(libindex, engine_name), dtype)
        for engine_name, dtype in engine_names
        if hasattr(libindex, engine_name)
    ]
class NumericEngineIndexing:
    """
    Time ``get_loc`` on the numeric engines of ``pandas._libs.index``.

    Parametrized over the engine/dtype pair, the ordering of the values,
    whether the values are unique, and the size factor ``N`` (every array
    built in ``setup`` holds ``3 * N`` elements).
    """

    params = [
        _get_numeric_engines(),
        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
        [True, False],
        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
    ]
    param_names = ["engine_and_dtype", "index_type", "unique", "N"]

    def setup(self, engine_and_dtype, index_type, unique, N):
        """
        Build the engine under test and choose the keys to look up.

        Parameters
        ----------
        engine_and_dtype : tuple
            ``(engine_class, numpy_dtype)`` pair.
        index_type : str
            One of "monotonic_incr", "monotonic_decr" or "non_monotonic".
        unique : bool
            Whether the values are generated with ``np.arange`` (True) or
            as repetitions of 1, 2 and 3 (False).
        N : int
            Size factor; the array has ``3 * N`` elements.
        """
        engine, dtype = engine_and_dtype

        # NOTE(review): for narrow dtypes (e.g. int8) 3 * N is larger than
        # the dtype can represent, so the "unique" arange cases may not
        # really hold unique values -- confirm.
        if index_type == "monotonic_incr":
            if unique:
                arr = np.arange(N * 3, dtype=dtype)
            else:
                # N ones, then N twos, then N threes; np.repeat avoids
                # materializing a 3 * N element Python list first.
                arr = np.repeat(np.array([1, 2, 3], dtype=dtype), N)
        elif index_type == "monotonic_decr":
            if unique:
                arr = np.arange(N * 3, dtype=dtype)[::-1]
            else:
                # Same values as the increasing case, viewed in reverse.
                arr = np.repeat(np.array([1, 2, 3], dtype=dtype), N)[::-1]
        else:
            assert index_type == "non_monotonic"
            if unique:
                # The top third of the range goes first, followed by the
                # lower two thirds, which breaks monotonicity.
                arr = np.empty(N * 3, dtype=dtype)
                arr[:N] = np.arange(N * 2, N * 3, dtype=dtype)
                arr[N:] = np.arange(N * 2, dtype=dtype)
            else:
                # 1, 2, 3, 1, 2, 3, ... repeated N times.
                arr = np.tile(np.array([1, 2, 3], dtype=dtype), N)

        self.data = engine(arr)
        # The code below avoids populating the mapping etc. while timing.
        self.data.get_loc(2)

        self.key_middle = arr[len(arr) // 2]
        self.key_early = arr[2]

    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
        """Look up the value stored at position 2 of the array."""
        self.data.get_loc(self.key_early)

    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
        """Look up the value stored at the middle position of the array."""
        # searchsorted performance may be different near the middle of a range
        # vs near an endpoint
        self.data.get_loc(self.key_middle)
class ObjectEngineIndexing:
    """
    Time ``get_loc`` on ``libindex.ObjectEngine``.

    Parametrized over the ordering of the values; every array holds
    ``10**5`` copies each of "a", "b" and "c" as Python objects.
    """

    params = [("monotonic_incr", "monotonic_decr", "non_monotonic")]
    param_names = ["index_type"]

    def setup(self, index_type):
        """
        Build the engine for the requested ordering.

        Parameters
        ----------
        index_type : str
            One of "monotonic_incr", "monotonic_decr" or "non_monotonic".

        Raises
        ------
        KeyError
            If ``index_type`` is not one of the supported orderings.
        """
        N = 10**5
        # Pick the letter sequence first so that only the one array that is
        # actually needed gets built; the strings themselves are cheap.
        letters = {
            "monotonic_incr": "a" * N + "b" * N + "c" * N,
            "monotonic_decr": "c" * N + "b" * N + "a" * N,
            "non_monotonic": "abc" * N,
        }[index_type]
        arr = np.array(list(letters), dtype=object)

        self.data = libindex.ObjectEngine(arr)
        # The code below avoids populating the mapping etc. while timing.
        self.data.get_loc("b")

    def time_get_loc(self, index_type):
        """Look up the key "b" in the prepared engine."""
        self.data.get_loc("b")