forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultiindex_object.py
149 lines (113 loc) · 4.04 KB
/
multiindex_object.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import string
import numpy as np
import pandas.util.testing as tm
from pandas import date_range, MultiIndex, DataFrame
class GetLoc:
    """Benchmarks for ``MultiIndex.get_loc`` on three index sizes.

    Each timed method looks up the last key of its index, either once or
    1000 times in a row (the ``*_warm`` variants).
    """

    def setup(self):
        """Build the large, medium and small three-level product indexes."""
        level_names = ["one", "two", "three"]

        # 1000 x 20 x 52 combinations (every ASCII letter on the last level).
        large_levels = [
            np.arange(1000),
            np.arange(20),
            list(string.ascii_letters),
        ]
        self.mi_large = MultiIndex.from_product(large_levels, names=level_names)

        # 1000 x 10 x 1 combinations.
        medium_levels = [np.arange(1000), np.arange(10), list("A")]
        self.mi_med = MultiIndex.from_product(medium_levels, names=level_names)

        # 100 x 1 x 1 combinations; two of the three levels hold strings.
        small_levels = [np.arange(100), list("A"), list("A")]
        self.mi_small = MultiIndex.from_product(small_levels, names=level_names)

    def time_large_get_loc(self):
        """Single lookup in the large index."""
        self.mi_large.get_loc((999, 19, "Z"))

    def time_large_get_loc_warm(self):
        """1000 consecutive lookups of the same key in the large index."""
        for _ in range(1000):
            self.mi_large.get_loc((999, 19, "Z"))

    def time_med_get_loc(self):
        """Single lookup in the medium index."""
        self.mi_med.get_loc((999, 9, "A"))

    def time_med_get_loc_warm(self):
        """1000 consecutive lookups of the same key in the medium index."""
        for _ in range(1000):
            self.mi_med.get_loc((999, 9, "A"))

    def time_string_get_loc(self):
        """Single lookup in the small index."""
        self.mi_small.get_loc((99, "A", "A"))

    def time_small_get_loc_warm(self):
        """1000 consecutive lookups of the same key in the small index."""
        for _ in range(1000):
            self.mi_small.get_loc((99, "A", "A"))
class Duplicates:
    """Benchmark ``MultiIndex.remove_unused_levels`` on a filtered index."""

    def setup(self):
        """Create a random two-level index and keep roughly 10% of its rows."""
        n_rows = 65536
        # The three random draws stay in this order so that a seeded run
        # produces the same data as before.
        first_level = np.random.randint(0, 8192, n_rows)
        second_level = np.random.randint(0, 1024, n_rows)
        keep = np.random.rand(n_rows) < 0.1

        complete = MultiIndex.from_arrays([first_level, second_level])
        # NOTE(review): the boolean selection presumably leaves level values
        # that no remaining row refers to, which is what the benchmark strips.
        self.mi_unused_levels = complete[keep]

    def time_remove_unused_levels(self):
        """Time dropping the level values that are no longer referenced."""
        self.mi_unused_levels.remove_unused_levels()
class Integer:
    """Benchmarks on a two-level, all-integer ``MultiIndex``."""

    def setup(self):
        """Create the 1000 x 1000 product index and the ``get_indexer`` target."""
        self.mi_int = MultiIndex.from_product(
            [np.arange(1000), np.arange(1000)], names=["one", "two"]
        )
        # The pairs (0, 10) .. (0, 19).  NumPy turns the nested tuples into a
        # (10, 2) object array rather than a 1-D array of tuples.
        self.obj_index = np.array(
            [(0, second) for second in range(10, 20)],
            dtype=object,
        )

    def time_get_indexer(self):
        """Time ``get_indexer`` with the object-array target."""
        self.mi_int.get_indexer(self.obj_index)

    def time_is_monotonic(self):
        """Time the monotonic-increasing check on the index."""
        # ``is_monotonic`` was only an alias of ``is_monotonic_increasing``;
        # the alias was deprecated in pandas 1.5 and removed in 2.0, so the
        # canonical property is used to keep the benchmark runnable.
        self.mi_int.is_monotonic_increasing
class Duplicated:
    """Benchmark ``MultiIndex.duplicated`` on a three-level index."""

    def setup(self):
        """Build a 1,000,000-row index whose codes are drawn at random.

        Each of the three levels has ``n`` unique values and every level gets
        its own ``k * n`` random codes.
        """
        n, k = 200, 5000
        levels = [
            np.arange(n),
            # ``n`` unique strings built locally, so the benchmark does not
            # rely on the deprecated ``pandas.util.testing`` helpers.
            np.array([f"i-{i}" for i in range(n)], dtype=object),
            1000 + np.arange(n),
        ]
        codes = [np.random.choice(n, k * n) for _ in levels]
        self.mi = MultiIndex(levels=levels, codes=codes)

    def time_duplicated(self):
        """Time the computation of the duplicated-row mask."""
        self.mi.duplicated()
class Sortlevel:
    """Benchmarks for ``MultiIndex.sortlevel`` on shuffled integer indexes."""

    def setup(self):
        """Prepare a shuffled five-level index and a shuffled two-level index."""
        n = 1182720
        low, high = -4096, 4096

        # One level per repeat factor: ``n // k`` random draws, each repeated
        # ``k`` times.  ``n`` is a multiple of every factor, so all five
        # arrays have length ``n``.
        repeat_factors = [11, 7, 5, 3, 1]
        level_arrays = []
        for k in repeat_factors:
            draws = np.random.randint(low, high, n // k)
            level_arrays.append(np.repeat(draws, k))
        shuffle = np.random.permutation(n)
        self.mi_int = MultiIndex.from_arrays(level_arrays)[shuffle]

        # 100 x 1000 grid of (a, b) pairs, visited in random order.
        a = np.repeat(np.arange(100), 1000)
        b = np.tile(np.arange(1000), 100)
        ordered = MultiIndex.from_arrays([a, b])
        self.mi = ordered.take(np.random.permutation(np.arange(100000)))

    def time_sortlevel_int64(self):
        """Sort the five-level index with the default arguments."""
        self.mi_int.sortlevel()

    def time_sortlevel_zero(self):
        """Sort the two-level index by level 0."""
        self.mi.sortlevel(0)

    def time_sortlevel_one(self):
        """Sort the two-level index by level 1."""
        self.mi.sortlevel(1)
class Values:
    """Benchmarks for ``MultiIndex.values`` when one level holds datetimes."""

    def setup_cache(self):
        """Return the 1000 x 100 (integer, date) product index.

        The timed methods take this index as their ``mi`` argument.
        """
        integers = range(1000)
        dates = date_range(start="1/1/2012", periods=100)
        return MultiIndex.from_product([integers, dates])

    def time_datetime_level_values_copy(self, mi):
        """Time ``.values`` on a fresh copy of the index."""
        mi.copy().values

    def time_datetime_level_values_sliced(self, mi):
        """Time ``.values`` on the first ten entries of the index."""
        mi[:10].values
class CategoricalLevel:
    """Benchmark ``DataFrame.set_index`` with two categorical key columns."""

    def setup(self):
        """Create a 1,000,000-row frame whose "a" and "b" columns are categorical."""
        columns = {
            "a": np.arange(1_000_000, dtype=np.int32),
            "b": np.arange(1_000_000, dtype=np.int64),
            "c": np.arange(1_000_000, dtype=float),
        }
        frame = DataFrame(columns)
        # Only the two future index columns are converted; "c" stays float.
        self.df = frame.astype({"a": "category", "b": "category"})

    def time_categorical_level(self):
        """Time building the ("a", "b") index from the categorical columns."""
        self.df.set_index(["a", "b"])
from .pandas_vb_common import setup # noqa: F401