forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhash_functions.py
76 lines (54 loc) · 1.93 KB
/
hash_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import pandas as pd
class UniqueForLargePyObjectInts:
    """Benchmark ``pd.unique`` on an object array of large Python ints.

    The array holds 5000 distinct Python ``int`` objects, each a multiple
    of ``2**32``, stored with ``dtype=object``.
    """

    def setup(self):
        """Build the object-dtype array consumed by ``time_unique``."""
        lst = [x << 32 for x in range(5000)]
        self.arr = np.array(lst, dtype=np.object_)

    def time_unique(self):
        """Run ``pd.unique`` over the prepared array; the result is discarded."""
        pd.unique(self.arr)
class Float64GroupIndex:
    """Benchmark ``groupby(...).last()`` keyed by a floating-point index.

    The grouping key is derived from the frame's own datetime index:
    the integer representation of each timestamp divided by ``10**9``
    and rounded to a whole number.
    """

    # GH28303
    def setup(self):
        """Create a 10**6-row frame and the rounded float grouping key."""
        self.df = pd.date_range(
            start="1/1/2018", end="1/2/2018", periods=10**6
        ).to_frame()
        # Ask for int64 explicitly: the builtin ``int`` maps to a
        # platform-dependent NumPy integer, and pandas only converts
        # datetimes to 64-bit integers.
        self.group_index = np.round(self.df.index.astype(np.int64) / 10**9)

    def time_groupby(self):
        """Group by the float key and take the last row of each group."""
        self.df.groupby(self.group_index).last()
class UniqueAndFactorizeArange:
    """Benchmark ``pd.factorize`` / ``pd.unique`` on offset float ranges.

    For each ``exponent`` the data is ``arange(10**4) + 10**exponent`` as
    float64, with every value repeated 100 times (10**6 elements in total).
    """

    params = range(4, 16)
    param_names = ["exponent"]

    def setup(self, exponent):
        """Build the repeated, offset float64 array for ``exponent``."""
        a = np.arange(10**4, dtype="float64")
        self.a2 = (a + 10**exponent).repeat(100)

    def time_factorize(self, exponent):
        """Run ``pd.factorize`` over the prepared array."""
        pd.factorize(self.a2)

    def time_unique(self, exponent):
        """Run ``pd.unique`` over the prepared array."""
        pd.unique(self.a2)
class NumericSeriesIndexing:
    """Benchmark ``Series.loc`` slicing on an ordered numeric index.

    The index holds the values ``0 .. N-2`` in ascending order with one
    extra ``54`` inserted after the first ``54``, so it has ``N`` entries
    and exactly one duplicated label.
    """

    # The dedicated Int64Index / UInt64Index / Float64Index classes no longer
    # exist in pandas 2.0+, so the benchmark is parametrized over the NumPy
    # dtype and lets ``pd.Index`` pick the index type from the array's dtype.
    params = [
        (np.int64, np.uint64, np.float64),
        (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
    ]
    param_names = ["index_dtype", "N"]

    def setup(self, index, N):
        """Build a length-``N`` Series whose index has dtype ``index``.

        ``index`` is the NumPy scalar type taken from ``params[0]``.
        """
        vals = np.array(
            list(range(55)) + [54] + list(range(55, N - 1)),
            dtype=index,
        )
        indices = pd.Index(vals)
        self.data = pd.Series(np.arange(N), index=indices)

    def time_loc_slice(self, index, N):
        """Slice by label up to 800."""
        # trigger building of mapping
        self.data.loc[:800]
class NumericSeriesIndexingShuffled:
    """Benchmark ``Series.loc`` slicing on a shuffled numeric index.

    The index holds the values ``0 .. N-2`` plus one extra ``54`` (``N``
    entries, one duplicated label), randomly permuted before the Series
    is built.
    """

    # The dedicated Int64Index / UInt64Index / Float64Index classes no longer
    # exist in pandas 2.0+, so the benchmark is parametrized over the NumPy
    # dtype and lets ``pd.Index`` pick the index type from the array's dtype.
    params = [
        (np.int64, np.uint64, np.float64),
        (10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
    ]
    param_names = ["index_dtype", "N"]

    def setup(self, index, N):
        """Build a length-``N`` Series with a shuffled index of dtype ``index``.

        ``index`` is the NumPy scalar type taken from ``params[0]``.
        """
        vals = np.array(
            list(range(55)) + [54] + list(range(55, N - 1)),
            dtype=index,
        )
        # In-place permutation using NumPy's global random state.
        np.random.shuffle(vals)
        indices = pd.Index(vals)
        self.data = pd.Series(np.arange(N), index=indices)

    def time_loc_slice(self, index, N):
        """Slice by label up to 800."""
        # trigger building of mapping
        self.data.loc[:800]