Skip to content

Commit b51a58d

Browse files
committed
PERF: expand UInt64Index benchmark coverage
1 parent 8ad9867 commit b51a58d

File tree

2 files changed

+53
-34
lines changed

2 files changed

+53
-34
lines changed

asv_bench/benchmarks/algorithms.py

+51-32
Original file line number | Diff line number | Diff line change
@@ -16,56 +16,75 @@
1616

1717
class Factorize(object):
1818

19-
params = [True, False]
20-
param_names = ['sort']
19+
params = [[True, False], ['int', 'uint', 'float', 'string']]
20+
param_names = ['sort', 'dtype']
2121

22-
def setup(self, sort):
22+
def setup(self, sort, dtype):
2323
N = 10**5
24-
self.int_idx = pd.Int64Index(np.arange(N).repeat(5))
25-
self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5))
26-
self.string_idx = tm.makeStringIndex(N)
24+
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
25+
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
26+
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
27+
'string': tm.makeStringIndex(N).repeat(5)}
28+
self.idx = data[dtype]
2729

28-
def time_factorize_int(self, sort):
29-
self.int_idx.factorize(sort=sort)
30+
def time_factorize(self, sort, dtype):
31+
self.idx.factorize(sort=sort)
3032

31-
def time_factorize_float(self, sort):
32-
self.float_idx.factorize(sort=sort)
3333

34-
def time_factorize_string(self, sort):
35-
self.string_idx.factorize(sort=sort)
34+
class FactorizeUnique(object):
3635

36+
params = [[True, False], ['int', 'uint', 'float', 'string']]
37+
param_names = ['sort', 'dtype']
3738

38-
class Duplicated(object):
39+
def setup(self, sort, dtype):
40+
N = 10**5
41+
data = {'int': pd.Int64Index(np.arange(N)),
42+
'uint': pd.UInt64Index(np.arange(N)),
43+
'float': pd.Float64Index(np.arange(N)),
44+
'string': tm.makeStringIndex(N)}
45+
self.idx = data[dtype]
46+
assert self.idx.is_unique
3947

40-
params = ['first', 'last', False]
41-
param_names = ['keep']
48+
def time_factorize(self, sort, dtype):
49+
self.idx.factorize(sort=sort)
4250

43-
def setup(self, keep):
44-
N = 10**5
45-
self.int_idx = pd.Int64Index(np.arange(N).repeat(5))
46-
self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5))
47-
self.string_idx = tm.makeStringIndex(N)
4851

49-
def time_duplicated_int(self, keep):
50-
self.int_idx.duplicated(keep=keep)
52+
class Duplicated(object):
53+
54+
params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
55+
param_names = ['keep', 'dtype']
5156

52-
def time_duplicated_float(self, keep):
53-
self.float_idx.duplicated(keep=keep)
57+
def setup(self, keep, dtype):
58+
N = 10**5
59+
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
60+
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
61+
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
62+
'string': tm.makeStringIndex(N).repeat(5)}
63+
self.idx = data[dtype]
64+
# cache is_unique
65+
self.idx.is_unique
5466

55-
def time_duplicated_string(self, keep):
56-
self.string_idx.duplicated(keep=keep)
67+
def time_duplicated(self, keep, dtype):
68+
self.idx.duplicated(keep=keep)
5769

5870

5971
class DuplicatedUniqueIndex(object):
6072

61-
def setup(self):
62-
N = 10**5
63-
self.idx_int_dup = pd.Int64Index(np.arange(N * 5))
73+
params = ['int', 'uint', 'float', 'string']
74+
param_names = ['dtype']
75+
76+
def setup(self, dtype):
77+
N = 10**5 * 5
78+
data = {'int': pd.Int64Index(np.arange(N)),
79+
'uint': pd.UInt64Index(np.arange(N)),
80+
'float': pd.Float64Index(np.random.randn(N)),
81+
'string': tm.makeStringIndex(N)}
82+
self.idx = data[dtype]
6483
# cache is_unique
65-
self.idx_int_dup.is_unique
84+
self.idx.is_unique
6685

67-
def time_duplicated_unique_int(self):
68-
self.idx_int_dup.duplicated()
86+
def time_duplicated_unique(self, dtype):
87+
self.idx.duplicated()
6988

7089

7190
class Match(object):

asv_bench/benchmarks/series_methods.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def time_constructor(self, data):
2323

2424
class IsIn(object):
2525

26-
params = ['int64', 'object']
26+
params = ['int64', 'uint64', 'object']
2727
param_names = ['dtype']
2828

2929
def setup(self, dtype):
@@ -150,7 +150,7 @@ def time_clip(self):
150150

151151
class ValueCounts(object):
152152

153-
params = ['int', 'float', 'object']
153+
params = ['int', 'uint', 'float', 'object']
154154
param_names = ['dtype']
155155

156156
def setup(self, dtype):

0 commit comments

Comments
 (0)