|
16 | 16 |
|
17 | 17 | class Factorize(object):
|
18 | 18 |
|
19 |
| - params = [True, False] |
20 |
| - param_names = ['sort'] |
| 19 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 20 | + param_names = ['sort', 'dtype'] |
21 | 21 |
|
22 |
| - def setup(self, sort): |
| 22 | + def setup(self, sort, dtype): |
23 | 23 | N = 10**5
|
24 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
25 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
26 |
| - self.string_idx = tm.makeStringIndex(N) |
| 24 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 25 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 26 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 27 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 28 | + self.idx = data[dtype] |
27 | 29 |
|
28 |
| - def time_factorize_int(self, sort): |
29 |
| - self.int_idx.factorize(sort=sort) |
| 30 | + def time_factorize(self, sort, dtype): |
| 31 | + self.idx.factorize(sort=sort) |
30 | 32 |
|
31 |
| - def time_factorize_float(self, sort): |
32 |
| - self.float_idx.factorize(sort=sort) |
33 | 33 |
|
34 |
| - def time_factorize_string(self, sort): |
35 |
| - self.string_idx.factorize(sort=sort) |
| 34 | +class FactorizeUnique(object): |
36 | 35 |
|
| 36 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 37 | + param_names = ['sort', 'dtype'] |
37 | 38 |
|
38 |
| -class Duplicated(object): |
| 39 | + def setup(self, sort, dtype): |
| 40 | + N = 10**5 |
| 41 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 42 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 43 | + 'float': pd.Float64Index(np.arange(N)), |
| 44 | + 'string': tm.makeStringIndex(N)} |
| 45 | + self.idx = data[dtype] |
| 46 | + assert self.idx.is_unique |
39 | 47 |
|
40 |
| - params = ['first', 'last', False] |
41 |
| - param_names = ['keep'] |
| 48 | + def time_factorize(self, sort, dtype): |
| 49 | + self.idx.factorize(sort=sort) |
42 | 50 |
|
43 |
| - def setup(self, keep): |
44 |
| - N = 10**5 |
45 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
46 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
47 |
| - self.string_idx = tm.makeStringIndex(N) |
48 | 51 |
|
49 |
| - def time_duplicated_int(self, keep): |
50 |
| - self.int_idx.duplicated(keep=keep) |
| 52 | +class Duplicated(object): |
| 53 | + |
| 54 | + params = [['first', 'last', False], ['int', 'uint', 'float', 'string']] |
| 55 | + param_names = ['keep', 'dtype'] |
51 | 56 |
|
52 |
| - def time_duplicated_float(self, keep): |
53 |
| - self.float_idx.duplicated(keep=keep) |
| 57 | + def setup(self, keep, dtype): |
| 58 | + N = 10**5 |
| 59 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 60 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 61 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 62 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 63 | + self.idx = data[dtype] |
| 64 | + # cache is_unique |
| 65 | + self.idx.is_unique |
54 | 66 |
|
55 |
| - def time_duplicated_string(self, keep): |
56 |
| - self.string_idx.duplicated(keep=keep) |
| 67 | + def time_duplicated(self, keep, dtype): |
| 68 | + self.idx.duplicated(keep=keep) |
57 | 69 |
|
58 | 70 |
|
59 | 71 | class DuplicatedUniqueIndex(object):
|
60 | 72 |
|
61 |
| - def setup(self): |
62 |
| - N = 10**5 |
63 |
| - self.idx_int_dup = pd.Int64Index(np.arange(N * 5)) |
| 73 | + params = ['int', 'uint', 'float', 'string'] |
| 74 | + param_names = ['dtype'] |
| 75 | + |
| 76 | + def setup(self, dtype): |
| 77 | + N = 10**5 * 5 |
| 78 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 79 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 80 | + 'float': pd.Float64Index(np.random.randn(N)), |
| 81 | + 'string': tm.makeStringIndex(N)} |
| 82 | + self.idx = data[dtype] |
64 | 83 | # cache is_unique
|
65 |
| - self.idx_int_dup.is_unique |
| 84 | + self.idx.is_unique |
66 | 85 |
|
67 |
| - def time_duplicated_unique_int(self): |
68 |
| - self.idx_int_dup.duplicated() |
| 86 | + def time_duplicated_unique(self, dtype): |
| 87 | + self.idx.duplicated() |
69 | 88 |
|
70 | 89 |
|
71 | 90 | class Match(object):
|
|
0 commit comments