|
16 | 16 |
|
17 | 17 | class Factorize(object):
|
18 | 18 |
|
19 |
| - params = [True, False] |
20 |
| - param_names = ['sort'] |
| 19 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 20 | + param_names = ['sort', 'dtype'] |
21 | 21 |
|
22 |
| - def setup(self, sort): |
| 22 | + def setup(self, sort, dtype): |
23 | 23 | N = 10**5
|
24 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
25 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
26 |
| - self.string_idx = tm.makeStringIndex(N) |
| 24 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 25 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 26 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 27 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 28 | + self.idx = data[dtype] |
27 | 29 |
|
28 |
| - def time_factorize_int(self, sort): |
29 |
| - self.int_idx.factorize(sort=sort) |
| 30 | + def time_factorize(self, sort, dtype): |
| 31 | + self.idx.factorize(sort=sort) |
30 | 32 |
|
31 |
| - def time_factorize_float(self, sort): |
32 |
| - self.float_idx.factorize(sort=sort) |
33 | 33 |
|
34 |
| - def time_factorize_string(self, sort): |
35 |
| - self.string_idx.factorize(sort=sort) |
| 34 | +class FactorizeUnique(object): |
36 | 35 |
|
| 36 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 37 | + param_names = ['sort', 'dtype'] |
37 | 38 |
|
38 |
| -class Duplicated(object): |
39 |
| - |
40 |
| - params = ['first', 'last', False] |
41 |
| - param_names = ['keep'] |
42 |
| - |
43 |
| - def setup(self, keep): |
| 39 | + def setup(self, sort, dtype): |
44 | 40 | N = 10**5
|
45 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
46 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
47 |
| - self.string_idx = tm.makeStringIndex(N) |
48 |
| - |
49 |
| - def time_duplicated_int(self, keep): |
50 |
| - self.int_idx.duplicated(keep=keep) |
| 41 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 42 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 43 | + 'float': pd.Float64Index(np.arange(N)), |
| 44 | + 'string': tm.makeStringIndex(N)} |
| 45 | + self.idx = data[dtype] |
| 46 | + assert self.idx.is_unique |
51 | 47 |
|
52 |
| - def time_duplicated_float(self, keep): |
53 |
| - self.float_idx.duplicated(keep=keep) |
| 48 | + def time_factorize(self, sort, dtype): |
| 49 | + self.idx.factorize(sort=sort) |
54 | 50 |
|
55 |
| - def time_duplicated_string(self, keep): |
56 |
| - self.string_idx.duplicated(keep=keep) |
57 | 51 |
|
| 52 | +class Duplicated(object): |
58 | 53 |
|
59 |
| -class DuplicatedUniqueIndex(object): |
| 54 | + params = [['first', 'last', False], ['int', 'uint', 'float', 'string']] |
| 55 | + param_names = ['keep', 'dtype'] |
60 | 56 |
|
61 |
| - def setup(self): |
| 57 | + def setup(self, keep, dtype): |
62 | 58 | N = 10**5
|
63 |
| - self.idx_int_dup = pd.Int64Index(np.arange(N * 5)) |
| 59 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 60 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 61 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 62 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 63 | + self.idx = data[dtype] |
64 | 64 | # cache is_unique
|
65 |
| - self.idx_int_dup.is_unique |
| 65 | + self.idx.is_unique |
| 66 | + |
| 67 | + def time_duplicated(self, keep, dtype): |
| 68 | + self.idx.duplicated(keep=keep) |
| 69 | + |
66 | 70 |
|
67 |
| - def time_duplicated_unique_int(self): |
68 |
| - self.idx_int_dup.duplicated() |
| 71 | +class DuplicatedUniqueIndex(object): |
69 | 72 |
|
| 73 | + params = ['int', 'uint', 'float', 'string'] |
| 74 | + param_names = ['dtype'] |
70 | 75 |
|
71 |
| -class Match(object): |
| 76 | + def setup(self, dtype): |
| 77 | + N = 10**5 |
| 78 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 79 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 80 | + 'float': pd.Float64Index(np.random.randn(N)), |
| 81 | + 'string': tm.makeStringIndex(N)} |
| 82 | + self.idx = data[dtype] |
| 83 | + # cache is_unique |
| 84 | + self.idx.is_unique |
72 | 85 |
|
73 |
| - def setup(self): |
74 |
| - self.uniques = tm.makeStringIndex(1000).values |
75 |
| - self.all = self.uniques.repeat(10) |
| 86 | + def time_duplicated_unique(self, dtype): |
| 87 | + self.idx.duplicated() |
76 | 88 |
|
77 | 89 |
|
78 | 90 | class Hashing(object):
|
@@ -113,4 +125,21 @@ def time_series_dates(self, df):
|
113 | 125 | hashing.hash_pandas_object(df['dates'])
|
114 | 126 |
|
115 | 127 |
|
| 128 | +class Quantile(object): |
| 129 | + params = [[0, 0.5, 1], |
| 130 | + ['linear', 'nearest', 'lower', 'higher', 'midpoint'], |
| 131 | + ['float', 'int', 'uint']] |
| 132 | + param_names = ['quantile', 'interpolation', 'dtype'] |
| 133 | + |
| 134 | + def setup(self, quantile, interpolation, dtype): |
| 135 | + N = 10**5 |
| 136 | + data = {'int': np.arange(N), |
| 137 | + 'uint': np.arange(N).astype(np.uint64), |
| 138 | + 'float': np.random.randn(N)} |
| 139 | + self.idx = pd.Series(data[dtype].repeat(5)) |
| 140 | + |
| 141 | + def time_quantile(self, quantile, interpolation, dtype): |
| 142 | + self.idx.quantile(quantile, interpolation=interpolation) |
| 143 | + |
| 144 | + |
116 | 145 | from .pandas_vb_common import setup # noqa: F401
|
0 commit comments