forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstat_ops.py
109 lines (74 loc) · 3.16 KB
/
stat_ops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import pandas as pd
# Names of the reduction methods exercised by the parametrized benchmarks
# below; each name is resolved on a DataFrame or Series with ``getattr``.
ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem',
'var']
class FrameOps(object):
    """Benchmark DataFrame reductions over op, dtype, axis and bottleneck."""

    params = [ops, ['float', 'int'], [0, 1], [True, False]]
    param_names = ['op', 'dtype', 'axis', 'use_bottleneck']

    def setup(self, op, dtype, axis, use_bottleneck):
        """Build the frame, toggle bottleneck and bind the reduction.

        Parameters
        ----------
        op : str
            Name of the DataFrame method to benchmark.
        dtype : str
            Dtype the random 100000 x 4 frame is cast to.
        axis : int
            Unused here; consumed by ``time_op``.
        use_bottleneck : bool
            Value written to the pandas bottleneck switch.
        """
        df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except (TypeError, AttributeError, KeyError):
            # pandas reports an unknown option with OptionError, which
            # derives from AttributeError and KeyError rather than TypeError,
            # so all three are caught before falling back to the private
            # nanops flag.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.df_func = getattr(df, op)

    def time_op(self, op, dtype, axis, use_bottleneck):
        """Time the bound reduction along ``axis``."""
        self.df_func(axis=axis)
class FrameMultiIndexOps(object):
    """Benchmark DataFrame reductions called with a ``level`` argument."""

    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Build a three-level MultiIndex frame and bind the reduction.

        Parameters
        ----------
        level : int or list of int
            Unused here; consumed by ``time_op``.
        op : str
            Name of the DataFrame method to benchmark.
        """
        levels = [np.arange(10), np.arange(100), np.arange(100)]
        codes = [np.arange(10).repeat(10000),
                 np.tile(np.arange(100).repeat(100), 10),
                 np.tile(np.tile(np.arange(100), 100), 10)]
        try:
            index = pd.MultiIndex(levels=levels, codes=codes)
        except TypeError:
            # Older pandas releases only accept the original ``labels``
            # spelling of this keyword.
            index = pd.MultiIndex(levels=levels, labels=codes)
        df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
        self.df_func = getattr(df, op)

    def time_op(self, level, op):
        """Time the bound reduction called with ``level=level``."""
        self.df_func(level=level)
class SeriesOps(object):
    """Benchmark Series reductions over op, dtype and bottleneck usage."""

    params = [ops, ['float', 'int'], [True, False]]
    param_names = ['op', 'dtype', 'use_bottleneck']

    def setup(self, op, dtype, use_bottleneck):
        """Build the series, toggle bottleneck and bind the reduction.

        Parameters
        ----------
        op : str
            Name of the Series method to benchmark.
        dtype : str
            Dtype the random 100000-element series is cast to.
        use_bottleneck : bool
            Value written to the pandas bottleneck switch.
        """
        s = pd.Series(np.random.randn(100000)).astype(dtype)
        try:
            pd.options.compute.use_bottleneck = use_bottleneck
        except (TypeError, AttributeError, KeyError):
            # pandas reports an unknown option with OptionError, which
            # derives from AttributeError and KeyError rather than TypeError,
            # so all three are caught before falling back to the private
            # nanops flag.
            from pandas.core import nanops
            nanops._USE_BOTTLENECK = use_bottleneck
        self.s_func = getattr(s, op)

    def time_op(self, op, dtype, use_bottleneck):
        """Time the bound reduction with its default arguments."""
        self.s_func()
class SeriesMultiIndexOps(object):
    """Benchmark Series reductions called with a ``level`` argument."""

    params = ([0, 1, [0, 1]], ops)
    param_names = ['level', 'op']

    def setup(self, level, op):
        """Build a three-level MultiIndex series and bind the reduction.

        Parameters
        ----------
        level : int or list of int
            Unused here; consumed by ``time_op``.
        op : str
            Name of the Series method to benchmark.
        """
        levels = [np.arange(10), np.arange(100), np.arange(100)]
        codes = [np.arange(10).repeat(10000),
                 np.tile(np.arange(100).repeat(100), 10),
                 np.tile(np.tile(np.arange(100), 100), 10)]
        try:
            index = pd.MultiIndex(levels=levels, codes=codes)
        except TypeError:
            # Older pandas releases only accept the original ``labels``
            # spelling of this keyword.
            index = pd.MultiIndex(levels=levels, labels=codes)
        s = pd.Series(np.random.randn(len(index)), index=index)
        self.s_func = getattr(s, op)

    def time_op(self, level, op):
        """Time the bound reduction called with ``level=level``."""
        self.s_func(level=level)
class Rank(object):
    """Benchmark ``rank`` on a DataFrame or Series of 10**5 random values."""

    params = [['DataFrame', 'Series'], [True, False]]
    param_names = ['constructor', 'pct']

    def setup(self, constructor, pct):
        """Create ``self.data`` with the pandas class named ``constructor``.

        Parameters
        ----------
        constructor : str
            Attribute name looked up on ``pd`` and called with the values.
        pct : bool
            Unused here; consumed by the timing methods.
        """
        values = np.random.randn(10**5)
        self.data = getattr(pd, constructor)(values)

    def time_rank(self, constructor, pct):
        """Time ``rank`` with the given ``pct`` flag."""
        self.data.rank(pct=pct)

    def time_average_old(self, constructor, pct):
        """Time ``rank`` followed by a division by the data length."""
        self.data.rank(pct=pct) / len(self.data)
class Correlation(object):
    """Benchmark ``DataFrame.corr`` for each correlation method."""

    params = ['spearman', 'kendall', 'pearson']
    param_names = ['method']

    def setup(self, method):
        """Create a 1000 x 30 frame of random values in ``self.df``.

        Parameters
        ----------
        method : str
            Unused here; consumed by ``time_corr``.
        """
        self.df = pd.DataFrame(np.random.randn(1000, 30))

    def time_corr(self, method):
        """Time ``corr`` using the given ``method``."""
        self.df.corr(method=method)
from .pandas_vb_common import setup # noqa: F401