forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinference.py
116 lines (81 loc) · 3.16 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, Series, to_numeric
from .pandas_vb_common import numeric_dtypes, lib
class NumericInferOps(object):
    """Benchmark binary arithmetic between two columns of the same dtype.

    The ``setup`` / ``time_*`` / ``params`` naming looks like the airspeed
    velocity (asv) benchmark convention -- NOTE(review): confirm against the
    benchmark runner configuration.  Each ``time_*`` method is run once per
    entry of ``numeric_dtypes`` and discards its result; only the cost of
    evaluating the expression matters.
    """
    # from GH 7332
    goal_time = 0.2
    params = numeric_dtypes
    param_names = ['dtype']

    def setup(self, dtype):
        """Create ``self.df`` with columns 'A' and 'B', both ``arange`` cast to *dtype*."""
        n_rows = 5 * 10**5
        self.df = DataFrame({
            label: np.arange(n_rows).astype(dtype) for label in ('A', 'B')
        })

    def time_add(self, dtype):
        """Column 'A' plus column 'B'."""
        left, right = self.df['A'], self.df['B']
        left + right

    def time_subtract(self, dtype):
        """Column 'A' minus column 'B'."""
        left, right = self.df['A'], self.df['B']
        left - right

    def time_multiply(self, dtype):
        """Column 'A' times column 'B'."""
        left, right = self.df['A'], self.df['B']
        left * right

    def time_divide(self, dtype):
        """Column 'A' divided by column 'B' (both columns start at 0)."""
        left, right = self.df['A'], self.df['B']
        left / right

    def time_modulo(self, dtype):
        """Column 'A' modulo column 'B' (both columns start at 0)."""
        left, right = self.df['A'], self.df['B']
        left % right
class DateInferOps(object):
    """Benchmark arithmetic between datetime64 and timedelta columns.

    ``setup_cache`` builds the frame and returns it; every ``time_*`` method
    receives that frame as its ``df`` argument and throws its result away.
    """
    # from GH 7332
    goal_time = 0.2

    def setup_cache(self):
        """Return a 500,000-row frame with 'datetime64' and 'timedelta' columns.

        'datetime64' is ``arange`` reinterpreted as millisecond timestamps;
        'timedelta' is that column minus itself.
        """
        n_rows = 5 * 10**5
        stamps = np.arange(n_rows).astype('datetime64[ms]')
        frame = DataFrame({'datetime64': stamps})
        frame['timedelta'] = frame['datetime64'] - frame['datetime64']
        return frame

    def time_subtract_datetimes(self, df):
        """datetime64 column minus datetime64 column."""
        left, right = df['datetime64'], df['datetime64']
        left - right

    def time_timedelta_plus_datetime(self, df):
        """timedelta column plus datetime64 column."""
        left, right = df['timedelta'], df['datetime64']
        left + right

    def time_add_timedeltas(self, df):
        """timedelta column plus timedelta column."""
        left, right = df['timedelta'], df['timedelta']
        left + right
class ToNumeric(object):
    """Benchmark ``to_numeric`` on float, numeric-string and string Series.

    Every ``time_*`` method is parametrized over the ``errors`` argument
    ('ignore' and 'coerce') and forwards it to ``to_numeric``.
    """
    goal_time = 0.2
    params = ['ignore', 'coerce']
    param_names = ['errors']

    def setup(self, errors):
        """Prepare three Series of 10,000 elements each.

        ``self.float`` holds random normal floats, ``self.numstr`` the same
        values cast to ``'str'``, and ``self.str`` the output of
        ``tm.makeStringIndex`` (presumably non-numeric strings -- confirm).
        """
        n_values = 10000
        floats = Series(np.random.randn(n_values))
        self.float = floats
        self.numstr = floats.astype('str')
        self.str = Series(tm.makeStringIndex(n_values))

    def time_from_float(self, errors):
        """Convert the float Series."""
        source = self.float
        to_numeric(source, errors=errors)

    def time_from_numeric_str(self, errors):
        """Convert the Series of stringified floats."""
        source = self.numstr
        to_numeric(source, errors=errors)

    def time_from_str(self, errors):
        """Convert the Series built from ``tm.makeStringIndex``."""
        source = self.str
        to_numeric(source, errors=errors)
class ToNumericDowncast(object):
    """Benchmark ``to_numeric(..., downcast=...)`` over several input kinds.

    Parametrized over the product of an input label (a key of ``data_dict``)
    and a ``downcast`` value.  All inputs are built once, when the class body
    executes, and are shared by every parameter combination.
    """

    param_names = ['dtype', 'downcast']
    params = [
        ['string-float', 'string-int', 'string-nint', 'datetime64',
         'int-list', 'int32'],
        [None, 'integer', 'signed', 'unsigned', 'float'],
    ]

    N = 500000
    N2 = N // 2

    # Each list input is N2 copies of one value followed by N2 copies of the
    # integer 2, so the "string-*" inputs mix strings with plain ints.
    # np.repeat repeats every element N times, so the 'datetime64' entry has
    # 2 * N elements while the 'int32' entry has N.
    data_dict = {
        'string-int': ['1'] * N2 + [2] * N2,
        'string-nint': ['-1'] * N2 + [2] * N2,
        'datetime64': np.repeat(
            np.array(['1970-01-01', '1970-01-02'], dtype='datetime64[D]'),
            N,
        ),
        'string-float': ['1.1'] * N2 + [2] * N2,
        'int-list': [1] * N2 + [2] * N2,
        'int32': np.repeat(np.int32(1), N),
    }

    def setup(self, dtype, downcast):
        """Select the prebuilt input stored under the *dtype* label."""
        chosen = self.data_dict[dtype]
        self.data = chosen

    def time_downcast(self, dtype, downcast):
        """Run ``to_numeric`` on the selected input with the given *downcast*."""
        payload = self.data
        to_numeric(payload, downcast=downcast)
class MaybeConvertNumeric(object):
    """Benchmark ``lib.maybe_convert_numeric`` on a large mixed object array."""

    def setup_cache(self):
        """Return a 1,000,000-element object array of large integers.

        Element ``i`` starts as ``2**63 + i``.  Every odd position is then
        replaced by the string form of its value, and finally the last
        element is overwritten with ``-1``.
        NOTE(review): presumably the trailing -1 forces a mix of negative and
        beyond-int64 values -- confirm the intent with the original issue.
        """
        count = 10**6
        base = np.repeat([2**63], count)
        values = base + np.arange(count).astype('uint64')
        mixed = values.astype(object)
        mixed[1::2] = values[1::2].astype(str)
        mixed[-1] = -1
        return mixed

    def time_convert(self, data):
        """Convert *data* with an empty NA-value set and ``coerce_numeric=False``."""
        na_values = set()
        lib.maybe_convert_numeric(data, na_values, coerce_numeric=False)
from .pandas_vb_common import setup # noqa: F401