Skip to content

Commit c6060a8

Browse files
CLN: clean benchmarks to get them running (#16025)
* fix lib and algos import
* fix take_1d import
* string uppercase -> ascii_uppercase (py3 compat)
* sas test file path
* fix datetools usage
* fix hashing benchmarks
* dict values py3 compat
* avoid overflow by using higher freq
* xrange -> range
* fix xport path
* revised hdfstore_bench to use new query syntax; rename table variables
* change default python version to 3.6
1 parent 0e2bbcf commit c6060a8

14 files changed

+68
-61
lines changed

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
// The Pythons you'd like to test against. If not provided, defaults
2727
// to the current version of Python used to run `asv`.
2828
// "pythons": ["2.7", "3.4"],
29-
"pythons": ["2.7"],
29+
"pythons": ["3.6"],
3030

3131
// The matrix of dependencies to test. Each key is the name of a
3232
// package (in PyPI) and the values are version numbers. An empty

asv_bench/benchmarks/algorithms.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
import pandas as pd
33
from pandas.util import testing as tm
44

5+
try:
6+
from pandas.tools.hashing import hash_pandas_object
7+
except ImportError:
8+
pass
9+
510

611
class Algorithms(object):
712
goal_time = 0.2
@@ -103,13 +108,13 @@ def setup(self):
103108
self.df.iloc[10:20] = np.nan
104109

105110
def time_frame(self):
106-
self.df.hash()
111+
hash_pandas_object(self.df)
107112

108113
def time_series_int(self):
109-
self.df.E.hash()
114+
hash_pandas_object(self.df.E)
110115

111116
def time_series_string(self):
112-
self.df.B.hash()
117+
hash_pandas_object(self.df.B)
113118

114119
def time_series_categorical(self):
115-
self.df.C.hash()
120+
hash_pandas_object(self.df.C)

asv_bench/benchmarks/frame_ctor.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ def setup(self):
2020
self.data = self.frame.to_dict()
2121
except:
2222
self.data = self.frame.toDict()
23-
self.some_dict = self.data.values()[0]
23+
self.some_dict = list(self.data.values())[0]
2424
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]
2525

2626
self.data2 = dict(
2727
((i, dict(((j, float(j)) for j in range(100)))) for i in
28-
xrange(2000)))
28+
range(2000)))
2929

3030
def time_frame_ctor_list_of_dict(self):
3131
DataFrame(self.dict_list)

asv_bench/benchmarks/frame_methods.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def time_reindex_both_axes_ix(self):
5656
self.df.ix[(self.idx, self.idx)]
5757

5858
def time_reindex_upcast(self):
59-
self.df2.reindex(permutation(range(1200)))
59+
self.df2.reindex(np.random.permutation(range(1200)))
6060

6161

6262
#----------------------------------------------------------------------
@@ -583,7 +583,7 @@ class frame_assign_timeseries_index(object):
583583
goal_time = 0.2
584584

585585
def setup(self):
586-
self.idx = date_range('1/1/2000', periods=100000, freq='D')
586+
self.idx = date_range('1/1/2000', periods=100000, freq='H')
587587
self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx)
588588

589589
def time_frame_assign_timeseries_index(self):

asv_bench/benchmarks/gil.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
from .pandas_vb_common import *
2-
from pandas.core import common as com
2+
3+
from pandas.core.algorithms import take_1d
34

45
try:
56
from cStringIO import StringIO
67
except ImportError:
78
from io import StringIO
89

10+
try:
11+
from pandas._libs import algos
12+
except ImportError:
13+
from pandas import algos
14+
915
try:
1016
from pandas.util.testing import test_parallel
1117

@@ -167,11 +173,11 @@ def time_nogil_take1d_float64(self):
167173

168174
@test_parallel(num_threads=2)
169175
def take_1d_pg2_int64(self):
170-
com.take_1d(self.df.int64.values, self.indexer)
176+
take_1d(self.df.int64.values, self.indexer)
171177

172178
@test_parallel(num_threads=2)
173179
def take_1d_pg2_float64(self):
174-
com.take_1d(self.df.float64.values, self.indexer)
180+
take_1d(self.df.float64.values, self.indexer)
175181

176182

177183
class nogil_take1d_int64(object):
@@ -193,11 +199,11 @@ def time_nogil_take1d_int64(self):
193199

194200
@test_parallel(num_threads=2)
195201
def take_1d_pg2_int64(self):
196-
com.take_1d(self.df.int64.values, self.indexer)
202+
take_1d(self.df.int64.values, self.indexer)
197203

198204
@test_parallel(num_threads=2)
199205
def take_1d_pg2_float64(self):
200-
com.take_1d(self.df.float64.values, self.indexer)
206+
take_1d(self.df.float64.values, self.indexer)
201207

202208

203209
class nogil_kth_smallest(object):
@@ -226,7 +232,7 @@ class nogil_datetime_fields(object):
226232

227233
def setup(self):
228234
self.N = 100000000
229-
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D')
235+
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T')
230236
self.period = self.dti.to_period('D')
231237
if (not have_real_test_parallel):
232238
raise NotImplementedError

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def setup(self):
331331

332332
def get_test_data(self, ngroups=100, n=100000):
333333
self.unique_groups = range(self.ngroups)
334-
self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object)
334+
self.arr = np.asarray(np.tile(self.unique_groups, int(n / self.ngroups)), dtype=object)
335335
if (len(self.arr) < n):
336336
self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object)
337337
random.shuffle(self.arr)

asv_bench/benchmarks/hdfstore_bench.py

+23-25
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,12 @@ def setup(self):
3131
self.remove(self.f)
3232

3333
self.store = HDFStore(self.f)
34-
self.store.put('df1', self.df)
35-
self.store.put('df_mixed', self.df_mixed)
36-
37-
self.store.append('df5', self.df_mixed)
38-
self.store.append('df7', self.df)
39-
40-
self.store.append('df9', self.df_wide)
41-
42-
self.store.append('df11', self.df_wide2)
43-
self.store.append('df12', self.df2)
34+
self.store.put('fixed', self.df)
35+
self.store.put('fixed_mixed', self.df_mixed)
36+
self.store.append('table', self.df2)
37+
self.store.append('table_mixed', self.df_mixed)
38+
self.store.append('table_wide', self.df_wide)
39+
self.store.append('table_wide2', self.df_wide2)
4440

4541
def teardown(self):
4642
self.store.close()
@@ -52,45 +48,47 @@ def remove(self, f):
5248
pass
5349

5450
def time_read_store(self):
55-
self.store.get('df1')
51+
self.store.get('fixed')
5652

5753
def time_read_store_mixed(self):
58-
self.store.get('df_mixed')
54+
self.store.get('fixed_mixed')
5955

6056
def time_write_store(self):
61-
self.store.put('df2', self.df)
57+
self.store.put('fixed_write', self.df)
6258

6359
def time_write_store_mixed(self):
64-
self.store.put('df_mixed2', self.df_mixed)
60+
self.store.put('fixed_mixed_write', self.df_mixed)
6561

6662
def time_read_store_table_mixed(self):
67-
self.store.select('df5')
63+
self.store.select('table_mixed')
6864

6965
def time_write_store_table_mixed(self):
70-
self.store.append('df6', self.df_mixed)
66+
self.store.append('table_mixed_write', self.df_mixed)
7167

7268
def time_read_store_table(self):
73-
self.store.select('df7')
69+
self.store.select('table')
7470

7571
def time_write_store_table(self):
76-
self.store.append('df8', self.df)
72+
self.store.append('table_write', self.df)
7773

7874
def time_read_store_table_wide(self):
79-
self.store.select('df9')
75+
self.store.select('table_wide')
8076

8177
def time_write_store_table_wide(self):
82-
self.store.append('df10', self.df_wide)
78+
self.store.append('table_wide_write', self.df_wide)
8379

8480
def time_write_store_table_dc(self):
85-
self.store.append('df15', self.df, data_columns=True)
81+
self.store.append('table_dc_write', self.df_dc, data_columns=True)
8682

8783
def time_query_store_table_wide(self):
88-
self.store.select('df11', [('index', '>', self.df_wide2.index[10000]),
89-
('index', '<', self.df_wide2.index[15000])])
84+
start = self.df_wide2.index[10000]
85+
stop = self.df_wide2.index[15000]
86+
self.store.select('table_wide', where="index > start and index < stop")
9087

9188
def time_query_store_table(self):
92-
self.store.select('df12', [('index', '>', self.df2.index[10000]),
93-
('index', '<', self.df2.index[15000])])
89+
start = self.df2.index[10000]
90+
stop = self.df2.index[15000]
91+
self.store.select('table', where="index > start and index < stop")
9492

9593

9694
class HDF5Panel(object):

asv_bench/benchmarks/inference.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,5 @@ def setup(self):
113113
self.na_values = set()
114114

115115
def time_convert(self):
116-
pd.lib.maybe_convert_numeric(self.data, self.na_values,
117-
coerce_numeric=False)
116+
lib.maybe_convert_numeric(self.data, self.na_values,
117+
coerce_numeric=False)

asv_bench/benchmarks/join_merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,12 @@ def setup(self):
314314

315315
self.df1 = pd.DataFrame(
316316
{'time': np.random.randint(0, one_count / 20, one_count),
317-
'key': np.random.choice(list(string.uppercase), one_count),
317+
'key': np.random.choice(list(string.ascii_uppercase), one_count),
318318
'key2': np.random.randint(0, 25, one_count),
319319
'value1': np.random.randn(one_count)})
320320
self.df2 = pd.DataFrame(
321321
{'time': np.random.randint(0, two_count / 20, two_count),
322-
'key': np.random.choice(list(string.uppercase), two_count),
322+
'key': np.random.choice(list(string.ascii_uppercase), two_count),
323323
'key2': np.random.randint(0, 25, two_count),
324324
'value2': np.random.randn(two_count)})
325325

asv_bench/benchmarks/packers.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -153,18 +153,20 @@ def time_packers_read_stata_with_validation(self):
153153
class packers_read_sas(_Packers):
154154

155155
def setup(self):
156-
self.f = os.path.join(os.path.dirname(__file__), '..', '..',
157-
'pandas', 'io', 'tests', 'sas', 'data',
158-
'test1.sas7bdat')
159-
self.f2 = os.path.join(os.path.dirname(__file__), '..', '..',
160-
'pandas', 'io', 'tests', 'sas', 'data',
161-
'paxraw_d_short.xpt')
156+
157+
testdir = os.path.join(os.path.dirname(__file__), '..', '..',
158+
'pandas', 'tests', 'io', 'sas')
159+
if not os.path.exists(testdir):
160+
testdir = os.path.join(os.path.dirname(__file__), '..', '..',
161+
'pandas', 'io', 'tests', 'sas')
162+
self.f = os.path.join(testdir, 'data', 'test1.sas7bdat')
163+
self.f2 = os.path.join(testdir, 'data', 'paxraw_d_short.xpt')
162164

163165
def time_read_sas7bdat(self):
164166
pd.read_sas(self.f, format='sas7bdat')
165167

166168
def time_read_xport(self):
167-
pd.read_sas(self.f, format='xport')
169+
pd.read_sas(self.f2, format='xport')
168170

169171

170172
class CSV(_Packers):

asv_bench/benchmarks/pandas_vb_common.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
from pandas import *
22
import pandas as pd
3-
from datetime import timedelta
43
from numpy.random import randn
54
from numpy.random import randint
6-
from numpy.random import permutation
75
import pandas.util.testing as tm
86
import random
97
import numpy as np
@@ -18,7 +16,7 @@
1816
np.random.seed(1234)
1917

2018
# try em until it works!
21-
for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']:
19+
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
2220
try:
2321
lib = import_module(imp)
2422
break

asv_bench/benchmarks/panel_ctor.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .pandas_vb_common import *
2+
from datetime import timedelta
23

34

45
class Constructors1(object):
@@ -24,7 +25,7 @@ class Constructors2(object):
2425
def setup(self):
2526
self.data_frames = {}
2627
for x in range(100):
27-
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
28+
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
2829
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)
2930
self.data_frames[x] = self.df
3031

@@ -36,7 +37,7 @@ class Constructors3(object):
3637
goal_time = 0.2
3738

3839
def setup(self):
39-
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
40+
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
4041
self.data_frames = {}
4142
for x in range(100):
4243
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)

asv_bench/benchmarks/replace.py

-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
from .pandas_vb_common import *
2-
from pandas.compat import range
3-
from datetime import timedelta
42

53

64
class replace_fillna(object):

asv_bench/benchmarks/timeseries.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from pandas.tseries.converter import DatetimeConverter
55
from .pandas_vb_common import *
66
import pandas as pd
7-
from datetime import timedelta
87
import datetime as dt
98
try:
109
import pandas.tseries.holiday
@@ -57,7 +56,7 @@ def setup(self):
5756
self.a = self.rng7[:50000].append(self.rng7[50002:])
5857

5958
def time_add_timedelta(self):
60-
(self.rng + timedelta(minutes=2))
59+
(self.rng + dt.timedelta(minutes=2))
6160

6261
def time_add_offset_delta(self):
6362
(self.rng + self.delta_offset)

0 commit comments

Comments
 (0)