1
- # pylint: disable=E1101
2
-
3
1
from datetime import datetime
4
2
from operator import methodcaller
5
3
6
4
import numpy as np
7
5
import pytest
8
6
9
- from pandas .compat import zip
10
-
11
7
import pandas as pd
12
8
from pandas import DataFrame , Panel , Series
13
9
from pandas .core .indexes .datetimes import date_range
@@ -104,20 +100,21 @@ def f(x):
104
100
tm .assert_panel_equal (result , binagg )
105
101
106
102
107
- def test_fails_on_no_datetime_index ():
108
- index_names = ('Int64Index' , 'Index' , 'Float64Index' , 'MultiIndex' )
109
- index_funcs = (tm .makeIntIndex ,
110
- tm .makeUnicodeIndex , tm .makeFloatIndex ,
111
- lambda m : tm .makeCustomIndex (m , 2 ))
103
+ @pytest .mark .parametrize ('name, func' , [
104
+ ('Int64Index' , tm .makeIntIndex ),
105
+ ('Index' , tm .makeUnicodeIndex ),
106
+ ('Float64Index' , tm .makeFloatIndex ),
107
+ ('MultiIndex' , lambda m : tm .makeCustomIndex (m , 2 ))
108
+ ])
109
+ def test_fails_on_no_datetime_index (name , func ):
112
110
n = 2
113
- for name , func in zip (index_names , index_funcs ):
114
- index = func (n )
115
- df = DataFrame ({'a' : np .random .randn (n )}, index = index )
111
+ index = func (n )
112
+ df = DataFrame ({'a' : np .random .randn (n )}, index = index )
116
113
117
- msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
118
- "or PeriodIndex, but got an instance of %r" % name )
119
- with pytest .raises (TypeError , match = msg ):
120
- df .groupby (TimeGrouper ('D' ))
114
+ msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
115
+ "or PeriodIndex, but got an instance of %r" % name )
116
+ with pytest .raises (TypeError , match = msg ):
117
+ df .groupby (TimeGrouper ('D' ))
121
118
122
119
123
120
def test_aaa_group_order ():
@@ -143,11 +140,13 @@ def test_aaa_group_order():
143
140
df [4 ::5 ])
144
141
145
142
146
- def test_aggregate_normal ():
147
- # check TimeGrouper's aggregation is identical as normal groupby
143
+ def test_aggregate_normal (resample_method ):
144
+ """Check TimeGrouper's aggregation is identical as normal groupby."""
148
145
149
- n = 20
150
- data = np .random .randn (n , 4 )
146
+ if resample_method == 'ohlc' :
147
+ pytest .xfail (reason = 'DataError: No numeric types to aggregate' )
148
+
149
+ data = np .random .randn (20 , 4 )
151
150
normal_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
152
151
normal_df ['key' ] = [1 , 2 , 3 , 4 , 5 ] * 4
153
152
@@ -159,35 +158,11 @@ def test_aggregate_normal():
159
158
normal_grouped = normal_df .groupby ('key' )
160
159
dt_grouped = dt_df .groupby (TimeGrouper (key = 'key' , freq = 'D' ))
161
160
162
- for func in ['min' , 'max' , 'prod' , 'var' , 'std' , 'mean' ]:
163
- expected = getattr (normal_grouped , func )()
164
- dt_result = getattr (dt_grouped , func )()
165
- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
166
- periods = 5 , name = 'key' )
167
- assert_frame_equal (expected , dt_result )
168
-
169
- for func in ['count' , 'sum' ]:
170
- expected = getattr (normal_grouped , func )()
171
- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
172
- periods = 5 , name = 'key' )
173
- dt_result = getattr (dt_grouped , func )()
174
- assert_frame_equal (expected , dt_result )
175
-
176
- # GH 7453
177
- for func in ['size' ]:
178
- expected = getattr (normal_grouped , func )()
179
- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
180
- periods = 5 , name = 'key' )
181
- dt_result = getattr (dt_grouped , func )()
182
- assert_series_equal (expected , dt_result )
183
-
184
- # GH 7453
185
- for func in ['first' , 'last' ]:
186
- expected = getattr (normal_grouped , func )()
187
- expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
188
- periods = 5 , name = 'key' )
189
- dt_result = getattr (dt_grouped , func )()
190
- assert_frame_equal (expected , dt_result )
161
+ expected = getattr (normal_grouped , resample_method )()
162
+ dt_result = getattr (dt_grouped , resample_method )()
163
+ expected .index = date_range (start = '2013-01-01' , freq = 'D' ,
164
+ periods = 5 , name = 'key' )
165
+ tm .assert_equal (expected , dt_result )
191
166
192
167
# if TimeGrouper is used included, 'nth' doesn't work yet
193
168
@@ -201,34 +176,23 @@ def test_aggregate_normal():
201
176
"""
202
177
203
178
204
- @pytest .mark .parametrize ('method, unit' , [
205
- ('sum' , 0 ),
206
- ('prod' , 1 ),
179
+ @pytest .mark .parametrize ('method, method_args, unit' , [
180
+ ('sum' , dict (), 0 ),
181
+ ('sum' , dict (min_count = 0 ), 0 ),
182
+ ('sum' , dict (min_count = 1 ), np .nan ),
183
+ ('prod' , dict (), 1 ),
184
+ ('prod' , dict (min_count = 0 ), 1 ),
185
+ ('prod' , dict (min_count = 1 ), np .nan )
207
186
])
208
- def test_resample_entirly_nat_window (method , unit ):
187
+ def test_resample_entirly_nat_window (method , method_args , unit ):
209
188
s = pd .Series ([0 ] * 2 + [np .nan ] * 2 ,
210
189
index = pd .date_range ('2017' , periods = 4 ))
211
- # 0 / 1 by default
212
- result = methodcaller (method )(s .resample ("2d" ))
213
- expected = pd .Series ([0.0 , unit ],
214
- index = pd .to_datetime (['2017-01-01' ,
215
- '2017-01-03' ]))
216
- tm .assert_series_equal (result , expected )
217
-
218
- # min_count=0
219
- result = methodcaller (method , min_count = 0 )(s .resample ("2d" ))
190
+ result = methodcaller (method , ** method_args )(s .resample ("2d" ))
220
191
expected = pd .Series ([0.0 , unit ],
221
192
index = pd .to_datetime (['2017-01-01' ,
222
193
'2017-01-03' ]))
223
194
tm .assert_series_equal (result , expected )
224
195
225
- # min_count=1
226
- result = methodcaller (method , min_count = 1 )(s .resample ("2d" ))
227
- expected = pd .Series ([0.0 , np .nan ],
228
- index = pd .to_datetime (['2017-01-01' ,
229
- '2017-01-03' ]))
230
- tm .assert_series_equal (result , expected )
231
-
232
196
233
197
@pytest .mark .parametrize ('func, fill_value' , [
234
198
('min' , np .nan ),
@@ -302,33 +266,22 @@ def test_repr():
302
266
assert result == expected
303
267
304
268
305
- @pytest .mark .parametrize ('method, unit' , [
306
- ('sum' , 0 ),
307
- ('prod' , 1 ),
269
+ @pytest .mark .parametrize ('method, method_args, expected_values' , [
270
+ ('sum' , dict (), [1 , 0 , 1 ]),
271
+ ('sum' , dict (min_count = 0 ), [1 , 0 , 1 ]),
272
+ ('sum' , dict (min_count = 1 ), [1 , np .nan , 1 ]),
273
+ ('sum' , dict (min_count = 2 ), [np .nan , np .nan , np .nan ]),
274
+ ('prod' , dict (), [1 , 1 , 1 ]),
275
+ ('prod' , dict (min_count = 0 ), [1 , 1 , 1 ]),
276
+ ('prod' , dict (min_count = 1 ), [1 , np .nan , 1 ]),
277
+ ('prod' , dict (min_count = 2 ), [np .nan , np .nan , np .nan ]),
308
278
])
309
- def test_upsample_sum (method , unit ):
279
+ def test_upsample_sum (method , method_args , expected_values ):
310
280
s = pd .Series (1 , index = pd .date_range ("2017" , periods = 2 , freq = "H" ))
311
281
resampled = s .resample ("30T" )
312
282
index = pd .to_datetime (['2017-01-01T00:00:00' ,
313
283
'2017-01-01T00:30:00' ,
314
284
'2017-01-01T01:00:00' ])
315
-
316
- # 0 / 1 by default
317
- result = methodcaller (method )(resampled )
318
- expected = pd .Series ([1 , unit , 1 ], index = index )
319
- tm .assert_series_equal (result , expected )
320
-
321
- # min_count=0
322
- result = methodcaller (method , min_count = 0 )(resampled )
323
- expected = pd .Series ([1 , unit , 1 ], index = index )
324
- tm .assert_series_equal (result , expected )
325
-
326
- # min_count=1
327
- result = methodcaller (method , min_count = 1 )(resampled )
328
- expected = pd .Series ([1 , np .nan , 1 ], index = index )
329
- tm .assert_series_equal (result , expected )
330
-
331
- # min_count>1
332
- result = methodcaller (method , min_count = 2 )(resampled )
333
- expected = pd .Series ([np .nan , np .nan , np .nan ], index = index )
285
+ result = methodcaller (method , ** method_args )(resampled )
286
+ expected = pd .Series (expected_values , index = index )
334
287
tm .assert_series_equal (result , expected )
0 commit comments