TST/CLN: parametrize tests\resample\test_time_grouper.py (#24013)

simonjayhawkins · jreback · commit 367efb280c69 · 2018-12-03T07:30:52.000-05:00
diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py
@@ -0,0 +1,22 @@
+import pytest
+
+from pandas.tests.resample.test_base import (
+    downsample_methods, resample_methods, upsample_methods)
+
+
+@pytest.fixture(params=downsample_methods)
+def downsample_method(request):
+    """Fixture for parametrization of Grouper downsample methods."""
+    return request.param
+
+
+@pytest.fixture(params=upsample_methods)
+def upsample_method(request):
+    """Fixture for parametrization of Grouper upsample methods."""
+    return request.param
+
+
+@pytest.fixture(params=resample_methods)
+def resample_method(request):
+    """Fixture for parametrization of Grouper resample methods."""
+    return request.param
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
@@ -26,7 +26,7 @@
 
 # The various methods we support
 downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
-                      'median', 'prod', 'var', 'ohlc', 'quantile']
+                      'median', 'prod', 'var', 'std', 'ohlc', 'quantile']
 upsample_methods = ['count', 'size']
 series_methods = ['nunique']
 resample_methods = downsample_methods + upsample_methods + series_methods
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py
@@ -1,13 +1,9 @@
-# pylint: disable=E1101
-
 from datetime import datetime
 from operator import methodcaller
 
 import numpy as np
 import pytest
 
-from pandas.compat import zip
-
 import pandas as pd
 from pandas import DataFrame, Panel, Series
 from pandas.core.indexes.datetimes import date_range
@@ -104,20 +100,21 @@ def f(x):
     tm.assert_panel_equal(result, binagg)
 
 
-def test_fails_on_no_datetime_index():
-    index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex')
-    index_funcs = (tm.makeIntIndex,
-                   tm.makeUnicodeIndex, tm.makeFloatIndex,
-                   lambda m: tm.makeCustomIndex(m, 2))
+@pytest.mark.parametrize('name, func', [
+    ('Int64Index', tm.makeIntIndex),
+    ('Index', tm.makeUnicodeIndex),
+    ('Float64Index', tm.makeFloatIndex),
+    ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2))
+])
+def test_fails_on_no_datetime_index(name, func):
     n = 2
-    for name, func in zip(index_names, index_funcs):
-        index = func(n)
-        df = DataFrame({'a': np.random.randn(n)}, index=index)
+    index = func(n)
+    df = DataFrame({'a': np.random.randn(n)}, index=index)
 
-        msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
-               "or PeriodIndex, but got an instance of %r" % name)
-        with pytest.raises(TypeError, match=msg):
-            df.groupby(TimeGrouper('D'))
+    msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
+           "or PeriodIndex, but got an instance of %r" % name)
+    with pytest.raises(TypeError, match=msg):
+        df.groupby(TimeGrouper('D'))
 
 
 def test_aaa_group_order():
@@ -143,11 +140,13 @@ def test_aaa_group_order():
                           df[4::5])
 
 
-def test_aggregate_normal():
-    # check TimeGrouper's aggregation is identical as normal groupby
+def test_aggregate_normal(resample_method):
+    """Check TimeGrouper's aggregation is identical as normal groupby."""
 
-    n = 20
-    data = np.random.randn(n, 4)
+    if resample_method == 'ohlc':
+        pytest.xfail(reason='DataError: No numeric types to aggregate')
+
+    data = np.random.randn(20, 4)
     normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
     normal_df['key'] = [1, 2, 3, 4, 5] * 4
 
@@ -159,35 +158,11 @@ def test_aggregate_normal():
     normal_grouped = normal_df.groupby('key')
     dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
 
-    for func in ['min', 'max', 'prod', 'var', 'std', 'mean']:
-        expected = getattr(normal_grouped, func)()
-        dt_result = getattr(dt_grouped, func)()
-        expected.index = date_range(start='2013-01-01', freq='D',
-                                    periods=5, name='key')
-        assert_frame_equal(expected, dt_result)
-
-    for func in ['count', 'sum']:
-        expected = getattr(normal_grouped, func)()
-        expected.index = date_range(start='2013-01-01', freq='D',
-                                    periods=5, name='key')
-        dt_result = getattr(dt_grouped, func)()
-        assert_frame_equal(expected, dt_result)
-
-    # GH 7453
-    for func in ['size']:
-        expected = getattr(normal_grouped, func)()
-        expected.index = date_range(start='2013-01-01', freq='D',
-                                    periods=5, name='key')
-        dt_result = getattr(dt_grouped, func)()
-        assert_series_equal(expected, dt_result)
-
-    # GH 7453
-    for func in ['first', 'last']:
-        expected = getattr(normal_grouped, func)()
-        expected.index = date_range(start='2013-01-01', freq='D',
-                                    periods=5, name='key')
-        dt_result = getattr(dt_grouped, func)()
-        assert_frame_equal(expected, dt_result)
+    expected = getattr(normal_grouped, resample_method)()
+    dt_result = getattr(dt_grouped, resample_method)()
+    expected.index = date_range(start='2013-01-01', freq='D',
+                                periods=5, name='key')
+    tm.assert_equal(expected, dt_result)
 
     # if TimeGrouper is used included, 'nth' doesn't work yet
 
@@ -201,34 +176,23 @@ def test_aggregate_normal():
     """
 
 
-@pytest.mark.parametrize('method, unit', [
-    ('sum', 0),
-    ('prod', 1),
+@pytest.mark.parametrize('method, method_args, unit', [
+    ('sum', dict(), 0),
+    ('sum', dict(min_count=0), 0),
+    ('sum', dict(min_count=1), np.nan),
+    ('prod', dict(), 1),
+    ('prod', dict(min_count=0), 1),
+    ('prod', dict(min_count=1), np.nan)
 ])
-def test_resample_entirly_nat_window(method, unit):
+def test_resample_entirly_nat_window(method, method_args, unit):
     s = pd.Series([0] * 2 + [np.nan] * 2,
                   index=pd.date_range('2017', periods=4))
-    # 0 / 1 by default
-    result = methodcaller(method)(s.resample("2d"))
-    expected = pd.Series([0.0, unit],
-                         index=pd.to_datetime(['2017-01-01',
-                                               '2017-01-03']))
-    tm.assert_series_equal(result, expected)
-
-    # min_count=0
-    result = methodcaller(method, min_count=0)(s.resample("2d"))
+    result = methodcaller(method, **method_args)(s.resample("2d"))
     expected = pd.Series([0.0, unit],
                          index=pd.to_datetime(['2017-01-01',
                                                '2017-01-03']))
     tm.assert_series_equal(result, expected)
 
-    # min_count=1
-    result = methodcaller(method, min_count=1)(s.resample("2d"))
-    expected = pd.Series([0.0, np.nan],
-                         index=pd.to_datetime(['2017-01-01',
-                                               '2017-01-03']))
-    tm.assert_series_equal(result, expected)
-
 
 @pytest.mark.parametrize('func, fill_value', [
     ('min', np.nan),
@@ -302,33 +266,22 @@ def test_repr():
     assert result == expected
 
 
-@pytest.mark.parametrize('method, unit', [
-    ('sum', 0),
-    ('prod', 1),
+@pytest.mark.parametrize('method, method_args, expected_values', [
+    ('sum', dict(), [1, 0, 1]),
+    ('sum', dict(min_count=0), [1, 0, 1]),
+    ('sum', dict(min_count=1), [1, np.nan, 1]),
+    ('sum', dict(min_count=2), [np.nan, np.nan, np.nan]),
+    ('prod', dict(), [1, 1, 1]),
+    ('prod', dict(min_count=0), [1, 1, 1]),
+    ('prod', dict(min_count=1), [1, np.nan, 1]),
+    ('prod', dict(min_count=2), [np.nan, np.nan, np.nan]),
 ])
-def test_upsample_sum(method, unit):
+def test_upsample_sum(method, method_args, expected_values):
     s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
     resampled = s.resample("30T")
     index = pd.to_datetime(['2017-01-01T00:00:00',
                             '2017-01-01T00:30:00',
                             '2017-01-01T01:00:00'])
-
-    # 0 / 1 by default
-    result = methodcaller(method)(resampled)
-    expected = pd.Series([1, unit, 1], index=index)
-    tm.assert_series_equal(result, expected)
-
-    # min_count=0
-    result = methodcaller(method, min_count=0)(resampled)
-    expected = pd.Series([1, unit, 1], index=index)
-    tm.assert_series_equal(result, expected)
-
-    # min_count=1
-    result = methodcaller(method, min_count=1)(resampled)
-    expected = pd.Series([1, np.nan, 1], index=index)
-    tm.assert_series_equal(result, expected)
-
-    # min_count>1
-    result = methodcaller(method, min_count=2)(resampled)
-    expected = pd.Series([np.nan, np.nan, np.nan], index=index)
+    result = methodcaller(method, **method_args)(resampled)
+    expected = pd.Series(expected_values, index=index)
     tm.assert_series_equal(result, expected)