Skip to content

Commit bdf7525

Browse files
committed
more tests & change observed=None
1 parent bdb7ad3 commit bdf7525

File tree

3 files changed

+34
-6
lines changed

3 files changed

+34
-6
lines changed

pandas/conftest.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,14 @@ def ip():
6666
return InteractiveShell()
6767

6868

69-
@pytest.fixture(params=[True, False])
69+
@pytest.fixture(params=[True, False, None])
7070
def observed(request):
7171
""" pass in the observed keyword to groupby for [True, False]
7272
This indicates whether categoricals should return values for
73-
values which are not in the grouper [False], or only values which
74-
appear in the grouper [True] """
73+
values which are not in the grouper [False / None], or only values which
74+
appear in the grouper [True]. [None] is supported for future compatibility
75+
if we decide to change the default (and would need to warn if this
76+
parameter is not passed)"""
7577
return request.param
7678

7779

pandas/core/groupby/groupby.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ class _GroupBy(PandasObject, SelectionMixin):
557557
def __init__(self, obj, keys=None, axis=0, level=None,
558558
grouper=None, exclusions=None, selection=None, as_index=True,
559559
sort=True, group_keys=True, squeeze=False,
560-
observed=False, **kwargs):
560+
observed=None, **kwargs):
561561

562562
self._selection = selection
563563

@@ -2907,7 +2907,7 @@ class Grouping(object):
29072907
"""
29082908

29092909
def __init__(self, index, grouper=None, obj=None, name=None, level=None,
2910-
sort=True, observed=False, in_axis=False):
2910+
sort=True, observed=None, in_axis=False):
29112911

29122912
self.name = name
29132913
self.level = level
@@ -2964,6 +2964,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
29642964
# a passed Categorical
29652965
elif is_categorical_dtype(self.grouper):
29662966

2967+
# observed can be True/False/None
2968+
# we treat None as False for now. Keeping None distinct means
2969+
# that, if we later need to warn when observed is not passed,
2970+
# we have that option
2971+
# gh-20583
2972+
29672973
self.all_grouper = self.grouper
29682974
self.grouper = self.grouper._codes_for_groupby(
29692975
self.sort, observed)
@@ -3082,7 +3088,7 @@ def groups(self):
30823088

30833089

30843090
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
3085-
observed=False, mutated=False, validate=True):
3091+
observed=None, mutated=False, validate=True):
30863092
"""
30873093
create and return a BaseGrouper, which is an internal
30883094
mapping of how to create the grouper indexers.

pandas/tests/groupby/test_categorical.py

+20
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,26 @@ def test_observed_perf():
395395
assert result.index.levels[2].nunique() == df.other_id.nunique()
396396

397397

398+
def test_observed_groups(observed):
399+
# gh-20583
400+
# test that we have the appropriate groups
401+
402+
cat = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
403+
df = pd.DataFrame({'cat': cat, 'vals': [1, 2, 3]})
404+
g = df.groupby('cat', observed=observed)
405+
406+
result = g.groups
407+
if observed:
408+
expected = {'a': Index([0, 2], dtype='int64'),
409+
'c': Index([1], dtype='int64')}
410+
else:
411+
expected = {'a': Index([0, 2], dtype='int64'),
412+
'b': Index([], dtype='int64'),
413+
'c': Index([1], dtype='int64')}
414+
415+
tm.assert_dict_equal(result, expected)
416+
417+
398418
def test_datetime():
399419
# GH9049: ensure backward compatibility
400420
levels = pd.date_range('2014-01-01', periods=4)

0 commit comments

Comments
 (0)