Skip to content

Commit fe696e4

Browse files
jbrockmendel authored and jreback committed
standardize signature for Index reductions, implement nanmean for datetime64 dtypes (#24293)
1 parent 1d760af commit fe696e4

File tree

10 files changed

+176
-46
lines changed

10 files changed

+176
-46
lines changed

pandas/core/base.py

+37 -9
Original file line number | Diff line number | Diff line change
@@ -973,10 +973,16 @@ def _ndarray_values(self):
973973
def empty(self):
974974
return not self.size
975975

976-
def max(self):
976+
def max(self, axis=None, skipna=True):
977977
"""
978978
Return the maximum value of the Index.
979979
980+
Parameters
981+
----------
982+
axis : int, optional
983+
For compatibility with NumPy. Only 0 or None are allowed.
984+
skipna : bool, default True
985+
980986
Returns
981987
-------
982988
scalar
@@ -1004,22 +1010,36 @@ def max(self):
10041010
>>> idx.max()
10051011
('b', 2)
10061012
"""
1007-
return nanops.nanmax(self.values)
1013+
nv.validate_minmax_axis(axis)
1014+
return nanops.nanmax(self._values, skipna=skipna)
10081015

1009-
def argmax(self, axis=None):
1016+
def argmax(self, axis=None, skipna=True):
10101017
"""
10111018
Return a ndarray of the maximum argument indexer.
10121019
1020+
Parameters
1021+
----------
1022+
axis : {None}
1023+
Dummy argument for consistency with Series
1024+
skipna : bool, default True
1025+
10131026
See Also
10141027
--------
10151028
numpy.ndarray.argmax
10161029
"""
1017-
return nanops.nanargmax(self.values)
1030+
nv.validate_minmax_axis(axis)
1031+
return nanops.nanargmax(self._values, skipna=skipna)
10181032

1019-
def min(self):
1033+
def min(self, axis=None, skipna=True):
10201034
"""
10211035
Return the minimum value of the Index.
10221036
1037+
Parameters
1038+
----------
1039+
axis : {None}
1040+
Dummy argument for consistency with Series
1041+
skipna : bool, default True
1042+
10231043
Returns
10241044
-------
10251045
scalar
@@ -1047,17 +1067,25 @@ def min(self):
10471067
>>> idx.min()
10481068
('a', 1)
10491069
"""
1050-
return nanops.nanmin(self.values)
1070+
nv.validate_minmax_axis(axis)
1071+
return nanops.nanmin(self._values, skipna=skipna)
10511072

1052-
def argmin(self, axis=None):
1073+
def argmin(self, axis=None, skipna=True):
10531074
"""
10541075
Return a ndarray of the minimum argument indexer.
10551076
1077+
Parameters
1078+
----------
1079+
axis : {None}
1080+
Dummy argument for consistency with Series
1081+
skipna : bool, default True
1082+
10561083
See Also
10571084
--------
10581085
numpy.ndarray.argmin
10591086
"""
1060-
return nanops.nanargmin(self.values)
1087+
nv.validate_minmax_axis(axis)
1088+
return nanops.nanargmin(self._values, skipna=skipna)
10611089

10621090
def tolist(self):
10631091
"""
@@ -1110,7 +1138,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
11101138
if func is None:
11111139
raise TypeError("{klass} cannot perform the operation {op}".format(
11121140
klass=self.__class__.__name__, op=name))
1113-
return func(**kwds)
1141+
return func(skipna=skipna, **kwds)
11141142

11151143
def _map_values(self, mapper, na_action=None):
11161144
"""

pandas/core/dtypes/missing.py

+2
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,8 @@ def _isna_ndarraylike(obj):
198198
else:
199199
values = obj
200200
result = values.isna()
201+
elif isinstance(obj, ABCDatetimeArray):
202+
return obj.isna()
201203
elif is_string_dtype(dtype):
202204
# Working around NumPy ticket 1542
203205
shape = values.shape

pandas/core/indexes/datetimelike.py

+24 -12
Original file line number | Diff line number | Diff line change
@@ -267,35 +267,41 @@ def tolist(self):
267267
"""
268268
return list(self.astype(object))
269269

270-
def min(self, axis=None, *args, **kwargs):
270+
def min(self, axis=None, skipna=True, *args, **kwargs):
271271
"""
272272
Return the minimum value of the Index or minimum along
273273
an axis.
274274
275275
See Also
276276
--------
277277
numpy.ndarray.min
278+
Series.min : Return the minimum value in a Series.
278279
"""
279280
nv.validate_min(args, kwargs)
280281
nv.validate_minmax_axis(axis)
281282

282-
try:
283-
i8 = self.asi8
283+
if not len(self):
284+
return self._na_value
284285

286+
i8 = self.asi8
287+
try:
285288
# quick check
286289
if len(i8) and self.is_monotonic:
287290
if i8[0] != iNaT:
288291
return self._box_func(i8[0])
289292

290293
if self.hasnans:
291-
min_stamp = self[~self._isnan].asi8.min()
294+
if skipna:
295+
min_stamp = self[~self._isnan].asi8.min()
296+
else:
297+
return self._na_value
292298
else:
293299
min_stamp = i8.min()
294300
return self._box_func(min_stamp)
295301
except ValueError:
296302
return self._na_value
297303

298-
def argmin(self, axis=None, *args, **kwargs):
304+
def argmin(self, axis=None, skipna=True, *args, **kwargs):
299305
"""
300306
Returns the indices of the minimum values along an axis.
301307
@@ -312,41 +318,47 @@ def argmin(self, axis=None, *args, **kwargs):
312318
i8 = self.asi8
313319
if self.hasnans:
314320
mask = self._isnan
315-
if mask.all():
321+
if mask.all() or not skipna:
316322
return -1
317323
i8 = i8.copy()
318324
i8[mask] = np.iinfo('int64').max
319325
return i8.argmin()
320326

321-
def max(self, axis=None, *args, **kwargs):
327+
def max(self, axis=None, skipna=True, *args, **kwargs):
322328
"""
323329
Return the maximum value of the Index or maximum along
324330
an axis.
325331
326332
See Also
327333
--------
328334
numpy.ndarray.max
335+
Series.max : Return the maximum value in a Series.
329336
"""
330337
nv.validate_max(args, kwargs)
331338
nv.validate_minmax_axis(axis)
332339

333-
try:
334-
i8 = self.asi8
340+
if not len(self):
341+
return self._na_value
335342

343+
i8 = self.asi8
344+
try:
336345
# quick check
337346
if len(i8) and self.is_monotonic:
338347
if i8[-1] != iNaT:
339348
return self._box_func(i8[-1])
340349

341350
if self.hasnans:
342-
max_stamp = self[~self._isnan].asi8.max()
351+
if skipna:
352+
max_stamp = self[~self._isnan].asi8.max()
353+
else:
354+
return self._na_value
343355
else:
344356
max_stamp = i8.max()
345357
return self._box_func(max_stamp)
346358
except ValueError:
347359
return self._na_value
348360

349-
def argmax(self, axis=None, *args, **kwargs):
361+
def argmax(self, axis=None, skipna=True, *args, **kwargs):
350362
"""
351363
Returns the indices of the maximum values along an axis.
352364
@@ -363,7 +375,7 @@ def argmax(self, axis=None, *args, **kwargs):
363375
i8 = self.asi8
364376
if self.hasnans:
365377
mask = self._isnan
366-
if mask.all():
378+
if mask.all() or not skipna:
367379
return -1
368380
i8 = i8.copy()
369381
i8[mask] = 0

pandas/core/indexes/range.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -297,12 +297,14 @@ def _minmax(self, meth):
297297

298298
return self._start + self._step * no_steps
299299

300-
def min(self):
300+
def min(self, axis=None, skipna=True):
301301
"""The minimum value of the RangeIndex"""
302+
nv.validate_minmax_axis(axis)
302303
return self._minmax('min')
303304

304-
def max(self):
305+
def max(self, axis=None, skipna=True):
305306
"""The maximum value of the RangeIndex"""
307+
nv.validate_minmax_axis(axis)
306308
return self._minmax('max')
307309

308310
def argsort(self, *args, **kwargs):

pandas/core/nanops.py

+29 -18
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,14 @@
66

77
import numpy as np
88

9-
from pandas._libs import lib, tslibs
9+
from pandas._libs import iNaT, lib, tslibs
1010
import pandas.compat as compat
1111

1212
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
1313
from pandas.core.dtypes.common import (
1414
_get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, is_complex_dtype,
15-
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float,
16-
is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
15+
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
16+
is_float, is_float_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
1717
is_object_dtype, is_scalar, is_timedelta64_dtype)
1818
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
1919

@@ -203,15 +203,28 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
203203
if necessary copy and mask using the specified fill_value
204204
copy = True will force the copy
205205
"""
206-
values = com.values_from_object(values)
206+
207+
if is_datetime64tz_dtype(values):
208+
# com.values_from_object returns M8[ns] dtype instead of tz-aware,
209+
# so this case must be handled separately from the rest
210+
dtype = values.dtype
211+
values = getattr(values, "_values", values)
212+
else:
213+
values = com.values_from_object(values)
214+
dtype = values.dtype
207215

208216
if mask is None:
209217
if isfinite:
210218
mask = _isfinite(values)
211219
else:
212220
mask = isna(values)
213221

214-
dtype = values.dtype
222+
if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
223+
# changing timedelta64/datetime64 to int64 needs to happen after
224+
# finding `mask` above
225+
values = getattr(values, "asi8", values)
226+
values = values.view(np.int64)
227+
215228
dtype_ok = _na_ok_dtype(dtype)
216229

217230
# get our fill value (in case we need to provide an alternative
@@ -232,8 +245,6 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
232245
elif copy:
233246
values = values.copy()
234247

235-
values = _view_if_needed(values)
236-
237248
# return a platform independent precision dtype
238249
dtype_max = dtype
239250
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
@@ -259,21 +270,19 @@ def _na_ok_dtype(dtype):
259270
(np.integer, np.timedelta64, np.datetime64))
260271

261272

262-
def _view_if_needed(values):
263-
if is_datetime_or_timedelta_dtype(values):
264-
return values.view(np.int64)
265-
return values
266-
267-
268273
def _wrap_results(result, dtype, fill_value=None):
269274
""" wrap our results if needed """
270275

271-
if is_datetime64_dtype(dtype):
276+
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
277+
if fill_value is None:
278+
# GH#24293
279+
fill_value = iNaT
272280
if not isinstance(result, np.ndarray):
281+
tz = getattr(dtype, 'tz', None)
273282
assert not isna(fill_value), "Expected non-null fill_value"
274283
if result == fill_value:
275284
result = np.nan
276-
result = tslibs.Timestamp(result)
285+
result = tslibs.Timestamp(result, tz=tz)
277286
else:
278287
result = result.view(dtype)
279288
elif is_timedelta64_dtype(dtype):
@@ -426,7 +435,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
426435
return _wrap_results(the_sum, dtype)
427436

428437

429-
@disallow('M8')
430438
@bottleneck_switch()
431439
def nanmean(values, axis=None, skipna=True, mask=None):
432440
"""
@@ -457,7 +465,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
457465
values, skipna, 0, mask=mask)
458466
dtype_sum = dtype_max
459467
dtype_count = np.float64
460-
if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype):
468+
if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or
469+
is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)):
461470
dtype_sum = np.float64
462471
elif is_float_dtype(dtype):
463472
dtype_sum = dtype
@@ -466,7 +475,9 @@ def nanmean(values, axis=None, skipna=True, mask=None):
466475
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
467476

468477
if axis is not None and getattr(the_sum, 'ndim', False):
469-
the_mean = the_sum / count
478+
with np.errstate(all="ignore"):
479+
# suppress division by zero warnings
480+
the_mean = the_sum / count
470481
ct_mask = count == 0
471482
if ct_mask.any():
472483
the_mean[ct_mask] = np.nan

pandas/core/series.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@
1717

1818
from pandas.core.dtypes.common import (
1919
_is_unorderable_exception, ensure_platform_int, is_bool,
20-
is_categorical_dtype, is_datetime64tz_dtype, is_datetimelike, is_dict_like,
21-
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
22-
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
20+
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
21+
is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type,
22+
is_hashable, is_integer, is_iterator, is_list_like, is_scalar,
23+
is_string_like, is_timedelta64_dtype)
2324
from pandas.core.dtypes.generic import (
2425
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
2526
from pandas.core.dtypes.missing import (
@@ -3537,6 +3538,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
35373538
# dispatch to ExtensionArray interface
35383539
if isinstance(delegate, ExtensionArray):
35393540
return delegate._reduce(name, skipna=skipna, **kwds)
3541+
elif is_datetime64_dtype(delegate):
3542+
# use DatetimeIndex implementation to handle skipna correctly
3543+
delegate = DatetimeIndex(delegate)
35403544

35413545
# dispatch to numpy arrays
35423546
elif isinstance(delegate, np.ndarray):

pandas/tests/indexes/test_range.py

+8
Original file line number | Diff line number | Diff line change
@@ -895,10 +895,18 @@ def test_max_min(self, start, stop, step):
895895
result = idx.max()
896896
assert result == expected
897897

898+
# skipna should be irrelevant since RangeIndex should never have NAs
899+
result2 = idx.max(skipna=False)
900+
assert result2 == expected
901+
898902
expected = idx._int64index.min()
899903
result = idx.min()
900904
assert result == expected
901905

906+
# skipna should be irrelevant since RangeIndex should never have NAs
907+
result2 = idx.min(skipna=False)
908+
assert result2 == expected
909+
902910
# empty
903911
idx = RangeIndex(start, stop, -step)
904912
assert isna(idx.max())

0 commit comments

Comments
 (0)