Skip to content

Commit 0a00365

Browse files
peterpanmj authored and jreback committed
FIX: add support for desc order when ranking infs with nans #19538 (#20091)
1 parent 77d5ea0 commit 0a00365

File tree

3 files changed

+42
-25
lines changed

3 files changed

+42
-25
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,7 @@ Numeric
10381038
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
10391039
- Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`)
10401040
- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`)
1041+
- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending=False`` failed to return correct ranks for infinity if ``NaN`` values were present (:issue:`19538`)
10411042
- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`)
10421043

10431044

pandas/_libs/algos_rank_helper.pxi.in

+3-3
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
135135

136136
sorted_data = values.take(_as)
137137
sorted_mask = mask.take(_as)
138-
_indices = order[1].take(_as).nonzero()[0]
138+
_indices = np.diff(sorted_mask).nonzero()[0]
139139
non_na_idx = _indices[0] if len(_indices) > 0 else -1
140140
argsorted = _as.astype('i8')
141141

@@ -153,7 +153,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
153153

154154
if (i == n - 1 or
155155
are_diff(util.get_value_at(sorted_data, i + 1), val) or
156-
i == non_na_idx - 1):
156+
i == non_na_idx):
157157
if tiebreak == TIEBREAK_AVERAGE:
158158
for j in range(i - dups + 1, i + 1):
159159
ranks[argsorted[j]] = sum_ranks / dups
@@ -190,7 +190,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
190190
count += 1.0
191191

192192
if (i == n - 1 or sorted_data[i + 1] != val or
193-
i == non_na_idx - 1):
193+
i == non_na_idx):
194194
if tiebreak == TIEBREAK_AVERAGE:
195195
for j in range(i - dups + 1, i + 1):
196196
ranks[argsorted[j]] = sum_ranks / dups

pandas/tests/series/test_rank.py

+38-22
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from pandas.tests.series.common import TestData
1717
from pandas._libs.tslib import iNaT
1818
from pandas._libs.algos import Infinity, NegInfinity
19+
from itertools import chain
20+
import pandas.util._test_decorators as td
1921

2022

2123
class TestSeriesRank(TestData):
@@ -257,38 +259,52 @@ def _check(s, expected, method='average'):
257259
series = s if dtype is None else s.astype(dtype)
258260
_check(series, results[method], method=method)
259261

260-
def test_rank_tie_methods_on_infs_nans(self):
262+
@td.skip_if_no_scipy
263+
@pytest.mark.parametrize('ascending', [True, False])
264+
@pytest.mark.parametrize('method', ['average', 'min', 'max', 'first',
265+
'dense'])
266+
@pytest.mark.parametrize('na_option', ['top', 'bottom', 'keep'])
267+
def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
261268
dtypes = [('object', None, Infinity(), NegInfinity()),
262269
('float64', np.nan, np.inf, -np.inf)]
263270
chunk = 3
264271
disabled = set([('object', 'first')])
265272

266-
def _check(s, expected, method='average', na_option='keep'):
267-
result = s.rank(method=method, na_option=na_option)
273+
def _check(s, method, na_option, ascending):
274+
exp_ranks = {
275+
'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
276+
'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
277+
'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
278+
'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
279+
'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
280+
}
281+
ranks = exp_ranks[method]
282+
if na_option == 'top':
283+
order = [ranks[1], ranks[0], ranks[2]]
284+
elif na_option == 'bottom':
285+
order = [ranks[0], ranks[2], ranks[1]]
286+
else:
287+
order = [ranks[0], [np.nan] * chunk, ranks[1]]
288+
expected = order if ascending else order[::-1]
289+
expected = list(chain.from_iterable(expected))
290+
result = s.rank(method=method, na_option=na_option,
291+
ascending=ascending)
268292
tm.assert_series_equal(result, Series(expected, dtype='float64'))
269293

270-
exp_ranks = {
271-
'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
272-
'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
273-
'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
274-
'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
275-
'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
276-
}
277-
na_options = ('top', 'bottom', 'keep')
278294
for dtype, na_value, pos_inf, neg_inf in dtypes:
279295
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
280296
iseries = Series(in_arr, dtype=dtype)
281-
for method, na_opt in product(exp_ranks.keys(), na_options):
282-
ranks = exp_ranks[method]
283-
if (dtype, method) in disabled:
284-
continue
285-
if na_opt == 'top':
286-
order = ranks[1] + ranks[0] + ranks[2]
287-
elif na_opt == 'bottom':
288-
order = ranks[0] + ranks[2] + ranks[1]
289-
else:
290-
order = ranks[0] + [np.nan] * chunk + ranks[1]
291-
_check(iseries, order, method, na_opt)
297+
if (dtype, method) in disabled:
298+
continue
299+
_check(iseries, method, na_option, ascending)
300+
301+
def test_rank_desc_mix_nans_infs(self):
302+
# GH 19538
303+
# check descending ranking when NaNs and infs are mixed
304+
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
305+
result = iseries.rank(ascending=False)
306+
exp = Series([3, np.nan, 1, 4, 2], dtype='float64')
307+
tm.assert_series_equal(result, exp)
292308

293309
def test_rank_methods_series(self):
294310
pytest.importorskip('scipy.stats.special')

0 commit comments

Comments
 (0)