From 23643305b2b42c3f76f6982c3b98960283fee594 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 18:22:19 -0400
Subject: [PATCH 01/13] CLN: rank_1d followup

---
 pandas/_libs/algos.pyx | 86 +++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 44 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 047eb848b7540..acbe114db82c6 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -951,8 +951,9 @@ def rank_1d(
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals
         ndarray[uint8_t, ndim=1] mask
-        bint keep_na, at_end, next_val_diff, check_labels
+        bint keep_na, at_end, next_val_diff, check_labels, set_as_na
         rank_t nan_fill_val
+        float computed_rank
 
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'
@@ -1037,11 +1038,8 @@ def rank_1d(
             # the number of occurrence of current value) and assign the ranks
             # based on the starting index of the current group (grp_start)
             # and the current index
-            if not at_end:
-                next_val_diff = are_diff(masked_vals[lexsort_indexer[i]],
-                                         masked_vals[lexsort_indexer[i+1]])
-            else:
-                next_val_diff = True
+            next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]],
+                                    masked_vals[lexsort_indexer[i+1]])
 
             if (next_val_diff
                     or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
@@ -1051,28 +1049,32 @@ def rank_1d(
             ):
                 # if keep_na, check for missing values and assign back
                 # to the result where appropriate
-                if keep_na and mask[lexsort_indexer[i]]:
-                    for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = NaN
-                        grp_na_count = dups
+                set_as_na = keep_na and mask[lexsort_indexer[i]]
+
+                # For all cases except TIEBREAK_FIRST when not setting
+                # nulls, we set the same value at each index
+                if set_as_na:
+                    computed_rank = NaN
+                    grp_na_count = dups
                 elif tiebreak == TIEBREAK_AVERAGE:
-                    for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
+                    computed_rank = sum_ranks / <float64_t>dups
                 elif tiebreak == TIEBREAK_MIN:
-                    for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = i - grp_start - dups + 2
+                    computed_rank = i - grp_start - dups + 2
                 elif tiebreak == TIEBREAK_MAX:
-                    for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = i - grp_start + 1
-                elif tiebreak == TIEBREAK_FIRST:
+                    computed_rank = i - grp_start + 1
+                elif tiebreak == TIEBREAK_DENSE:
+                    computed_rank = grp_vals_seen
+                else:
                     for j in range(i - dups + 1, i + 1):
                         if ascending:
                             out[lexsort_indexer[j]] = j + 1 - grp_start
                         else:
-                            out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
-                elif tiebreak == TIEBREAK_DENSE:
+                            out[lexsort_indexer[j]] = \
+                                (2 * i - j - dups + 2 - grp_start)
+
+                if set_as_na or tiebreak != TIEBREAK_FIRST:
                     for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = grp_vals_seen
+                        out[lexsort_indexer[j]] = computed_rank
 
                 # look forward to the next value (using the sorting in _as)
                 # if the value does not equal the current value then we need to
@@ -1083,7 +1085,6 @@ def rank_1d(
                                      ^ mask[lexsort_indexer[i+1]]):
                     dups = sum_ranks = 0
                     grp_vals_seen += 1
-                    grp_tie_count += 1
 
                 # Similar to the previous conditional, check now if we are
                 # moving to a new group. If so, keep track of the index where
@@ -1102,10 +1103,9 @@ def rank_1d(
                     else:
                         for j in range(grp_start, i + 1):
                             grp_sizes[lexsort_indexer[j]] = \
-                                (grp_tie_count - (grp_na_count > 0))
+                                (grp_vals_seen - 1 - (grp_na_count > 0))
                     dups = sum_ranks = 0
                     grp_na_count = 0
-                    grp_tie_count = 0
                     grp_start = i + 1
                     grp_vals_seen = 1
     else:
@@ -1124,11 +1124,8 @@ def rank_1d(
                 # the number of occurrence of current value) and assign the ranks
                 # based on the starting index of the current group (grp_start)
                 # and the current index
-                if not at_end:
-                    next_val_diff = (masked_vals[lexsort_indexer[i]]
-                                     != masked_vals[lexsort_indexer[i+1]])
-                else:
-                    next_val_diff = True
+                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]]
+                                    != masked_vals[lexsort_indexer[i+1]])
 
                 if (next_val_diff
                         or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
@@ -1138,29 +1135,32 @@ def rank_1d(
                 ):
                     # if keep_na, check for missing values and assign back
                     # to the result where appropriate
-                    if keep_na and mask[lexsort_indexer[i]]:
-                        for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = NaN
-                            grp_na_count = dups
+                    set_as_na = keep_na and mask[lexsort_indexer[i]]
+
+                    # For all cases except TIEBREAK_FIRST when not setting
+                    # nulls, we set the same value at each index
+                    if set_as_na:
+                        computed_rank = NaN
+                        grp_na_count = dups
                     elif tiebreak == TIEBREAK_AVERAGE:
-                        for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
+                        computed_rank = sum_ranks / <float64_t>dups
                     elif tiebreak == TIEBREAK_MIN:
-                        for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = i - grp_start - dups + 2
+                        computed_rank = i - grp_start - dups + 2
                     elif tiebreak == TIEBREAK_MAX:
-                        for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = i - grp_start + 1
-                    elif tiebreak == TIEBREAK_FIRST:
+                        computed_rank = i - grp_start + 1
+                    elif tiebreak == TIEBREAK_DENSE:
+                        computed_rank = grp_vals_seen
+                    else:
                         for j in range(i - dups + 1, i + 1):
                             if ascending:
                                 out[lexsort_indexer[j]] = j + 1 - grp_start
                             else:
                                 out[lexsort_indexer[j]] = \
                                     (2 * i - j - dups + 2 - grp_start)
-                    elif tiebreak == TIEBREAK_DENSE:
+
+                    if set_as_na or tiebreak != TIEBREAK_FIRST:
                         for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = grp_vals_seen
+                            out[lexsort_indexer[j]] = computed_rank
 
                     # look forward to the next value (using the sorting in
                     # lexsort_indexer) if the value does not equal the current
@@ -1171,7 +1171,6 @@ def rank_1d(
                                          ^ mask[lexsort_indexer[i+1]]):
                         dups = sum_ranks = 0
                         grp_vals_seen += 1
-                        grp_tie_count += 1
 
                     # Similar to the previous conditional, check now if we are
                     # moving to a new group. If so, keep track of the index where
@@ -1189,10 +1188,9 @@ def rank_1d(
                         else:
                             for j in range(grp_start, i + 1):
                                 grp_sizes[lexsort_indexer[j]] = \
-                                    (grp_tie_count - (grp_na_count > 0))
+                                    (grp_vals_seen - 1 - (grp_na_count > 0))
                         dups = sum_ranks = 0
                         grp_na_count = 0
-                        grp_tie_count = 0
                         grp_start = i + 1
                         grp_vals_seen = 1
 

From 999d8802bd0044ba37f589eb3b0ef7ea4b17916a Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 19:00:35 -0400
Subject: [PATCH 02/13] WIP: compute group_changed once per rank_1d iteration

---
 pandas/_libs/algos.pyx | 47 ++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index acbe114db82c6..caec06d498312 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -963,6 +963,7 @@ def rank_1d(
     assert <Py_ssize_t>len(labels) == N
     out = np.empty(N)
     grp_sizes = np.ones(N)
+
     # If all 0 labels, can short-circuit later label
     # comparisons
     check_labels = np.any(labels)
@@ -1026,6 +1027,7 @@ def rank_1d(
     if rank_t is object:
         for i in range(N):
             at_end = i == N - 1
+
             # dups and sum_ranks will be incremented each loop where
             # the value / group remains the same, and should be reset
             # when either of those change
@@ -1033,20 +1035,23 @@ def rank_1d(
             dups += 1
             sum_ranks += i - grp_start + 1
 
+            next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]],
+                                    masked_vals[lexsort_indexer[i+1]])
+
+            # We'll need this check later anyway to determine group size, so just
+            # compute it here since shortcircuiting won't help
+            group_changed = at_end or (check_labels and
+                                       (labels[lexsort_indexer[i]]
+                                        != labels[lexsort_indexer[i+1]]))
+
             # Update out only when there is a transition of values or labels.
             # When a new value or group is encountered, go back #dups steps(
             # the number of occurrence of current value) and assign the ranks
             # based on the starting index of the current group (grp_start)
             # and the current index
-            next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]],
-                                    masked_vals[lexsort_indexer[i+1]])
+            if (next_val_diff or group_changed
+                    or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])):
 
-            if (next_val_diff
-                    or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
-                    or (check_labels
-                        and (labels[lexsort_indexer[i]]
-                             != labels[lexsort_indexer[i+1]]))
-            ):
                 # if keep_na, check for missing values and assign back
                 # to the result where appropriate
                 set_as_na = keep_na and mask[lexsort_indexer[i]]
@@ -1092,10 +1097,7 @@ def rank_1d(
                 # decrement that from their position. fill in the size of each
                 # group encountered (used by pct calculations later). also be
                 # sure to reset any of the items helping to calculate dups
-                if (at_end or
-                        (check_labels
-                         and (labels[lexsort_indexer[i]]
-                              != labels[lexsort_indexer[i+1]]))):
+                if group_changed:
                     if tiebreak != TIEBREAK_DENSE:
                         for j in range(grp_start, i + 1):
                             grp_sizes[lexsort_indexer[j]] = \
@@ -1112,6 +1114,7 @@ def rank_1d(
         with nogil:
             for i in range(N):
                 at_end = i == N - 1
+
                 # dups and sum_ranks will be incremented each loop where
                 # the value / group remains the same, and should be reset
                 # when either of those change
@@ -1119,14 +1122,20 @@ def rank_1d(
                 dups += 1
                 sum_ranks += i - grp_start + 1
 
+                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]]
+                                    != masked_vals[lexsort_indexer[i+1]])
+
+                # We'll need this check later anyway to determine group size, so just
+                # compute it here since shortcircuiting won't help
+                group_changed = at_end or (check_labels and
+                                       (labels[lexsort_indexer[i]]
+                                        != labels[lexsort_indexer[i+1]]))
+
                 # Update out only when there is a transition of values or labels.
                 # When a new value or group is encountered, go back #dups steps(
                 # the number of occurrence of current value) and assign the ranks
                 # based on the starting index of the current group (grp_start)
                 # and the current index
-                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]]
-                                    != masked_vals[lexsort_indexer[i+1]])
-
                 if (next_val_diff
                         or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
                         or (check_labels
@@ -1137,8 +1146,8 @@ def rank_1d(
                     # to the result where appropriate
                     set_as_na = keep_na and mask[lexsort_indexer[i]]
 
-                    # For all cases except TIEBREAK_FIRST when not setting
-                    # nulls, we set the same value at each index
+                    # For all cases except TIEBREAK_FIRST and a non-null value,
+                    # we set the same value at each index
                     if set_as_na:
                         computed_rank = NaN
                         grp_na_count = dups
@@ -1178,9 +1187,7 @@ def rank_1d(
                     # decrement that from their position. fill in the size of each
                     # group encountered (used by pct calculations later). also be
                     # sure to reset any of the items helping to calculate dups
-                    if at_end or (check_labels and
-                                  (labels[lexsort_indexer[i]]
-                                   != labels[lexsort_indexer[i+1]])):
+                    if group_changed:
                         if tiebreak != TIEBREAK_DENSE:
                             for j in range(grp_start, i + 1):
                                 grp_sizes[lexsort_indexer[j]] = \

From d360871bd03f62c4fd245ea1d026f2e804c86af9 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 19:13:43 -0400
Subject: [PATCH 03/13] WIP: declare group_changed, reuse it in rank_1d conditionals

---
 pandas/_libs/algos.pyx | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index caec06d498312..cb968d9e8b1e6 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -951,7 +951,7 @@ def rank_1d(
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals
         ndarray[uint8_t, ndim=1] mask
-        bint keep_na, at_end, next_val_diff, check_labels, set_as_na
+        bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed
         rank_t nan_fill_val
         float computed_rank
 
@@ -1086,8 +1086,7 @@ def rank_1d(
                 # reset the dups and sum_ranks, knowing that a new value is
                 # coming up. the conditional also needs to handle nan equality
                 # and the end of iteration
-                if next_val_diff or (mask[lexsort_indexer[i]]
-                                     ^ mask[lexsort_indexer[i+1]]):
+                if next_val_diff or not group_changed:
                     dups = sum_ranks = 0
                     grp_vals_seen += 1
 
@@ -1122,32 +1121,29 @@ def rank_1d(
                 dups += 1
                 sum_ranks += i - grp_start + 1
 
-                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]]
-                                    != masked_vals[lexsort_indexer[i+1]])
+                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] !=
+                                           masked_vals[lexsort_indexer[i+1]])
 
                 # We'll need this check later anyway to determine group size, so just
                 # compute it here since shortcircuiting won't help
                 group_changed = at_end or (check_labels and
-                                       (labels[lexsort_indexer[i]]
-                                        != labels[lexsort_indexer[i+1]]))
+                                           (labels[lexsort_indexer[i]]
+                                            != labels[lexsort_indexer[i+1]]))
 
                 # Update out only when there is a transition of values or labels.
                 # When a new value or group is encountered, go back #dups steps(
                 # the number of occurrence of current value) and assign the ranks
                 # based on the starting index of the current group (grp_start)
                 # and the current index
-                if (next_val_diff
-                        or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])
-                        or (check_labels
-                            and (labels[lexsort_indexer[i]]
-                                 != labels[lexsort_indexer[i+1]]))
-                ):
+                if (next_val_diff or group_changed
+                        or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])):
+
                     # if keep_na, check for missing values and assign back
                     # to the result where appropriate
                     set_as_na = keep_na and mask[lexsort_indexer[i]]
 
-                    # For all cases except TIEBREAK_FIRST and a non-null value,
-                    # we set the same value at each index
+                    # For all cases except TIEBREAK_FIRST when not setting
+                    # nulls, we set the same value at each index
                     if set_as_na:
                         computed_rank = NaN
                         grp_na_count = dups
@@ -1171,13 +1167,12 @@ def rank_1d(
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = computed_rank
 
-                    # look forward to the next value (using the sorting in
-                    # lexsort_indexer) if the value does not equal the current
-                    # value then we need to reset the dups and sum_ranks,
-                    # knowing that a new value is coming up. the conditional
-                    # also needs to handle nan equality and the end of iteration
-                    if next_val_diff or (mask[lexsort_indexer[i]]
-                                         ^ mask[lexsort_indexer[i+1]]):
+                    # look forward to the next value (using the sorting in _as)
+                    # if the value does not equal the current value then we need to
+                    # reset the dups and sum_ranks, knowing that a new value is
+                    # coming up. the conditional also needs to handle nan equality
+                    # and the end of iteration
+                    if next_val_diff or not group_changed:
                         dups = sum_ranks = 0
                         grp_vals_seen += 1
 

From 0aaeee71b6cda176fc89946b32f333d382381a7d Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 19:21:22 -0400
Subject: [PATCH 04/13] WIP: tidy rank_1d comments; compare object values with !=

---
 pandas/_libs/algos.pyx | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index cb968d9e8b1e6..d06ab47c116e9 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -985,6 +985,8 @@ def rank_1d(
     else:
         mask = np.zeros(shape=len(masked_vals), dtype=np.uint8)
 
+    # If ascending is true and na_option == 'bottom',
+    # fill with the largest so NaN
     if ascending ^ (na_option == 'top'):
         if rank_t is object:
             nan_fill_val = Infinity()
@@ -1030,13 +1032,12 @@ def rank_1d(
 
             # dups and sum_ranks will be incremented each loop where
             # the value / group remains the same, and should be reset
-            # when either of those change
-            # Used to calculate tiebreakers
+            # when either of those change. Used to calculate tiebreakers
             dups += 1
             sum_ranks += i - grp_start + 1
 
-            next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]],
-                                    masked_vals[lexsort_indexer[i+1]])
+            next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] !=
+                                       masked_vals[lexsort_indexer[i+1]])
 
             # We'll need this check later anyway to determine group size, so just
             # compute it here since shortcircuiting won't help
@@ -1052,7 +1053,7 @@ def rank_1d(
             if (next_val_diff or group_changed
                     or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])):
 
-                # if keep_na, check for missing values and assign back
+                # If keep_na, check for missing values and assign back
                 # to the result where appropriate
                 set_as_na = keep_na and mask[lexsort_indexer[i]]
 
@@ -1081,11 +1082,15 @@ def rank_1d(
                     for j in range(i - dups + 1, i + 1):
                         out[lexsort_indexer[j]] = computed_rank
 
-                # look forward to the next value (using the sorting in _as)
+                # Look forward to the next value (using the sorting in lexsort_indexer)
                 # if the value does not equal the current value then we need to
                 # reset the dups and sum_ranks, knowing that a new value is
-                # coming up. the conditional also needs to handle nan equality
+                # coming up. The conditional also needs to handle nan equality
                 # and the end of iteration
+
+                # This condition is equivalent to `next_val_diff or
+                # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))`
+                # Helps potentially avoid 2 mask lookups
                 if next_val_diff or not group_changed:
                     dups = sum_ranks = 0
                     grp_vals_seen += 1
@@ -1093,8 +1098,8 @@ def rank_1d(
                 # Similar to the previous conditional, check now if we are
                 # moving to a new group. If so, keep track of the index where
                 # the new group occurs, so the tiebreaker calculations can
-                # decrement that from their position. fill in the size of each
-                # group encountered (used by pct calculations later). also be
+                # decrement that from their position. Fill in the size of each
+                # group encountered (used by pct calculations later). Also be
                 # sure to reset any of the items helping to calculate dups
                 if group_changed:
                     if tiebreak != TIEBREAK_DENSE:
@@ -1116,8 +1121,7 @@ def rank_1d(
 
                 # dups and sum_ranks will be incremented each loop where
                 # the value / group remains the same, and should be reset
-                # when either of those change
-                # Used to calculate tiebreakers
+                # when either of those change. Used to calculate tiebreakers
                 dups += 1
                 sum_ranks += i - grp_start + 1
 
@@ -1138,7 +1142,7 @@ def rank_1d(
                 if (next_val_diff or group_changed
                         or (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]])):
 
-                    # if keep_na, check for missing values and assign back
+                    # If keep_na, check for missing values and assign back
                     # to the result where appropriate
                     set_as_na = keep_na and mask[lexsort_indexer[i]]
 
@@ -1167,11 +1171,15 @@ def rank_1d(
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = computed_rank
 
-                    # look forward to the next value (using the sorting in _as)
+                    # Look forward to the next value (using the sorting in lexsort_indexer)
                     # if the value does not equal the current value then we need to
                     # reset the dups and sum_ranks, knowing that a new value is
-                    # coming up. the conditional also needs to handle nan equality
+                    # coming up. The conditional also needs to handle nan equality
                     # and the end of iteration
+
+                    # This condition is equivalent to `next_val_diff or
+                    # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))`
+                    # Helps potentially avoid 2 mask lookups
                     if next_val_diff or not group_changed:
                         dups = sum_ranks = 0
                         grp_vals_seen += 1
@@ -1179,8 +1187,8 @@ def rank_1d(
                     # Similar to the previous conditional, check now if we are
                     # moving to a new group. If so, keep track of the index where
                     # the new group occurs, so the tiebreaker calculations can
-                    # decrement that from their position. fill in the size of each
-                    # group encountered (used by pct calculations later). also be
+                    # decrement that from their position. Fill in the size of each
+                    # group encountered (used by pct calculations later). Also be
                     # sure to reset any of the items helping to calculate dups
                     if group_changed:
                         if tiebreak != TIEBREAK_DENSE:

From fe6495a289b97d21a5bb559b09813eebe2781213 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 19:54:33 -0400
Subject: [PATCH 05/13] Add comments, whitespace

---
 pandas/_libs/algos.pyx | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d06ab47c116e9..d24c1452be9c4 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -985,8 +985,9 @@ def rank_1d(
     else:
         mask = np.zeros(shape=len(masked_vals), dtype=np.uint8)
 
-    # If ascending is true and na_option == 'bottom',
-    # fill with the largest so NaN
+    # If ascending and na_option == 'bottom' or descending and
+    # na_option == 'top' -> we want to rank NaN as the highest
+    # so fill with the maximum value for the type
     if ascending ^ (na_option == 'top'):
         if rank_t is object:
             nan_fill_val = Infinity()
@@ -997,6 +998,8 @@ def rank_1d(
         else:
             nan_fill_val = np.inf
         order = (masked_vals, mask, labels)
+
+    # Otherwise, fill with the lowest value of the type
     else:
         if rank_t is object:
             nan_fill_val = NegInfinity()
@@ -1036,8 +1039,8 @@ def rank_1d(
             dups += 1
             sum_ranks += i - grp_start + 1
 
-            next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] !=
-                                       masked_vals[lexsort_indexer[i+1]])
+            next_val_diff = at_end or are_diff(masked_vals[lexsort_indexer[i]],
+                                               masked_vals[lexsort_indexer[i+1]])
 
             # We'll need this check later anyway to determine group size, so just
             # compute it here since shortcircuiting won't help
@@ -1058,7 +1061,7 @@ def rank_1d(
                 set_as_na = keep_na and mask[lexsort_indexer[i]]
 
                 # For all cases except TIEBREAK_FIRST when not setting
-                # nulls, we set the same value at each index
+                # nulls, we can set the same value at each index
                 if set_as_na:
                     computed_rank = NaN
                     grp_na_count = dups

From 8fae616f2edfd6dc2936f23c8273ad7e006fdf06 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 20:17:35 -0400
Subject: [PATCH 06/13] Simplify conditional

---
 pandas/_libs/algos.pyx | 82 ++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 39 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index d24c1452be9c4..a016dbcc07280 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1060,19 +1060,25 @@ def rank_1d(
                 # to the result where appropriate
                 set_as_na = keep_na and mask[lexsort_indexer[i]]
 
-                # For all cases except TIEBREAK_FIRST when not setting
-                # nulls, we can set the same value at each index
-                if set_as_na:
-                    computed_rank = NaN
-                    grp_na_count = dups
-                elif tiebreak == TIEBREAK_AVERAGE:
-                    computed_rank = sum_ranks / <float64_t>dups
-                elif tiebreak == TIEBREAK_MIN:
-                    computed_rank = i - grp_start - dups + 2
-                elif tiebreak == TIEBREAK_MAX:
-                    computed_rank = i - grp_start + 1
-                elif tiebreak == TIEBREAK_DENSE:
-                    computed_rank = grp_vals_seen
+                # For all cases except TIEBREAK_FIRST for non-null values
+                # we set the same value at each index
+                if set_as_na or tiebreak != TIEBREAK_FIRST:
+                    if set_as_na:
+                        computed_rank = NaN
+                        grp_na_count = dups
+                    elif tiebreak == TIEBREAK_AVERAGE:
+                        computed_rank = sum_ranks / <float64_t>dups
+                    elif tiebreak == TIEBREAK_MIN:
+                        computed_rank = i - grp_start - dups + 2
+                    elif tiebreak == TIEBREAK_MAX:
+                        computed_rank = i - grp_start + 1
+                    elif tiebreak == TIEBREAK_DENSE:
+                        computed_rank = grp_vals_seen
+
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = computed_rank
+
+                # Otherwise, we need to iterate a compute a rank per index
                 else:
                     for j in range(i - dups + 1, i + 1):
                         if ascending:
@@ -1081,10 +1087,6 @@ def rank_1d(
                             out[lexsort_indexer[j]] = \
                                 (2 * i - j - dups + 2 - grp_start)
 
-                if set_as_na or tiebreak != TIEBREAK_FIRST:
-                    for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = computed_rank
-
                 # Look forward to the next value (using the sorting in lexsort_indexer)
                 # if the value does not equal the current value then we need to
                 # reset the dups and sum_ranks, knowing that a new value is
@@ -1149,19 +1151,25 @@ def rank_1d(
                     # to the result where appropriate
                     set_as_na = keep_na and mask[lexsort_indexer[i]]
 
-                    # For all cases except TIEBREAK_FIRST when not setting
-                    # nulls, we set the same value at each index
-                    if set_as_na:
-                        computed_rank = NaN
-                        grp_na_count = dups
-                    elif tiebreak == TIEBREAK_AVERAGE:
-                        computed_rank = sum_ranks / <float64_t>dups
-                    elif tiebreak == TIEBREAK_MIN:
-                        computed_rank = i - grp_start - dups + 2
-                    elif tiebreak == TIEBREAK_MAX:
-                        computed_rank = i - grp_start + 1
-                    elif tiebreak == TIEBREAK_DENSE:
-                        computed_rank = grp_vals_seen
+                    # For all cases except TIEBREAK_FIRST for non-null values
+                    # we set the same value at each index
+                    if set_as_na or tiebreak != TIEBREAK_FIRST:
+                        if set_as_na:
+                            computed_rank = NaN
+                            grp_na_count = dups
+                        elif tiebreak == TIEBREAK_AVERAGE:
+                            computed_rank = sum_ranks / <float64_t>dups
+                        elif tiebreak == TIEBREAK_MIN:
+                            computed_rank = i - grp_start - dups + 2
+                        elif tiebreak == TIEBREAK_MAX:
+                            computed_rank = i - grp_start + 1
+                        elif tiebreak == TIEBREAK_DENSE:
+                            computed_rank = grp_vals_seen
+
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = computed_rank
+
+                    # Otherwise, we need to iterate a compute a rank per index
                     else:
                         for j in range(i - dups + 1, i + 1):
                             if ascending:
@@ -1170,15 +1178,11 @@ def rank_1d(
                                 out[lexsort_indexer[j]] = \
                                     (2 * i - j - dups + 2 - grp_start)
 
-                    if set_as_na or tiebreak != TIEBREAK_FIRST:
-                        for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = computed_rank
-
-                    # Look forward to the next value (using the sorting in lexsort_indexer)
-                    # if the value does not equal the current value then we need to
-                    # reset the dups and sum_ranks, knowing that a new value is
-                    # coming up. The conditional also needs to handle nan equality
-                    # and the end of iteration
+                    # Look forward to the next value (using the sorting in
+                    # lexsort_indexer). If the value does not equal the current
+                    # value then we need to reset the dups and sum_ranks, knowing
+                    # that a new value is coming up. The conditional also needs
+                    # to handle nan equality and the end of iteration
 
                     # This condition is equivalent to `next_val_diff or
                     # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))`

From f9479e377d5ae3c7ee19db84a85816d25b2b7ffd Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 20 Mar 2021 20:35:02 -0400
Subject: [PATCH 07/13] Remove unused var

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index a016dbcc07280..b03d06bfba98b 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -946,7 +946,7 @@ def rank_1d(
     cdef:
         TiebreakEnumType tiebreak
         Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0
-        Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0
+        Py_ssize_t grp_vals_seen=1, grp_na_count=0
         ndarray[int64_t, ndim=1] lexsort_indexer
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals

From a2bea3d933113f1af2253b9b411a1d94de191361 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 21 Mar 2021 01:07:07 -0400
Subject: [PATCH 08/13] Avoid compiler warning

---
 pandas/_libs/algos.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index b03d06bfba98b..ca0b1c19aec60 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -953,7 +953,7 @@ def rank_1d(
         ndarray[uint8_t, ndim=1] mask
         bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed
         rank_t nan_fill_val
-        float computed_rank
+        float64_t computed_rank = 0
 
     tiebreak = tiebreakers[ties_method]
     keep_na = na_option == 'keep'

From ba5dc7cc6ab1e1a103dcef775acf3992f5775981 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 22 Mar 2021 17:58:39 -0400
Subject: [PATCH 09/13] Simplify changes

---
 pandas/_libs/algos.pyx | 129 ++++++++++++++++++-----------------------
 1 file changed, 57 insertions(+), 72 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index ca0b1c19aec60..281d2b9a38b2f 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -951,11 +951,14 @@ def rank_1d(
         ndarray[float64_t, ndim=1] grp_sizes, out
         ndarray[rank_t, ndim=1] masked_vals
         ndarray[uint8_t, ndim=1] mask
-        bint keep_na, at_end, next_val_diff, check_labels, set_as_na, group_changed
+        bint keep_na, at_end, next_val_diff, check_labels, group_changed
         rank_t nan_fill_val
-        float64_t computed_rank = 0
 
     tiebreak = tiebreakers[ties_method]
+    if tiebreak == TIEBREAK_FIRST:
+        if not ascending:
+            tiebreak = TIEBREAK_FIRST_DESCENDING
+
     keep_na = na_option == 'keep'
 
     N = len(values)
@@ -1058,45 +1061,36 @@ def rank_1d(
 
                 # If keep_na, check for missing values and assign back
                 # to the result where appropriate
-                set_as_na = keep_na and mask[lexsort_indexer[i]]
-
-                # For all cases except TIEBREAK_FIRST for non-null values
-                # we set the same value at each index
-                if set_as_na or tiebreak != TIEBREAK_FIRST:
-                    if set_as_na:
-                        computed_rank = NaN
-                        grp_na_count = dups
-                    elif tiebreak == TIEBREAK_AVERAGE:
-                        computed_rank = sum_ranks / <float64_t>dups
-                    elif tiebreak == TIEBREAK_MIN:
-                        computed_rank = i - grp_start - dups + 2
-                    elif tiebreak == TIEBREAK_MAX:
-                        computed_rank = i - grp_start + 1
-                    elif tiebreak == TIEBREAK_DENSE:
-                        computed_rank = grp_vals_seen
-
+                if keep_na and mask[lexsort_indexer[i]]:
+                    grp_na_count = dups
                     for j in range(i - dups + 1, i + 1):
-                        out[lexsort_indexer[j]] = computed_rank
-
-                # Otherwise, we need to iterate a compute a rank per index
-                else:
+                        out[lexsort_indexer[j]] = NaN
+                elif tiebreak == TIEBREAK_AVERAGE:
                     for j in range(i - dups + 1, i + 1):
-                        if ascending:
-                            out[lexsort_indexer[j]] = j + 1 - grp_start
-                        else:
-                            out[lexsort_indexer[j]] = \
-                                (2 * i - j - dups + 2 - grp_start)
+                        out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
+                elif tiebreak == TIEBREAK_MIN:
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = i - grp_start - dups + 2
+                elif tiebreak == TIEBREAK_MAX:
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = i - grp_start + 1
+                elif tiebreak == TIEBREAK_FIRST:
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = j + 1 - grp_start
+                elif tiebreak == TIEBREAK_FIRST_DESCENDING:
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
+                elif tiebreak == TIEBREAK_DENSE:
+                    for j in range(i - dups + 1, i + 1):
+                        out[lexsort_indexer[j]] = grp_vals_seen
 
                 # Look forward to the next value (using the sorting in lexsort_indexer)
                 # if the value does not equal the current value then we need to
                 # reset the dups and sum_ranks, knowing that a new value is
                 # coming up. The conditional also needs to handle nan equality
                 # and the end of iteration
-
-                # This condition is equivalent to `next_val_diff or
-                # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))`
-                # Helps potentially avoid 2 mask lookups
-                if next_val_diff or not group_changed:
+                if next_val_diff or (mask[lexsort_indexer[i]]
+                                     ^ mask[lexsort_indexer[i+1]]):
                     dups = sum_ranks = 0
                     grp_vals_seen += 1
 
@@ -1130,8 +1124,8 @@ def rank_1d(
                 dups += 1
                 sum_ranks += i - grp_start + 1
 
-                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]] !=
-                                           masked_vals[lexsort_indexer[i+1]])
+                next_val_diff = at_end or (masked_vals[lexsort_indexer[i]]
+                                           != masked_vals[lexsort_indexer[i+1]])
 
                 # We'll need this check later anyway to determine group size, so just
                 # compute it here since shortcircuiting won't help
@@ -1149,45 +1143,36 @@ def rank_1d(
 
                     # If keep_na, check for missing values and assign back
                     # to the result where appropriate
-                    set_as_na = keep_na and mask[lexsort_indexer[i]]
-
-                    # For all cases except TIEBREAK_FIRST for non-null values
-                    # we set the same value at each index
-                    if set_as_na or tiebreak != TIEBREAK_FIRST:
-                        if set_as_na:
-                            computed_rank = NaN
-                            grp_na_count = dups
-                        elif tiebreak == TIEBREAK_AVERAGE:
-                            computed_rank = sum_ranks / <float64_t>dups
-                        elif tiebreak == TIEBREAK_MIN:
-                            computed_rank = i - grp_start - dups + 2
-                        elif tiebreak == TIEBREAK_MAX:
-                            computed_rank = i - grp_start + 1
-                        elif tiebreak == TIEBREAK_DENSE:
-                            computed_rank = grp_vals_seen
-
+                    if keep_na and mask[lexsort_indexer[i]]:
+                        grp_na_count = dups
                         for j in range(i - dups + 1, i + 1):
-                            out[lexsort_indexer[j]] = computed_rank
-
-                    # Otherwise, we need to iterate a compute a rank per index
-                    else:
+                            out[lexsort_indexer[j]] = NaN
+                    elif tiebreak == TIEBREAK_AVERAGE:
                         for j in range(i - dups + 1, i + 1):
-                            if ascending:
-                                out[lexsort_indexer[j]] = j + 1 - grp_start
-                            else:
-                                out[lexsort_indexer[j]] = \
-                                    (2 * i - j - dups + 2 - grp_start)
-
-                    # Look forward to the next value (using the sorting in
-                    # lexsort_indexer). If the value does not equal the current
-                    # value then we need to reset the dups and sum_ranks, knowing
-                    # that a new value is coming up. The conditional also needs
-                    # to handle nan equality and the end of iteration
-
-                    # This condition is equivalent to `next_val_diff or
-                    # (mask[lexsort_indexer[i]] ^ mask[lexsort_indexer[i+1]]))`
-                    # Helps potentially avoid 2 mask lookups
-                    if next_val_diff or not group_changed:
+                            out[lexsort_indexer[j]] = sum_ranks / <float64_t>dups
+                    elif tiebreak == TIEBREAK_MIN:
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = i - grp_start - dups + 2
+                    elif tiebreak == TIEBREAK_MAX:
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = i - grp_start + 1
+                    elif tiebreak == TIEBREAK_FIRST:
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = j + 1 - grp_start
+                    elif tiebreak == TIEBREAK_FIRST_DESCENDING:
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
+                    elif tiebreak == TIEBREAK_DENSE:
+                        for j in range(i - dups + 1, i + 1):
+                            out[lexsort_indexer[j]] = grp_vals_seen
+
+                    # Look forward to the next value (using the sorting in lexsort_indexer)
+                    # if the value does not equal the current value then we need to
+                    # reset the dups and sum_ranks, knowing that a new value is
+                    # coming up. The conditional also needs to handle nan equality
+                    # and the end of iteration
+                    if next_val_diff or (mask[lexsort_indexer[i]]
+                                         ^ mask[lexsort_indexer[i+1]]):
                         dups = sum_ranks = 0
                         grp_vals_seen += 1
 

From f6a04b79377bb94dab67a9ab938c1e5939d16325 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 22 Mar 2021 18:08:02 -0400
Subject: [PATCH 10/13] precommit fixup

---
 pandas/_libs/algos.pyx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 281d2b9a38b2f..4ae134b42b243 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1166,11 +1166,11 @@ def rank_1d(
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = grp_vals_seen
 
-                    # Look forward to the next value (using the sorting in lexsort_indexer)
-                    # if the value does not equal the current value then we need to
-                    # reset the dups and sum_ranks, knowing that a new value is
-                    # coming up. The conditional also needs to handle nan equality
-                    # and the end of iteration
+                    # Look forward to the next value (using the sorting in
+                    # lexsort_indexer). If the value does not equal the current
+                    # value then we need to reset the dups and sum_ranks, knowing
+                    # that a new value is coming up. The conditional also needs
+                    # to handle nan equality and the end of iteration
                     if next_val_diff or (mask[lexsort_indexer[i]]
                                          ^ mask[lexsort_indexer[i+1]]):
                         dups = sum_ranks = 0

From e1df693920fd44fc3a1c3064ab260e7046b1571b Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 23 Mar 2021 10:42:23 -0400
Subject: [PATCH 11/13] Update ascending, na_option comment

---
 pandas/_libs/algos.pyx | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 4ae134b42b243..2de8e91198ee1 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -988,9 +988,12 @@ def rank_1d(
     else:
         mask = np.zeros(shape=len(masked_vals), dtype=np.uint8)
 
-    # If ascending and na_option == 'bottom' or descending and
-    # na_option == 'top' -> we want to rank NaN as the highest
-    # so fill with the maximum value for the type
+    # If `na_option == 'top'`, we want to assign the lowest rank
+    # to NaN regardless of ascending/descending. So if ascending,
+    # fill with lowest value of type to end up with lowest rank.
+    # If descending, fill with highest value since descending
+    # will flip the ordering to still end up with lowest rank.
+    # Symmetric logic applies to `na_option == 'bottom'`
     if ascending ^ (na_option == 'top'):
         if rank_t is object:
             nan_fill_val = Infinity()
@@ -1001,8 +1004,6 @@ def rank_1d(
         else:
             nan_fill_val = np.inf
         order = (masked_vals, mask, labels)
-
-    # Otherwise, fill with the lowest value of the type
     else:
         if rank_t is object:
             nan_fill_val = NegInfinity()

From 93b071765777a4f49893d3180db436fa73239f05 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 23 Mar 2021 13:28:44 -0400
Subject: [PATCH 12/13] Explain tiebreak first behavior

---
 pandas/_libs/algos.pyx | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 2de8e91198ee1..da2b501ca4941 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1075,9 +1075,17 @@ def rank_1d(
                 elif tiebreak == TIEBREAK_MAX:
                     for j in range(i - dups + 1, i + 1):
                         out[lexsort_indexer[j]] = i - grp_start + 1
+
+                # With n as the previous rank in the group and m as the number
+                # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
+                # then rankings should be n+1, n+2...n+m
                 elif tiebreak == TIEBREAK_FIRST:
                     for j in range(i - dups + 1, i + 1):
                         out[lexsort_indexer[j]] = j + 1 - grp_start
+
+                # If TIEBREAK_FIRST and descending, the ranking should be
+                # n+m, n+(m-1)...n+1. This is equivalent to
+                # (i - dups + 1) + (i - j + 1) - grp_start
                 elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                     for j in range(i - dups + 1, i + 1):
                         out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
@@ -1157,9 +1165,17 @@ def rank_1d(
                     elif tiebreak == TIEBREAK_MAX:
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = i - grp_start + 1
+
+                    # With n as the previous rank in the group and m as the number
+                    # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
+                    # then rankings should be n + 1, n + 2 ... n + m
                     elif tiebreak == TIEBREAK_FIRST:
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = j + 1 - grp_start
+
+                    # If TIEBREAK_FIRST and descending, the ranking should be
+                    # n + m, n + (m - 1) ... n + 1. This is equivalent to
+                    # (i - dups + 1) + (i - j + 1) - grp_start
                     elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                         for j in range(i - dups + 1, i + 1):
                             out[lexsort_indexer[j]] = 2 * i - j - dups + 2 - grp_start

From c04562dc9591d7453261bead70076ed34ab200f1 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 23 Mar 2021 13:30:04 -0400
Subject: [PATCH 13/13] Consistent spacing

---
 pandas/_libs/algos.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index da2b501ca4941..cda20e536c11c 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1078,13 +1078,13 @@ def rank_1d(
 
                 # With n as the previous rank in the group and m as the number
                 # of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
-                # then rankings should be n+1, n+2...n+m
+                # then rankings should be n + 1, n + 2 ... n + m
                 elif tiebreak == TIEBREAK_FIRST:
                     for j in range(i - dups + 1, i + 1):
                         out[lexsort_indexer[j]] = j + 1 - grp_start
 
                 # If TIEBREAK_FIRST and descending, the ranking should be
-                # n+m, n+(m-1)...n+1. This is equivalent to
+                # n + m, n + (m - 1) ... n + 1. This is equivalent to
                 # (i - dups + 1) + (i - j + 1) - grp_start
                 elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                     for j in range(i - dups + 1, i + 1):