diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 26099a94834e8..23675752a4593 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -3,7 +3,7 @@
 intended for public consumption
 """
 from textwrap import dedent
-from typing import Dict
+from typing import Dict, Optional, Tuple, Union
 from warnings import catch_warnings, simplefilter, warn
 
 import numpy as np
@@ -501,9 +501,9 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
 
     Returns
     -------
-    labels : ndarray
+    codes : ndarray
         An integer ndarray that's an indexer into `uniques`.
-        ``uniques.take(labels)`` will have the same values as `values`.
+        ``uniques.take(codes)`` will have the same values as `values`.
     uniques : ndarray, Index, or Categorical
         The unique valid values. When `values` is Categorical, `uniques`
         is a Categorical. When `values` is some other pandas object, an
@@ -525,27 +525,27 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
     ``pd.factorize(values)``. The results are identical for methods like
     :meth:`Series.factorize`.
 
-    >>> labels, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'])
-    >>> labels
+    >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'])
+    >>> codes
     array([0, 0, 1, 2, 0])
     >>> uniques
     array(['b', 'a', 'c'], dtype=object)
 
-    With ``sort=True``, the `uniques` will be sorted, and `labels` will be
+    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
-    shuffled so that the relationship is the maintained.
+    shuffled so that the relationship is maintained.
 
-    >>> labels, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
-    >>> labels
+    >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
+    >>> codes
     array([1, 1, 0, 2, 1])
     >>> uniques
     array(['a', 'b', 'c'], dtype=object)
 
-    Missing values are indicated in `labels` with `na_sentinel`
+    Missing values are indicated in `codes` with `na_sentinel`
     (``-1`` by default). Note that missing values are never
     included in `uniques`.
 
-    >>> labels, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
-    >>> labels
+    >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
+    >>> codes
     array([ 0, -1,  1,  2,  0])
     >>> uniques
     array(['b', 'a', 'c'], dtype=object)
@@ -555,8 +555,8 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
     will differ. For Categoricals, a `Categorical` is returned.
 
     >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
-    >>> labels, uniques = pd.factorize(cat)
-    >>> labels
+    >>> codes, uniques = pd.factorize(cat)
+    >>> codes
     array([0, 0, 1])
     >>> uniques
     [a, c]
@@ -569,8 +569,8 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
     returned.
 
     >>> cat = pd.Series(['a', 'a', 'c'])
-    >>> labels, uniques = pd.factorize(cat)
-    >>> labels
+    >>> codes, uniques = pd.factorize(cat)
+    >>> codes
     array([0, 0, 1])
     >>> uniques
     Index(['a', 'c'], dtype='object')
@@ -596,7 +596,7 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
     sort=dedent(
         """\
     sort : bool, default False
-        Sort `uniques` and shuffle `labels` to maintain the
+        Sort `uniques` and shuffle `codes` to maintain the
         relationship.
     """
     ),
@@ -609,11 +609,17 @@ def _factorize_array(values, na_sentinel: int = -1, size_hint=None, na_value=Non
 )
 @Appender(_shared_docs["factorize"])
 @deprecate_kwarg(old_arg_name="order", new_arg_name=None)
-def factorize(values, sort: bool = False, order=None, na_sentinel=-1, size_hint=None):
+def factorize(
+    values,
+    sort: bool = False,
+    order=None,
+    na_sentinel: int = -1,
+    size_hint: Optional[int] = None,
+) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)
-    # 2.) factorizing labels and uniques
-    # 3.) Maybe boxing the output in an Index
+    # 2.) factorizing codes and uniques
+    # 3.) Maybe boxing the uniques in an Index
     #
     # Step 2 is dispatched to extension types (like Categorical). They are
     # responsible only for factorization. All data coercion, sorting and boxing
@@ -624,7 +630,7 @@ def factorize(values, sort: bool = False, order=None, na_sentinel=-1, size_hint=
 
     if is_extension_array_dtype(values):
         values = extract_array(values)
-        labels, uniques = values.factorize(na_sentinel=na_sentinel)
+        codes, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
     else:
         values, dtype = _ensure_data(values)
@@ -634,13 +640,13 @@ def factorize(values, sort: bool = False, order=None, na_sentinel=-1, size_hint=
         else:
             na_value = None
 
-        labels, uniques = _factorize_array(
+        codes, uniques = _factorize_array(
             values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
         )
 
     if sort and len(uniques) > 0:
-        uniques, labels = safe_sort(
-            uniques, labels, na_sentinel=na_sentinel, assume_unique=True, verify=False
+        uniques, codes = safe_sort(
+            uniques, codes, na_sentinel=na_sentinel, assume_unique=True, verify=False
         )
 
     uniques = _reconstruct_data(uniques, dtype, original)
@@ -653,7 +659,7 @@ def factorize(values, sort: bool = False, order=None, na_sentinel=-1, size_hint=
 
         uniques = Index(uniques)
 
-    return labels, uniques
+    return codes, uniques
 
 
 def value_counts(
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 2980f0d4cb906..82dabe735581b 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -690,11 +690,11 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ABCExtensionArra
         Parameters
         ----------
         na_sentinel : int, default -1
-            Value to use in the `labels` array to indicate missing values.
+            Value to use in the `codes` array to indicate missing values.
 
         Returns
         -------
-        labels : ndarray
+        codes : ndarray
             An integer NumPy array that's an indexer into the original
             ExtensionArray.
         uniques : ExtensionArray
@@ -724,12 +724,12 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ABCExtensionArra
         #    Complete control over factorization.
         arr, na_value = self._values_for_factorize()
 
-        labels, uniques = _factorize_array(
+        codes, uniques = _factorize_array(
             arr, na_sentinel=na_sentinel, na_value=na_value
         )
 
         uniques = self._from_factorized(uniques, self)
-        return labels, uniques
+        return codes, uniques
 
     _extension_array_shared_docs[
         "repeat"
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 075cdf09d531f..14024401ea110 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -710,11 +710,11 @@ def factorize(self, na_sentinel=-1):
         # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA]
         # The sparsity on this is backwards from what Sparse would want. Want
         # ExtensionArray.factorize -> Tuple[EA, EA]
-        # Given that we have to return a dense array of labels, why bother
+        # Given that we have to return a dense array of codes, why bother
         # implementing an efficient factorize?
-        labels, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
+        codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
         uniques = SparseArray(uniques, dtype=self.dtype)
-        return labels, uniques
+        return codes, uniques
 
     def value_counts(self, dropna=True):
         """
diff --git a/pandas/core/base.py b/pandas/core/base.py
index ada0159d21e7e..10e7b5d186bba 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1518,7 +1518,7 @@ def memory_usage(self, deep=False):
         sort=textwrap.dedent(
             """\
             sort : bool, default False
-                Sort `uniques` and shuffle `labels` to maintain the
+                Sort `uniques` and shuffle `codes` to maintain the
                 relationship.
             """
         ),
diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py
index 1508fef86ae62..1a48ccf85f947 100644
--- a/pandas/tests/arrays/categorical/test_algos.py
+++ b/pandas/tests/arrays/categorical/test_algos.py
@@ -11,23 +11,23 @@ def test_factorize(categories, ordered):
     cat = pd.Categorical(
         ["b", "b", "a", "c", None], categories=categories, ordered=ordered
     )
-    labels, uniques = pd.factorize(cat)
-    expected_labels = np.array([0, 0, 1, 2, -1], dtype=np.intp)
+    codes, uniques = pd.factorize(cat)
+    expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
     expected_uniques = pd.Categorical(
         ["b", "a", "c"], categories=categories, ordered=ordered
     )
 
-    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_categorical_equal(uniques, expected_uniques)
 
 
 def test_factorized_sort():
     cat = pd.Categorical(["b", "b", None, "a"])
-    labels, uniques = pd.factorize(cat, sort=True)
-    expected_labels = np.array([1, 1, -1, 0], dtype=np.intp)
+    codes, uniques = pd.factorize(cat, sort=True)
+    expected_codes = np.array([1, 1, -1, 0], dtype=np.intp)
     expected_uniques = pd.Categorical(["a", "b"])
 
-    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_categorical_equal(uniques, expected_uniques)
 
 
@@ -36,13 +36,13 @@ def test_factorized_sort_ordered():
         ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True
     )
 
-    labels, uniques = pd.factorize(cat, sort=True)
-    expected_labels = np.array([0, 0, -1, 1], dtype=np.intp)
+    codes, uniques = pd.factorize(cat, sort=True)
+    expected_codes = np.array([0, 0, -1, 1], dtype=np.intp)
     expected_uniques = pd.Categorical(
         ["b", "a"], categories=["c", "b", "a"], ordered=True
     )
 
-    tm.assert_numpy_array_equal(labels, expected_labels)
+    tm.assert_numpy_array_equal(codes, expected_codes)
     tm.assert_categorical_equal(uniques, expected_uniques)
 
 
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 6d47b0c1d1f77..973088cb72e7a 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -113,29 +113,29 @@ def test_unique(self, data, box, method):
 
     @pytest.mark.parametrize("na_sentinel", [-1, -2])
     def test_factorize(self, data_for_grouping, na_sentinel):
-        labels, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
-        expected_labels = np.array(
+        codes, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
+        expected_codes = np.array(
             [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp
         )
         expected_uniques = data_for_grouping.take([0, 4, 7])
 
-        tm.assert_numpy_array_equal(labels, expected_labels)
+        tm.assert_numpy_array_equal(codes, expected_codes)
         self.assert_extension_array_equal(uniques, expected_uniques)
 
     @pytest.mark.parametrize("na_sentinel", [-1, -2])
     def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
-        l1, u1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
-        l2, u2 = data_for_grouping.factorize(na_sentinel=na_sentinel)
+        codes_1, uniques_1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
+        codes_2, uniques_2 = data_for_grouping.factorize(na_sentinel=na_sentinel)
 
-        tm.assert_numpy_array_equal(l1, l2)
-        self.assert_extension_array_equal(u1, u2)
+        tm.assert_numpy_array_equal(codes_1, codes_2)
+        self.assert_extension_array_equal(uniques_1, uniques_2)
 
     def test_factorize_empty(self, data):
-        labels, uniques = pd.factorize(data[:0])
-        expected_labels = np.array([], dtype=np.intp)
+        codes, uniques = pd.factorize(data[:0])
+        expected_codes = np.array([], dtype=np.intp)
         expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)
 
-        tm.assert_numpy_array_equal(labels, expected_labels)
+        tm.assert_numpy_array_equal(codes, expected_codes)
         self.assert_extension_array_equal(uniques, expected_uniques)
 
     def test_fillna_copy_frame(self, data_missing):
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index a64501040442d..ef844dd97120a 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -32,39 +32,39 @@
 class TestFactorize:
     def test_basic(self):
 
-        labels, uniques = algos.factorize(["a", "b", "b", "a", "a", "c", "c", "c"])
+        codes, uniques = algos.factorize(["a", "b", "b", "a", "a", "c", "c", "c"])
         tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object))
 
-        labels, uniques = algos.factorize(
+        codes, uniques = algos.factorize(
             ["a", "b", "b", "a", "a", "c", "c", "c"], sort=True
         )
         exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = np.array(["a", "b", "c"], dtype=object)
         tm.assert_numpy_array_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(list(reversed(range(5))))
+        codes, uniques = algos.factorize(list(reversed(range(5))))
         exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = np.array([4, 3, 2, 1, 0], dtype=np.int64)
         tm.assert_numpy_array_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(list(reversed(range(5))), sort=True)
+        codes, uniques = algos.factorize(list(reversed(range(5))), sort=True)
 
         exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
         tm.assert_numpy_array_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(list(reversed(np.arange(5.0))))
+        codes, uniques = algos.factorize(list(reversed(np.arange(5.0))))
         exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=np.float64)
         tm.assert_numpy_array_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(list(reversed(np.arange(5.0))), sort=True)
+        codes, uniques = algos.factorize(list(reversed(np.arange(5.0))), sort=True)
         exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=np.float64)
         tm.assert_numpy_array_equal(uniques, exp)
 
@@ -72,16 +72,16 @@ def test_mixed(self):
 
         # doc example reshaping.rst
         x = Series(["A", "A", np.nan, "B", 3.14, np.inf])
-        labels, uniques = algos.factorize(x)
+        codes, uniques = algos.factorize(x)
 
         exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = Index(["A", "B", 3.14, np.inf])
         tm.assert_index_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(x, sort=True)
+        codes, uniques = algos.factorize(x, sort=True)
         exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = Index([3.14, np.inf, "A", "B"])
         tm.assert_index_equal(uniques, exp)
 
@@ -91,16 +91,16 @@ def test_datelike(self):
         v1 = Timestamp("20130101 09:00:00.00004")
         v2 = Timestamp("20130101")
         x = Series([v1, v1, v1, v2, v2, v1])
-        labels, uniques = algos.factorize(x)
+        codes, uniques = algos.factorize(x)
 
         exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = DatetimeIndex([v1, v2])
         tm.assert_index_equal(uniques, exp)
 
-        labels, uniques = algos.factorize(x, sort=True)
+        codes, uniques = algos.factorize(x, sort=True)
         exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         exp = DatetimeIndex([v2, v1])
         tm.assert_index_equal(uniques, exp)
 
@@ -110,28 +110,28 @@ def test_datelike(self):
         x = Series([v1, v1, v1, v2, v2, v1])
 
         # periods are not 'sorted' as they are converted back into an index
-        labels, uniques = algos.factorize(x)
+        codes, uniques = algos.factorize(x)
         exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2]))
 
-        labels, uniques = algos.factorize(x, sort=True)
+        codes, uniques = algos.factorize(x, sort=True)
         exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2]))
 
         # GH 5986
         v1 = pd.to_timedelta("1 day 1 min")
         v2 = pd.to_timedelta("1 day")
         x = Series([v1, v2, v1, v1, v2, v2, v1])
-        labels, uniques = algos.factorize(x)
+        codes, uniques = algos.factorize(x)
         exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         tm.assert_index_equal(uniques, pd.to_timedelta([v1, v2]))
 
-        labels, uniques = algos.factorize(x, sort=True)
+        codes, uniques = algos.factorize(x, sort=True)
         exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp)
-        tm.assert_numpy_array_equal(labels, exp)
+        tm.assert_numpy_array_equal(codes, exp)
         tm.assert_index_equal(uniques, pd.to_timedelta([v2, v1]))
 
     def test_factorize_nan(self):
@@ -158,7 +158,7 @@ def test_factorize_nan(self):
         tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
 
     @pytest.mark.parametrize(
-        "data,expected_label,expected_level",
+        "data, expected_codes, expected_uniques",
         [
             (
                 [(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"],
@@ -173,14 +173,14 @@ def test_factorize_nan(self):
             ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]),
         ],
     )
-    def test_factorize_tuple_list(self, data, expected_label, expected_level):
+    def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
         # GH9454
-        result = pd.factorize(data)
+        codes, uniques = pd.factorize(data)
 
-        tm.assert_numpy_array_equal(result[0], np.array(expected_label, dtype=np.intp))
+        tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))
 
-        expected_level_array = com.asarray_tuplesafe(expected_level, dtype=object)
-        tm.assert_numpy_array_equal(result[1], expected_level_array)
+        expected_uniques_array = com.asarray_tuplesafe(expected_uniques, dtype=object)
+        tm.assert_numpy_array_equal(uniques, expected_uniques_array)
 
     def test_complex_sorting(self):
         # gh 12666 - check no segfault
@@ -197,52 +197,52 @@ def test_complex_sorting(self):
     def test_float64_factorize(self, writable):
         data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
         data.setflags(write=writable)
-        exp_labels = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
-        exp_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)
+        expected_codes = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
+        expected_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)
 
-        labels, uniques = algos.factorize(data)
-        tm.assert_numpy_array_equal(labels, exp_labels)
-        tm.assert_numpy_array_equal(uniques, exp_uniques)
+        codes, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_uint64_factorize(self, writable):
         data = np.array([2 ** 64 - 1, 1, 2 ** 64 - 1], dtype=np.uint64)
         data.setflags(write=writable)
-        exp_labels = np.array([0, 1, 0], dtype=np.intp)
-        exp_uniques = np.array([2 ** 64 - 1, 1], dtype=np.uint64)
+        expected_codes = np.array([0, 1, 0], dtype=np.intp)
+        expected_uniques = np.array([2 ** 64 - 1, 1], dtype=np.uint64)
 
-        labels, uniques = algos.factorize(data)
-        tm.assert_numpy_array_equal(labels, exp_labels)
-        tm.assert_numpy_array_equal(uniques, exp_uniques)
+        codes, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_int64_factorize(self, writable):
         data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64)
         data.setflags(write=writable)
-        exp_labels = np.array([0, 1, 0], dtype=np.intp)
-        exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64)
+        expected_codes = np.array([0, 1, 0], dtype=np.intp)
+        expected_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64)
 
-        labels, uniques = algos.factorize(data)
-        tm.assert_numpy_array_equal(labels, exp_labels)
-        tm.assert_numpy_array_equal(uniques, exp_uniques)
+        codes, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_string_factorize(self, writable):
         data = np.array(["a", "c", "a", "b", "c"], dtype=object)
         data.setflags(write=writable)
-        exp_labels = np.array([0, 1, 0, 2, 1], dtype=np.intp)
-        exp_uniques = np.array(["a", "c", "b"], dtype=object)
+        expected_codes = np.array([0, 1, 0, 2, 1], dtype=np.intp)
+        expected_uniques = np.array(["a", "c", "b"], dtype=object)
 
-        labels, uniques = algos.factorize(data)
-        tm.assert_numpy_array_equal(labels, exp_labels)
-        tm.assert_numpy_array_equal(uniques, exp_uniques)
+        codes, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_object_factorize(self, writable):
         data = np.array(["a", "c", None, np.nan, "a", "b", pd.NaT, "c"], dtype=object)
         data.setflags(write=writable)
-        exp_labels = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
-        exp_uniques = np.array(["a", "c", "b"], dtype=object)
+        expected_codes = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
+        expected_uniques = np.array(["a", "c", "b"], dtype=object)
 
-        labels, uniques = algos.factorize(data)
-        tm.assert_numpy_array_equal(labels, exp_labels)
-        tm.assert_numpy_array_equal(uniques, exp_uniques)
+        codes, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     def test_deprecate_order(self):
         # gh 19727 - check warning is raised for deprecated keyword, order.
@@ -263,11 +263,11 @@ def test_deprecate_order(self):
     )
     def test_parametrized_factorize_na_value_default(self, data):
         # arrays that include the NA default for that type, but isn't used.
-        l, u = algos.factorize(data)
+        codes, uniques = algos.factorize(data)
         expected_uniques = data[[0, 1]]
-        expected_labels = np.array([0, 1, 0], dtype=np.intp)
-        tm.assert_numpy_array_equal(l, expected_labels)
-        tm.assert_numpy_array_equal(u, expected_uniques)
+        expected_codes = np.array([0, 1, 0], dtype=np.intp)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     @pytest.mark.parametrize(
         "data, na_value",
@@ -282,11 +282,11 @@ def test_parametrized_factorize_na_value_default(self, data):
         ],
     )
     def test_parametrized_factorize_na_value(self, data, na_value):
-        l, u = algos._factorize_array(data, na_value=na_value)
+        codes, uniques = algos._factorize_array(data, na_value=na_value)
         expected_uniques = data[[1, 3]]
-        expected_labels = np.array([-1, 0, -1, 1], dtype=np.intp)
-        tm.assert_numpy_array_equal(l, expected_labels)
-        tm.assert_numpy_array_equal(u, expected_uniques)
+        expected_codes = np.array([-1, 0, -1, 1], dtype=np.intp)
+        tm.assert_numpy_array_equal(codes, expected_codes)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
 
     @pytest.mark.parametrize("sort", [True, False])
     @pytest.mark.parametrize("na_sentinel", [-1, -10, 100])
@@ -305,14 +305,14 @@ def test_parametrized_factorize_na_value(self, data, na_value):
         ids=["numpy_array", "extension_array"],
     )
     def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques):
-        labels, uniques = algos.factorize(data, sort=sort, na_sentinel=na_sentinel)
+        codes, uniques = algos.factorize(data, sort=sort, na_sentinel=na_sentinel)
         if sort:
-            expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
+            expected_codes = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
             expected_uniques = algos.safe_sort(uniques)
         else:
-            expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
+            expected_codes = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
             expected_uniques = uniques
-        tm.assert_numpy_array_equal(labels, expected_labels)
+        tm.assert_numpy_array_equal(codes, expected_codes)
         if isinstance(data, np.ndarray):
             tm.assert_numpy_array_equal(uniques, expected_uniques)
         else:
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 1f19f58e80f26..21fed62e51fdf 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -707,9 +707,9 @@ def test_factorize(self):
             else:
                 exp_arr = np.array(range(len(o)), dtype=np.intp)
                 exp_uniques = o
-            labels, uniques = o.factorize()
+            codes, uniques = o.factorize()
 
-            tm.assert_numpy_array_equal(labels, exp_arr)
+            tm.assert_numpy_array_equal(codes, exp_arr)
             if isinstance(o, Series):
                 tm.assert_index_equal(uniques, Index(orig), check_names=False)
             else:
@@ -736,9 +736,9 @@ def test_factorize_repeated(self):
             exp_arr = np.array(
                 [5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp
             )
-            labels, uniques = n.factorize(sort=True)
+            codes, uniques = n.factorize(sort=True)
 
-            tm.assert_numpy_array_equal(labels, exp_arr)
+            tm.assert_numpy_array_equal(codes, exp_arr)
             if isinstance(o, Series):
                 tm.assert_index_equal(
                     uniques, Index(orig).sort_values(), check_names=False
@@ -747,8 +747,8 @@ def test_factorize_repeated(self):
                 tm.assert_index_equal(uniques, o, check_names=False)
 
             exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp)
-            labels, uniques = n.factorize(sort=False)
-            tm.assert_numpy_array_equal(labels, exp_arr)
+            codes, uniques = n.factorize(sort=False)
+            tm.assert_numpy_array_equal(codes, exp_arr)
 
             if isinstance(o, Series):
                 expected = Index(o.iloc[5:10].append(o.iloc[:5]))