Skip to content

Commit d343e59

Browse files
authored
BUG: fix DataFrame.join with a categorical index leading to unexpected reordering (pandas-dev#47881)
* BUG: fix DataFrame.join with a categorical index leading to unexpected reordering * pre-commit issue
1 parent 6ed93fc commit d343e59

File tree

3 files changed

+17
-0
lines changed

3 files changed

+17
-0
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,7 @@ Reshaping
10241024
- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
10251025
- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
10261026
- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`)
1027+
- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`)
10271028

10281029
Sparse
10291030
^^^^^^

pandas/core/indexes/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -4682,6 +4682,7 @@ def join(
46824682
not isinstance(self, ABCMultiIndex)
46834683
or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
46844684
)
4685+
and not is_categorical_dtype(self.dtype)
46854686
):
46864687
# Categorical is monotonic if data are ordered as categories, but join can
46874688
# not handle this in case of not lexicographically monotonic GH#38502

pandas/tests/reshape/merge/test_join.py

+15
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,21 @@ def test_join_datetime_string(self):
712712
)
713713
tm.assert_frame_equal(result, expected)
714714

715+
def test_join_with_categorical_index(self):
716+
# GH47812
717+
ix = ["a", "b"]
718+
id1 = pd.CategoricalIndex(ix, categories=ix)
719+
id2 = pd.CategoricalIndex(reversed(ix), categories=reversed(ix))
720+
721+
df1 = DataFrame({"c1": ix}, index=id1)
722+
df2 = DataFrame({"c2": reversed(ix)}, index=id2)
723+
result = df1.join(df2)
724+
expected = DataFrame(
725+
{"c1": ["a", "b"], "c2": ["a", "b"]},
726+
index=pd.CategoricalIndex(["a", "b"], categories=["a", "b"]),
727+
)
728+
tm.assert_frame_equal(result, expected)
729+
715730

716731
def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"):
717732

0 commit comments

Comments
 (0)