pandas-dev · robin-mader-bis · Mar 26, 2025 · Mar 28, 2025 · Apr 2, 2025 · Apr 4, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -636,6 +636,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
 -
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -452,7 +452,7 @@ def __init__(
                 if isinstance(values, Index):
                     arr = values._data._pa_array.combine_chunks()
                 else:
-                    arr = values._pa_array.combine_chunks()
+                    arr = extract_array(values)._pa_array.combine_chunks()
                 categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype)
                 codes = arr.indices.to_numpy()
                 dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -15,6 +15,7 @@
 
 import pandas as pd
 from pandas import (
+    ArrowDtype,
     Categorical,
     DataFrame,
     Grouper,
@@ -2851,3 +2852,38 @@ def test_pivot_margins_with_none_index(self):
             ),
         )
         tm.assert_frame_equal(result, expected)
+
+    # Old pyarrow versions raise this deprecation; newer versions fixed it
+    @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+    def test_pivot_with_pyarrow_categorical(self):
+        # GH#53051: pivot on a pyarrow dictionary column must not raise
+        pa = pytest.importorskip("pyarrow")
+
+        # Start from a pandas categorical / float32 frame ...
+        raw = {"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]}
+        categorical_df = DataFrame(raw).astype(
+            {"string_column": "category", "number_column": "float32"}
+        )
+
+        # ... then move both columns onto pyarrow-backed dtypes
+        arrow_dtypes = {
+            "string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())),
+            "number_column": "float[pyarrow]",
+        }
+        arrow_df = categorical_df.astype(arrow_dtypes)
+
+        # The pivot itself is the operation under test
+        result = arrow_df.pivot(columns=["string_column"], values=["number_column"])
+
+        # Compare the values too, so a silently wrong pivot is caught
+        expected_columns = MultiIndex.from_arrays(
+            [["number_column"] * 3, ["A", "B", "C"]],
+            names=(None, "string_column"),
+        )
+        expected = DataFrame(
+            [[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]],
+            columns=expected_columns,
+        )
+        tm.assert_frame_equal(
+            result, expected, check_dtype=False, check_column_type=False
+        )
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pandas import (
+    ArrowDtype,
     DataFrame,
     MultiIndex,
     Series,
@@ -318,6 +319,30 @@ def test_multiindex_dt_with_nan(self):
         expected = Series(["a", "b", "c", "d"], name=("sub", np.nan))
         tm.assert_series_equal(result, expected)
 
+    # Ignore deprecation raised by old versions of pyarrow. Already fixed in
+    # newer versions
+    @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
+    def test_multiindex_with_pyarrow_categorical(self):
+        # GH#53051: set_index on a pyarrow dictionary column must not raise
+        pa = pytest.importorskip("pyarrow")
+
+        # Create dataframe with categorical column
+        df = DataFrame(
+            {"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]}
+        ).astype({"string_column": "category", "number_column": "float32"})
+        # Convert dataframe to pyarrow backend
+        df = df.astype(
+            {
+                "string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())),
+                "number_column": "float[pyarrow]",
+            }
+        )
+
+        # Setting the index must succeed and consume both columns as levels
+        result = df.set_index(["string_column", "number_column"])
+        assert list(result.index.names) == ["string_column", "number_column"]
+        assert result.shape == (3, 0)
+
 
 class TestSorted:
     """everything you wanted to test about sorting"""