Skip to content

Commit bbd6526

Browse files
authored
ENH: pandas.api.interchange.from_dataframe now uses the Arrow PyCapsule Interface if available, only falling back to the DataFrame Interchange Protocol if that fails (#60739)
* Add a test for list dtype
* Catch ``ArrowInvalid`` and keep raising ``RuntimeError``
* Use an RST hyperlink
1 parent 27baf48 commit bbd6526

File tree

3 files changed

+27
-4
lines changed

3 files changed

+27
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Other enhancements
3030
^^^^^^^^^^^^^^^^^^
3131
- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
3232
- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
33+
- :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_ if available, only falling back to the DataFrame Interchange Protocol if that fails (:issue:`60739`)
3334
- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
3435
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3536
- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)

pandas/core/interchange/from_dataframe.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
4141
.. note::
4242
4343
For new development, we highly recommend using the Arrow C Data Interface
44-
alongside the Arrow PyCapsule Interface instead of the interchange protocol
44+
alongside the Arrow PyCapsule Interface instead of the interchange protocol.
45+
From pandas 3.0 onwards, `from_dataframe` uses the PyCapsule Interface,
46+
only falling back to the interchange protocol if that fails.
4547
4648
.. warning::
4749
@@ -90,6 +92,18 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
9092
if isinstance(df, pd.DataFrame):
9193
return df
9294

95+
if hasattr(df, "__arrow_c_stream__"):
96+
try:
97+
pa = import_optional_dependency("pyarrow", min_version="14.0.0")
98+
except ImportError:
99+
# fallback to _from_dataframe
100+
pass
101+
else:
102+
try:
103+
return pa.table(df).to_pandas(zero_copy_only=not allow_copy)
104+
except pa.ArrowInvalid as e:
105+
raise RuntimeError(e) from e
106+
93107
if not hasattr(df, "__dataframe__"):
94108
raise ValueError("`df` does not support __dataframe__")
95109

pandas/tests/interchange/test_impl.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def test_empty_pyarrow(data):
278278
expected = pd.DataFrame(data)
279279
arrow_df = pa_from_dataframe(expected)
280280
result = from_dataframe(arrow_df)
281-
tm.assert_frame_equal(result, expected)
281+
tm.assert_frame_equal(result, expected, check_column_type=False)
282282

283283

284284
def test_multi_chunk_pyarrow() -> None:
@@ -288,8 +288,7 @@ def test_multi_chunk_pyarrow() -> None:
288288
table = pa.table([n_legs], names=names)
289289
with pytest.raises(
290290
RuntimeError,
291-
match="To join chunks a copy is required which is "
292-
"forbidden by allow_copy=False",
291+
match="Cannot do zero copy conversion into multi-column DataFrame block",
293292
):
294293
pd.api.interchange.from_dataframe(table, allow_copy=False)
295294

@@ -641,3 +640,12 @@ def test_buffer_dtype_categorical(
641640
col = dfi.get_column_by_name("data")
642641
assert col.dtype == expected_dtype
643642
assert col.get_buffers()["data"][1] == expected_buffer_dtype
643+
644+
645+
def test_from_dataframe_list_dtype():
646+
pa = pytest.importorskip("pyarrow", "14.0.0")
647+
data = {"a": [[1, 2], [4, 5, 6]]}
648+
tbl = pa.table(data)
649+
result = from_dataframe(tbl)
650+
expected = pd.DataFrame(data)
651+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)