BUG: fix setitem with enlargement with pyarrow Scalar #52833

Closed
27 changes: 27 additions & 0 deletions pandas/_libs/lib.pyx
@@ -145,6 +145,32 @@ i8max = <int64_t>INT64_MAX
u8max = <uint64_t>UINT64_MAX


cdef bint PYARROW_INSTALLED = False

try:
    import pyarrow as pa

    PYARROW_INSTALLED = True
except ImportError:
    pa = None


cpdef is_pyarrow_array(obj):
    if PYARROW_INSTALLED:
        return isinstance(obj, (pa.Array, pa.ChunkedArray))
    return False


cpdef is_pyarrow_scalar(obj):
    if PYARROW_INSTALLED:
        return isinstance(obj, pa.Scalar)
Member:

For my edification: if we did have pyarrow as a required dependency, is there a more performant version of this check? IIRC you said something about a lack of ABI stability, so that might not be viable?

Member Author:

No, this is exactly the same as what we would do if pyarrow were a required dependency, except for the if PYARROW_INSTALLED: check (but that should be fast, since it's just a boolean).

Only if we added a compile-time dependency on pyarrow could we cimport the class, and that might be faster (but that's not part of the current discussion, and this specific scalar check is certainly not in itself worth a compile-time dependency).

Member:

Only if we added a compile-time dependency on pyarrow

Yeah, that's the one I was wondering about.

    return False


def is_pyarrow_installed():
    return PYARROW_INSTALLED
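For reference, a quick illustration of how these new helpers behave (hedged: this assumes the branch from this PR and that pyarrow is importable; when it is not, both checks simply return False):

import pyarrow as pa

from pandas._libs import lib

lib.is_pyarrow_scalar(pa.scalar(1))        # True: plain isinstance check against pa.Scalar
lib.is_pyarrow_scalar(1)                   # False
lib.is_pyarrow_array(pa.array([1, 2, 3]))  # True: covers pa.Array and pa.ChunkedArray
lib.is_pyarrow_installed()                 # True whenever the guarded import succeeded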


@cython.wraparound(False)
@cython.boundscheck(False)
def memory_usage_of_objects(arr: object[:]) -> int64_t:
@@ -238,6 +264,7 @@ def is_scalar(val: object) -> bool:

    # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number
    return (PyNumber_Check(val)
            or is_pyarrow_scalar(val)
Member:

are there any sequence-like scalars that could accidentally get caught by the PySequence_Check check?

Member Author (@jorisvandenbossche, Apr 21, 2023):

Ah, yes, that's a good point. For example, a ListScalar has __getitem__, and I think that alone is already enough to pass PySequence_Check (a short demonstration follows after this hunk).

I can move it before PySequence_Check then?

I am wondering if we shouldn't remove __getitem__/__len__ from ListScalar et al. on the pyarrow side. Having scalars that also behave as sequences is just very annoying, and it's only there for a bit of convenience (although we will also have to handle this for Python list objects).
(For example, in shapely 2.0 we made all the LineString and Polygon objects non-sequences, to avoid all the issues of putting those in numpy arrays.)

Member:

I'd be OK with moving it to before the PySequence_Check call. I'm guessing the perf difference is only a few nanoseconds?

            or is_period_object(val)
            or is_interval(val)
            or is_offset_object(val))
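To make the ListScalar point from the discussion above concrete, a short demonstration (hedged: based on pyarrow's scalar behaviour at the time of this PR):

import pyarrow as pa

ls = pa.scalar([1, 2, 3])   # a pyarrow ListScalar
ls[0]                       # works: ListScalar implements __getitem__
len(ls)                     # 3: it also implements __len__
# Because of that, a PySequence_Check-style test can treat it as a sequence,
# which is why the is_pyarrow_scalar(val) check should run before any sequence check.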
6 changes: 6 additions & 0 deletions pandas/core/dtypes/missing.py
@@ -698,6 +698,12 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool:
"""
if not lib.is_scalar(obj) or not isna(obj):
return False
elif lib.is_pyarrow_scalar(obj):
return (
obj.is_null()
and hasattr(dtype, "pyarrow_dtype")
Member:

is this a proxy for isinstance(dtype, ArrowDtype)?

Member:

xref #51378

Member Author:

is this a proxy for isinstance(dtype, ArrowDtype)?

Yes, and I wanted to ask about that: currently this file does not import anything from pandas.core.arrays (only from libs and other core.dtypes modules). I imagine that is on purpose?
It's a bit of a strange situation, with some of our own dtypes defined in core.dtypes.dtypes (so those can be imported) and some others in core.arrays.

Member:

I imagine that is on purpose?

Yes. The idea was that core.dtypes can be mostly before/above the rest of core in the dependency structure (which is reflected in the isort order). If importing ArrowDtype here avoids a code smell, I won't object.

It's a bit of a strange situation, with some of our own dtypes defined in core.dtypes.dtypes (so those can be imported) and some others in core.arrays

Agreed. I've been thinking recently it would be nice to move ArrowDtype and SparseDtype to core.dtypes.dtypes.

            and dtype.pyarrow_dtype == obj.type
        )
    elif dtype.kind == "M":
        if isinstance(dtype, np.dtype):
            # i.e. not tzaware
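As an aside on the hasattr(dtype, "pyarrow_dtype") proxy discussed above, a minimal sketch of what the new branch checks (hedged illustration: it uses the public Scalar.is_valid property rather than the is_null() call from the diff, and assumes pandas' ArrowDtype plus pyarrow are available):

import pandas as pd
import pyarrow as pa

dtype = pd.ArrowDtype(pa.int32())              # exposes a .pyarrow_dtype attribute
na_scalar = pa.scalar(None, type=pa.int32())   # a null Int32Scalar

na_scalar.is_valid                       # False, i.e. the scalar is null
dtype.pyarrow_dtype == na_scalar.type    # True -> a valid NA for this dtype
dtype.pyarrow_dtype == pa.scalar(None, type=pa.int64()).type   # False: type mismatch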
12 changes: 10 additions & 2 deletions pandas/core/indexing.py
@@ -15,6 +15,7 @@

from pandas._config import using_copy_on_write

from pandas._libs import lib
from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas.compat import PYPY
@@ -2098,8 +2099,15 @@ def _setitem_with_indexer_missing(self, indexer, value):
                # We should not cast, if we have object dtype because we can
                # set timedeltas into object series
                curr_dtype = self.obj.dtype
                curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
Member:

this is already pretty kludgy...

Member Author:

Yeah, I certainly don't disagree... But this is not really new to this PR; happy to hear suggestions.

This part of the code was added in #47342 to support retaining the dtype in setitem with expansion.
I am wondering, though, if we could simplify this by "just" trying to create an array from the scalar with the dtype of the series first, and, if that fails, falling back on creating an array without specifying a dtype, letting inference and concat/common_dtype do their thing (so without trying to determine the new_dtype up front).

Member Author:

Something like

try:
    new_values = Series([value], dtype=self.obj.dtype)._values
except ...:
    new_values = Series([value])._values

(although I assume there is a reason for all the checks... I can see which tests would fail with the above)

Member:

I expect the try/except approach here would run into the same set of problems that _from_scalars is intended to address. I'm hoping you and i can collaborate on implementing that soonish.

Maybe shorter-term something like:

dummy = ser.iloc[:1].copy()  #  assuming self.obj is a Series, needs an additional getitem otherwise
dummy.iloc[0] = value

Then see if dummy raises/casts to determine if we can keep the dtype? This would be a little bit like #36226 but without a new method.

                new_dtype = maybe_promote(curr_dtype, value)[0]
                if lib.is_pyarrow_scalar(value) and hasattr(
                    curr_dtype, "pyarrow_dtype"
                ):
                    # TODO promote arrow scalar and type
Member:

This comment is referring to e.g. an int16[pyarrow] dtype with a value that is a too-big integer, right? It looks like the existing code path might actually work for regular Python ints, but that looks fragile. I'm thinking the robust way to handle this would be to explicitly handle ArrowDtype in maybe_promote (a rough sketch of that idea follows after this hunk).

                    new_dtype = curr_dtype
                    value = value.as_py()
                else:
                    curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
                    new_dtype = maybe_promote(curr_dtype, value)[0]
            else:
                new_dtype = None
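Picking up the maybe_promote suggestion from the comment above, a rough, hypothetical sketch of what "explicitly handle ArrowDtype in maybe_promote" could look like; maybe_promote_arrow is an illustrative name rather than a pandas function, fill_value is assumed to be a plain Python value, and real promotion logic would pick a wider arrow type instead of falling back to object:

import pyarrow as pa

from pandas.api.types import pandas_dtype


def maybe_promote_arrow(dtype, fill_value):
    # dtype is assumed to be an ArrowDtype, e.g. pandas_dtype("int16[pyarrow]")
    pa_type = dtype.pyarrow_dtype
    try:
        # if the scalar fits the current arrow type, keep the dtype as-is
        pa.scalar(fill_value, type=pa_type)
        return dtype, fill_value
    except (OverflowError, pa.ArrowInvalid, pa.ArrowTypeError):
        # e.g. a too-big integer; a real implementation would promote to a
        # wider arrow type (int16 -> int32) rather than dropping to object
        return pandas_dtype("object"), fill_value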

23 changes: 23 additions & 0 deletions pandas/tests/extension/test_arrow.py
@@ -2557,6 +2557,29 @@ def test_describe_numeric_data(pa_type):
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "value, target_value, dtype",
    [
        (pa.scalar(4, type="int32"), 4, "int32[pyarrow]"),
        (pa.scalar(4, type="int64"), 4, "int32[pyarrow]"),
        # (pa.scalar(4.5, type="float64"), 4, "int32[pyarrow]"),
        (4, 4, "int32[pyarrow]"),
        (pd.NA, None, "int32[pyarrow]"),
        (None, None, "int32[pyarrow]"),
        (pa.scalar(None, type="int32"), None, "int32[pyarrow]"),
        (pa.scalar(None, type="int64"), None, "int32[pyarrow]"),
    ],
)
def test_series_setitem_with_enlargement(value, target_value, dtype):
    # GH-52235
    # similar to series/indexing/test_setitem.py::test_setitem_keep_precision
    # and test_setitem_enlarge_with_na, but for arrow dtypes
    ser = pd.Series([1, 2, 3], dtype=dtype)
    ser[3] = value
    expected = pd.Series([1, 2, 3, target_value], dtype=dtype)
    tm.assert_series_equal(ser, expected)


@pytest.mark.parametrize(
    "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
)