Skip to content

Commit 3fdfa66

Browse files
Backport PR #47143 on branch 1.4.x (REGR: setitem writing into RangeIndex instead of creating a copy) (#47256)
Backport PR #47143: REGR: setitem writing into RangeIndex instead of creating a copy Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
1 parent 34259fb commit 3fdfa66

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`DataFrame.replace` where explicitly passing ``None`` as the replacement value in a dictionary to ``to_replace`` also cast other columns to object dtype even when there were no values to replace (:issue:`46634`)
18+
- Fixed regression where setting values with :meth:`DataFrame.loc` also updated the :class:`RangeIndex` when the index had been set as a new column and that column was updated afterwards (:issue:`47128`)
1819
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was in the sorting column (:issue:`46589`)
1920
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
2021
- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`)

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ def sanitize_array(
508508
dtype = dtype.numpy_dtype
509509

510510
# extract ndarray or ExtensionArray, ensure we have no PandasArray
511-
data = extract_array(data, extract_numpy=True)
511+
data = extract_array(data, extract_numpy=True, extract_range=True)
512512

513513
if isinstance(data, np.ndarray) and data.ndim == 0:
514514
if dtype is None:
@@ -583,7 +583,7 @@ def sanitize_array(
583583
# materialize e.g. generators, convert e.g. tuples, abc.ValueView
584584
if hasattr(data, "__array__"):
585585
# e.g. dask array GH#38645
586-
data = np.asarray(data)
586+
data = np.array(data, copy=copy)
587587
else:
588588
data = list(data)
589589

pandas/tests/frame/indexing/test_setitem.py

+9
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,15 @@ def test_frame_setitem_newcol_timestamp(self):
852852
data[ts] = np.nan # works, mostly a smoke-test
853853
assert np.isnan(data[ts]).all()
854854

855+
def test_frame_setitem_rangeindex_into_new_col(self):
856+
# GH#47128
857+
df = DataFrame({"a": ["a", "b"]})
858+
df["b"] = df.index
859+
df.loc[[False, True], "b"] = 100
860+
result = df.loc[[1], :]
861+
expected = DataFrame({"a": ["b"], "b": [100]}, index=[1])
862+
tm.assert_frame_equal(result, expected)
863+
855864

856865
class TestDataFrameSetItemSlicing:
857866
def test_setitem_slice_position(self):

pandas/tests/test_downstream.py

+24
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,27 @@ def test_missing_required_dependency():
269269
output = exc.value.stdout.decode()
270270
for name in ["numpy", "pytz", "dateutil"]:
271271
assert name in output
272+
273+
274+
def test_frame_setitem_dask_array_into_new_col():
275+
# GH#47128
276+
277+
# dask sets "compute.use_numexpr" to False, so catch the current value
278+
# and make sure to reset it afterwards to avoid impacting other tests
279+
olduse = pd.get_option("compute.use_numexpr")
280+
281+
try:
282+
dask = import_module("dask") # noqa:F841
283+
284+
import dask.array as da
285+
286+
dda = da.array([1, 2])
287+
df = DataFrame({"a": ["a", "b"]})
288+
df["b"] = dda
289+
df["c"] = dda
290+
df.loc[[False, True], "b"] = 100
291+
result = df.loc[[1], :]
292+
expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
293+
tm.assert_frame_equal(result, expected)
294+
finally:
295+
pd.set_option("compute.use_numexpr", olduse)

0 commit comments

Comments
 (0)