Skip to content

Commit 12475e0

Browse files
BUG: preserve object-dtype index when accessing DataFrame column / PERF: improve perf of Series fastpath constructor (#42950)
1 parent 24e360a commit 12475e0

File tree

4 files changed

+38
-19
lines changed

4 files changed

+38
-19
lines changed

asv_bench/benchmarks/series_methods.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44

55
from pandas import (
6+
Index,
67
NaT,
78
Series,
89
date_range,
@@ -12,20 +13,23 @@
1213

1314

1415
class SeriesConstructor:
15-
16-
params = [None, "dict"]
17-
param_names = ["data"]
18-
19-
def setup(self, data):
16+
def setup(self):
2017
self.idx = date_range(
2118
start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
2219
)
23-
dict_data = dict(zip(self.idx, range(len(self.idx))))
24-
self.data = None if data is None else dict_data
20+
self.data = dict(zip(self.idx, range(len(self.idx))))
21+
self.array = np.array([1, 2, 3])
22+
self.idx2 = Index(["a", "b", "c"])
2523

26-
def time_constructor(self, data):
24+
def time_constructor_dict(self):
2725
Series(data=self.data, index=self.idx)
2826

27+
def time_constructor_no_data(self):
28+
Series(data=None, index=self.idx)
29+
30+
def time_constructor_fastpath(self):
31+
Series(self.array, index=self.idx2, name="name", fastpath=True)
32+
2933

3034
class ToFrame:
3135
params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]]

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ Indexing
263263
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
264264
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
265265
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
266+
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
266267
-
267268

268269
Missing

pandas/core/series.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,12 @@ def __init__(
457457
data = SingleArrayManager.from_array(data, index)
458458

459459
NDFrame.__init__(self, data)
460-
self.name = name
461-
self._set_axis(0, index, fastpath=True)
460+
if fastpath:
461+
# skips validation of the name
462+
object.__setattr__(self, "_name", name)
463+
else:
464+
self.name = name
465+
self._set_axis(0, index)
462466

463467
def _init_dict(
464468
self, data, index: Index | None = None, dtype: DtypeObj | None = None
@@ -539,15 +543,14 @@ def _constructor_expanddim(self) -> type[DataFrame]:
539543
def _can_hold_na(self) -> bool:
540544
return self._mgr._can_hold_na
541545

542-
def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
546+
def _set_axis(self, axis: int, labels) -> None:
543547
"""
544548
Override generic, we want to set the _typ here.
545549
546550
This is called from the cython code when we set the `index` attribute
547551
directly, e.g. `series.index = [1, 2, 3]`.
548552
"""
549-
if not fastpath:
550-
labels = ensure_index(labels)
553+
labels = ensure_index(labels)
551554

552555
if labels._is_all_dates:
553556
deep_labels = labels
@@ -559,17 +562,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None:
559562
):
560563
try:
561564
labels = DatetimeIndex(labels)
562-
# need to set here because we changed the index
563-
if fastpath:
564-
self._mgr.set_axis(axis, labels)
565565
except (tslibs.OutOfBoundsDatetime, ValueError):
566566
# labels may exceed datetime bounds,
567567
# or not be a DatetimeIndex
568568
pass
569569

570-
if not fastpath:
571-
# The ensure_index call above ensures we have an Index object
572-
self._mgr.set_axis(axis, labels)
570+
# The ensure_index call above ensures we have an Index object
571+
self._mgr.set_axis(axis, labels)
573572

574573
# ndarray compatibility
575574
@property

pandas/tests/frame/indexing/test_indexing.py

+15
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,21 @@ def test_loc_iloc_setitem_non_categorical_rhs(
15261526
with pytest.raises(TypeError, match=msg1):
15271527
indexer(df)[key] = ["c", "c"]
15281528

1529+
@pytest.mark.parametrize("indexer", [tm.getitem, tm.loc, tm.iloc])
1530+
def test_getitem_preserve_object_index_with_dates(self, indexer):
1531+
# https://github.com/pandas-dev/pandas/pull/42950 - when selecting a column
1532+
# from a DataFrame, don't try to infer a datetime dtype for an object-dtype index on Series construction
1533+
idx = date_range("2012", periods=3).astype(object)
1534+
df = DataFrame({0: [1, 2, 3]}, index=idx)
1535+
assert df.index.dtype == object
1536+
1537+
if indexer is tm.getitem:
1538+
ser = indexer(df)[0]
1539+
else:
1540+
ser = indexer(df)[:, 0]
1541+
1542+
assert ser.index.dtype == object
1543+
15291544

15301545
class TestDepreactedIndexers:
15311546
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)