Skip to content

Commit 28edd06

Browse files
TomAugspurger authored and jorisvandenbossche committed
PERF: Faster Series.__getattribute__ (#20834)
1 parent f799916 commit 28edd06

File tree

10 files changed

+58
-1
lines changed

10 files changed

+58
-1
lines changed

asv_bench/benchmarks/series_methods.py

+13
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,16 @@ def setup(self):
121121

122122
def time_dir_strings(self):
123123
dir(self.s)
124+
125+
126+
class SeriesGetattr(object):
127+
# https://github.com/pandas-dev/pandas/issues/19764
128+
goal_time = 0.2
129+
130+
def setup(self):
131+
self.s = Series(1,
132+
index=date_range("2012-01-01", freq='s',
133+
periods=int(1e6)))
134+
135+
def time_series_datetimeindex_repr(self):
136+
getattr(self.s, 'a', None)

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,7 @@ Performance Improvements
961961
- Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
962962
- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
963963
- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
964+
- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`)
964965
- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`)
965966

966967
.. _whatsnew_0230.docs:

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4375,7 +4375,7 @@ def __getattr__(self, name):
43754375
name in self._accessors):
43764376
return object.__getattribute__(self, name)
43774377
else:
4378-
if name in self._info_axis:
4378+
if self._info_axis._can_hold_identifiers_and_holds_name(name):
43794379
return self[name]
43804380
return object.__getattribute__(self, name)
43814381

pandas/core/indexes/base.py

+13
Original file line numberDiff line numberDiff line change
@@ -2083,6 +2083,19 @@ def __getitem__(self, key):
20832083
else:
20842084
return result
20852085

2086+
def _can_hold_identifiers_and_holds_name(self, name):
2087+
"""
2088+
Faster check for ``name in self`` when we know `name` is a Python
2089+
identifier (e.g. in NDFrame.__getattr__, which hits this to support
2090+
. key lookup). For indexes that can't hold identifiers (everything
2091+
but object & categorical) we just return False.
2092+
2093+
https://github.com/pandas-dev/pandas/issues/19764
2094+
"""
2095+
if self.is_object() or self.is_categorical():
2096+
return name in self
2097+
return False
2098+
20862099
def append(self, other):
20872100
"""
20882101
Append a collection of Index options together

pandas/tests/indexes/datetimelike.py

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
class DatetimeLike(Base):
1010

11+
def test_can_hold_identifiers(self):
12+
idx = self.create_index()
13+
key = idx[0]
14+
assert idx._can_hold_identifiers_and_holds_name(key) is False
15+
1116
def test_shift_identity(self):
1217

1318
idx = self.create_index()

pandas/tests/indexes/test_base.py

+5
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ def generate_index_types(self, skip_index_keys=[]):
6666
if key not in skip_index_keys:
6767
yield key, index
6868

69+
def test_can_hold_identifiers(self):
70+
idx = self.create_index()
71+
key = idx[0]
72+
assert idx._can_hold_identifiers_and_holds_name(key) is True
73+
6974
def test_new_axis(self):
7075
new_index = self.dateIndex[None, :]
7176
assert new_index.ndim == 2

pandas/tests/indexes/test_category.py

+5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def create_index(self, categories=None, ordered=False):
3333
return CategoricalIndex(
3434
list('aabbca'), categories=categories, ordered=ordered)
3535

36+
def test_can_hold_identifiers(self):
37+
idx = self.create_index(categories=list('abcd'))
38+
key = idx[0]
39+
assert idx._can_hold_identifiers_and_holds_name(key) is True
40+
3641
def test_construction(self):
3742

3843
ci = self.create_index(categories=list('abcd'))

pandas/tests/indexes/test_multi.py

+5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ def setup_method(self, method):
4848
def create_index(self):
4949
return self.index
5050

51+
def test_can_hold_identifiers(self):
52+
idx = self.create_index()
53+
key = idx[0]
54+
assert idx._can_hold_identifiers_and_holds_name(key) is True
55+
5156
def test_boolean_context_compat2(self):
5257

5358
# boolean context compat

pandas/tests/indexes/test_numeric.py

+5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ def test_index_rdiv_timedelta(self, scalar_td, index):
6464

6565
class Numeric(Base):
6666

67+
def test_can_hold_identifiers(self):
68+
idx = self.create_index()
69+
key = idx[0]
70+
assert idx._can_hold_identifiers_and_holds_name(key) is False
71+
6772
def test_numeric_compat(self):
6873
pass # override Base method
6974

pandas/tests/indexes/test_range.py

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ def check_binop(self, ops, scalars, idxs):
4444
expected = op(Int64Index(idx), scalar)
4545
tm.assert_index_equal(result, expected)
4646

47+
def test_can_hold_identifiers(self):
48+
idx = self.create_index()
49+
key = idx[0]
50+
assert idx._can_hold_identifiers_and_holds_name(key) is False
51+
4752
def test_binops(self):
4853
ops = [operator.add, operator.sub, operator.mul, operator.floordiv,
4954
operator.truediv]

0 commit comments

Comments
 (0)