Skip to content

Commit f3f10bc

Browse files
committed
PERF: building MultiIndex with categorical levels
1 parent 9a67ff4 commit f3f10bc

File tree

3 files changed

+21
-4
lines changed

3 files changed

+21
-4
lines changed

asv_bench/benchmarks/multiindex_object.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44
import pandas.util.testing as tm
5-
from pandas import date_range, MultiIndex
5+
from pandas import date_range, MultiIndex, DataFrame
66

77

88
class GetLoc:
@@ -126,4 +126,18 @@ def time_datetime_level_values_sliced(self, mi):
126126
mi[:10].values
127127

128128

129+
class CategoricalLevel:
130+
131+
def setup(self):
132+
133+
self.df = DataFrame({
134+
'a': np.arange(1_000_000, dtype=np.int32),
135+
'b': np.arange(1_000_000, dtype=np.int64),
136+
'c': np.arange(1_000_000, dtype=float),
137+
}).astype({'a': 'category', 'b': 'category'})
138+
139+
def time_categorical_level(self):
140+
self.df.set_index(['a', 'b'])
141+
142+
129143
from .pandas_vb_common import setup # noqa: F401

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ Performance Improvements
514514
- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero and float ``NaN``; by faster checking the string for the possibility of being a date (:issue:`25754`)
515515
- Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`)
516516
- Restored performance of :meth:`DatetimeIndex.__iter__` by re-enabling specialized code path (:issue:`26702`)
517+
- Improved performance when building :class:`MultiIndex` with at least one :class:`CategoricalIndex` level (:issue:`22044`)
517518

518519
.. _whatsnew_0250.bug_fixes:
519520

pandas/core/arrays/categorical.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -2666,9 +2666,11 @@ def _factorize_from_iterable(values):
26662666
raise TypeError("Input must be list-like")
26672667

26682668
if is_categorical(values):
2669-
if isinstance(values, (ABCCategoricalIndex, ABCSeries)):
2670-
values = values._values
2671-
categories = CategoricalIndex(values.categories, dtype=values.dtype)
2669+
values = CategoricalIndex(values)
2670+
# The CategoricalIndex level we want to build has the same categories
2671+
# as values but its codes are by definition [0, ..., n_categories - 1]
2672+
cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype)
2673+
categories = values._create_from_codes(cat_codes)
26722674
codes = values.codes
26732675
else:
26742676
# The value of ordered is irrelevant since we don't use cat as such,

0 commit comments

Comments
 (0)