Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

WIP/API: rename MultiIndex.labels to MultiIndex.codes #23663

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
@@ -1807,7 +1807,7 @@ def to_panel(self):
selfsorted = self

major_axis, minor_axis = selfsorted.index.levels
major_labels, minor_labels = selfsorted.index.labels
major_codes, minor_codes = selfsorted.index.codes
shape = len(major_axis), len(minor_axis)

# preserve names, if any
@@ -1822,8 +1822,8 @@ def to_panel(self):

# create new manager
new_mgr = selfsorted._data.reshape_nd(axes=new_axes,
labels=[major_labels,
minor_labels],
labels=[major_codes,
minor_codes],
shape=shape,
ref_items=selfsorted.columns)

@@ -4259,7 +4259,7 @@ def _maybe_casted_values(index, labels=None):
if isinstance(self.index, MultiIndex):
names = [n if n is not None else ('level_%d' % i)
for (i, n) in enumerate(self.index.names)]
to_insert = lzip(self.index.levels, self.index.labels)
to_insert = lzip(self.index.levels, self.index.codes)
else:
default = 'index' if 'index' not in self else 'level_0'
names = ([default] if self.index.name is None
@@ -7167,8 +7167,9 @@ def _count_level(self, level, axis=0, numeric_only=False):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
labels = ensure_int64(count_axis.labels[level])
counts = lib.count_level_2d(mask, labels, len(level_index), axis=0)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index),
axis=0)

result = DataFrame(counts, index=level_index, columns=agg_axis)

2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
@@ -1111,7 +1111,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
lab = cut(Series(val), bins, include_lowest=True)
lev = lab.cat.categories
lab = lev.take(lab.cat.codes)
llab = lambda lab, inc: lab[inc]._multiindex.labels[-1]
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]

if is_interval_dtype(lab):
# TODO: should we do this inside II?
34 changes: 17 additions & 17 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
@@ -3258,27 +3258,27 @@ def droplevel(self, level=0):
# The two checks above guarantee that here self is a MultiIndex

new_levels = list(self.levels)
new_labels = list(self.labels)
new_codes = list(self.codes)
new_names = list(self.names)

for i in levnums:
new_levels.pop(i)
new_labels.pop(i)
new_codes.pop(i)
new_names.pop(i)

if len(new_levels) == 1:

# set nan if needed
mask = new_labels[0] == -1
result = new_levels[0].take(new_labels[0])
mask = new_codes[0] == -1
result = new_levels[0].take(new_codes[0])
if mask.any():
result = result.putmask(mask, np.nan)

result.name = new_names[0]
return result
else:
from .multi import MultiIndex
return MultiIndex(levels=new_levels, labels=new_labels,
return MultiIndex(levels=new_levels, labels=new_codes,
names=new_names, verify_integrity=False)

_index_shared_docs['get_indexer'] = """
@@ -4054,19 +4054,19 @@ def _get_leaf_sorter(labels):
left_indexer = None
join_index = left
else: # sort the leaves
left_indexer = _get_leaf_sorter(left.labels[:level + 1])
left_indexer = _get_leaf_sorter(left.codes[:level + 1])
join_index = left[left_indexer]

else:
left_lev_indexer = ensure_int64(left_lev_indexer)
rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
len(old_level))

new_lev_labels = algos.take_nd(rev_indexer, left.labels[level],
new_lev_labels = algos.take_nd(rev_indexer, left.codes[level],
allow_fill=False)

new_labels = list(left.labels)
new_labels[level] = new_lev_labels
new_codes = list(left.codes)
new_codes[level] = new_lev_labels

new_levels = list(left.levels)
new_levels[level] = new_level
@@ -4075,7 +4075,7 @@ def _get_leaf_sorter(labels):
left_indexer = np.arange(len(left), dtype=np.intp)
mask = new_lev_labels != -1
if not mask.all():
new_labels = [lab[mask] for lab in new_labels]
new_codes = [lab[mask] for lab in new_codes]
left_indexer = left_indexer[mask]

else: # tie out the order with other
@@ -4086,31 +4086,31 @@ def _get_leaf_sorter(labels):

# missing values are placed first; drop them!
left_indexer = left_indexer[counts[0]:]
new_labels = [lab[left_indexer] for lab in new_labels]
new_codes = [lab[left_indexer] for lab in new_codes]

else: # sort the leaves
mask = new_lev_labels != -1
mask_all = mask.all()
if not mask_all:
new_labels = [lab[mask] for lab in new_labels]
new_codes = [lab[mask] for lab in new_codes]

left_indexer = _get_leaf_sorter(new_labels[:level + 1])
new_labels = [lab[left_indexer] for lab in new_labels]
left_indexer = _get_leaf_sorter(new_codes[:level + 1])
new_codes = [lab[left_indexer] for lab in new_codes]

# left_indexers are w.r.t masked frame.
# reverse to original frame!
if not mask_all:
left_indexer = mask.nonzero()[0][left_indexer]

join_index = MultiIndex(levels=new_levels, labels=new_labels,
join_index = MultiIndex(levels=new_levels, labels=new_codes,
names=left.names, verify_integrity=False)

if right_lev_indexer is not None:
right_indexer = algos.take_nd(right_lev_indexer,
join_index.labels[level],
join_index.codes[level],
allow_fill=False)
else:
right_indexer = join_index.labels[level]
right_indexer = join_index.codes[level]

if flip_order:
left_indexer, right_indexer = right_indexer, left_indexer
Loading