Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit dd81bdd

Browse files
committed Aug 26, 2018
ENH: better MultiIndex.__repr__
1 parent 9f6c02d commit dd81bdd

File tree

5 files changed

+272
-74
lines changed

5 files changed

+272
-74
lines changed
 

‎doc/source/whatsnew/v0.24.0.txt

+26
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,32 @@ This is the same behavior as ``Series.values`` for categorical data. See
159159
:ref:`whatsnew_0240.api_breaking.interval_values` for more.
160160

161161

162+
.. _whatsnew_0240.enhancements.multi_index_repr:
163+
164+
Better repr for MultiIndex
165+
^^^^^^^^^^^^^^^^^^^^^^^^^^
166+
167+
Previously, outputting a :class:`MultiIndex` printed the levels/labels of the
168+
multiindex. This was visually unappealing and made it difficult to understand
169+
the structure of the MultiIndex. Also, this could be a problem for large
170+
indices as the output could be slow to print and make the console output
171+
difficult to navigate.
172+
173+
The repr of a ``MultiIndex`` instance now shows each row as a tuple and ensures
174+
that the tuple items are vertically aligned, so it's now much easier to
175+
understand the structure of the ``MultiIndex`` (:issue:`13480`):
176+
177+
.. ipython:: python
178+
179+
index1=range(1000)
180+
index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
181+
pd.MultiIndex.from_arrays([index1, index2])
182+
183+
When the number of rows is smaller than :attr:`options.display.max_seq_items`, all
184+
values will be shown (default 100). Horizontally, the output will
185+
be truncated if it is longer than :attr:`options.display.width`.
186+
187+
162188
.. _whatsnew_0240.enhancements.other:
163189

164190
Other Enhancements

‎pandas/core/indexes/multi.py

+46-14
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp
99

10-
from pandas.compat import range, zip, lrange, lzip, map
10+
from pandas.compat import range, zip, lrange, lzip, map, u
1111
from pandas.compat.numpy import function as nv
1212
from pandas import compat
1313

@@ -31,7 +31,8 @@
3131
import pandas.core.common as com
3232
import pandas.core.missing as missing
3333
import pandas.core.algorithms as algos
34-
from pandas.io.formats.printing import pprint_thing
34+
from pandas.io.formats.printing import (format_object_summary,
35+
default_pprint, pprint_thing)
3536

3637
from pandas.core.config import get_option
3738

@@ -607,27 +608,58 @@ def _nbytes(self, deep=False):
607608
result += self._engine.sizeof(deep=deep)
608609
return result
609610

611+
def _formatter_func(self, tup):
    """
    Format every item of ``tup`` with the formatter of its level.

    Parameters
    ----------
    tup : tuple
        Values to format; the i-th value is paired with the i-th entry
        of ``self.levels``.

    Returns
    -------
    tuple
        The results of calling each level's ``_formatter_func`` on the
        value at the same position.
    """
    # Collect the per-level formatters first, then apply them pairwise.
    per_level = [lvl._formatter_func for lvl in self.levels]
    formatted = []
    for fmt, value in zip(per_level, tup):
        formatted.append(fmt(value))
    return tuple(formatted)
617+
610618
def _format_attrs(self):
    """
    Return a list of tuples of the (attr, formatted_value).

    The list always starts with ``dtype``. ``names`` is appended when at
    least one level name is not None, and ``length`` is appended when the
    index has more rows than the ``display.max_seq_items`` option (a
    falsy option value disables that limit).
    """
    attrs = [('dtype', "'{}'".format(self.dtype))]

    # Compare against None instead of relying on truthiness: a level may
    # legitimately be named 0 or '' and must still appear in the repr.
    names = self.names
    if names is not None and any(name is not None for name in names):
        attrs.append(('names', default_pprint(names)))

    # ``or len(self)`` turns a falsy option value into "no limit".
    max_seq_items = get_option('display.max_seq_items') or len(self)
    if len(self) > max_seq_items:
        attrs.append(('length', len(self)))
    return attrs
624630

625631
def _format_space(self):
    """
    Return the whitespace placed after the comma between attributes.
    """
    separator = " "
    return separator
627633

628634
def _format_data(self, name=None):
    """
    Return the formatted data as a unicode string.

    Delegates to ``format_object_summary``, formatting each row with
    ``self._formatter_func`` and requesting the multi-index layout.
    """
    summary = format_object_summary(self, self._formatter_func,
                                    name=name, is_multi=True)
    return summary
640+
641+
def __unicode__(self):
    """
    Return a string representation for this MultiIndex.

    Invoked by unicode(df) in py2 only. Yields a Unicode String in both
    py2/py3.

    The result has the form ``ClassName(<data><attr>=<value>, ...)``.
    """
    data = self._format_data()
    attrs = self._format_attrs()
    separator = u(",%s") % self._format_space()

    if data is None:
        # no data provided, just attributes
        data = ''

    pairs = [u("%s=%s") % (key, value) for key, value in attrs]
    return u("%s(%s%s)") % (self.__class__.__name__, data,
                            separator.join(pairs))
631663

632664
def __len__(self):
633665
return len(self.labels[0])

‎pandas/io/formats/printing.py

+65-15
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ class TableSchemaFormatter(BaseFormatter):
268268
max_seq_items=max_seq_items)
269269

270270

271-
def format_object_summary(obj, formatter, is_justify=True, name=None):
271+
def format_object_summary(obj, formatter, is_justify=True,
272+
name=None, is_multi=False):
272273
"""
273274
Return the formatted obj as a unicode string
274275
@@ -280,8 +281,10 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
280281
string formatter for an element
281282
is_justify : boolean
282283
should justify the display
283-
name : name, optiona
284+
name : name, optional
284285
defaults to the class name of the obj
286+
is_multi : bool, default False
287+
Is ``obj`` a :class:`MultiIndex` or not
285288
286289
Returns
287290
-------
@@ -301,7 +304,7 @@ def format_object_summary(obj, formatter, is_justify=True, name=None):
301304
space2 = "\n%s" % (' ' * (len(name) + 2))
302305

303306
n = len(obj)
304-
sep = ','
307+
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
305308
max_seq_items = get_option('display.max_seq_items') or n
306309

307310
# are we a truncated display
@@ -327,10 +330,10 @@ def best_len(values):
327330

328331
if n == 0:
329332
summary = '[], '
330-
elif n == 1:
333+
elif n == 1 and not is_multi:
331334
first = formatter(obj[0])
332335
summary = '[%s], ' % first
333-
elif n == 2:
336+
elif n == 2 and not is_multi:
334337
first = formatter(obj[0])
335338
last = formatter(obj[-1])
336339
summary = '[%s, %s], ' % (first, last)
@@ -346,15 +349,16 @@ def best_len(values):
346349

347350
# adjust all values to max length if needed
348351
if is_justify:
349-
350-
# however, if we are not truncated and we are only a single
351-
# line, then don't justify
352-
if (is_truncated or
353-
not (len(', '.join(head)) < display_width and
354-
len(', '.join(tail)) < display_width)):
355-
max_len = max(best_len(head), best_len(tail))
356-
head = [x.rjust(max_len) for x in head]
357-
tail = [x.rjust(max_len) for x in tail]
352+
head, tail = _justify(head, tail, display_width, best_len,
353+
is_truncated, is_multi)
354+
if is_multi:
355+
max_space = display_width - len(space2)
356+
item = tail[0]
357+
for i in reversed(range(1, len(item) + 1)):
358+
if len(_pprint_seq(item, max_seq_items=i)) < max_space:
359+
break
360+
head = [_pprint_seq(x, max_seq_items=i) for x in head]
361+
tail = [_pprint_seq(x, max_seq_items=i) for x in tail]
358362

359363
summary = ""
360364
line = space2
@@ -380,7 +384,7 @@ def best_len(values):
380384
summary += line
381385
summary += '],'
382386

383-
if len(summary) > (display_width):
387+
if len(summary) > (display_width) or is_multi:
384388
summary += space1
385389
else: # one row
386390
summary += ' '
@@ -391,6 +395,52 @@ def best_len(values):
391395
return summary
392396

393397

398+
def _justify(head, tail, display_width, best_len,
             is_truncated=False, is_multi=False):
    """
    Right-justify the items in ``head`` and ``tail`` so they line up.

    Parameters
    ----------
    head, tail : list
        Lists of strings, or lists of sequences of strings when
        ``is_multi`` is True.
    display_width : int
        Width against which the comma-joined items are measured.
    best_len : callable
        Called on ``head`` and on ``tail``; the larger result is used as
        the common width in the non-multi case.
    is_truncated : bool, default False
        Forces justification in the non-multi case.
    is_multi : bool, default False
        If True, each position inside the sequences is padded to the
        widest item found at that position.

    Returns
    -------
    tuple of (list, list)
        The justified ``head`` and ``tail``.
    """
    if is_multi:
        widths = _max_level_item_length(head + tail)

        def pad_row(row):
            return tuple(item.rjust(width)
                         for item, width in zip(row, widths))

        return [pad_row(row) for row in head], [pad_row(row) for row in tail]

    # Justify when truncated, or when either part would not fit on a
    # single line of the display.
    needs_padding = (is_truncated or
                     len(', '.join(head)) >= display_width or
                     len(', '.join(tail)) >= display_width)
    if needs_padding:
        width = max(best_len(head), best_len(tail))
        head = [item.rjust(width) for item in head]
        tail = [item.rjust(width) for item in tail]
    return head, tail
416+
417+
418+
def _max_level_item_length(seq):
    """
    For each position for the sequences in ``seq``, find the largest length.

    Used for justifying individual values in a :class:`pandas.MultiIndex`.

    Parameters
    ----------
    seq : list-like of list-likes of strings

    Returns
    -------
    max_length : list of ints
        One entry per position; an empty list when ``seq`` is empty.

    Examples
    --------
    >>> _max_level_item_length([['s', 'ab'], ['abc', 'a']])
    [3, 2]
    >>> _max_level_item_length([])
    []
    """
    # zip(*seq) walks the inner sequences column by column and yields
    # nothing for an empty ``seq``, so no indexing of ``seq[0]`` is needed.
    return [max(len(item) for item in column) for column in zip(*seq)]
442+
443+
394444
def format_object_attrs(obj):
395445
"""
396446
Return a list of tuples of the (attr, formatted_value)

0 commit comments

Comments
 (0)
Please sign in to comment.