Skip to content

Commit 1807702

Browse files
committed
changed according to comments
1 parent f92bb0d commit 1807702

File tree

5 files changed

+105
-77
lines changed

5 files changed

+105
-77
lines changed

doc/source/whatsnew/v0.24.0.txt

+5-7
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,16 @@ difficult to navigate.
172172

173173
Outputting of ``MultiIndex`` instances now outputs tuples of each row and ensures
174174
that the tuple items are vertically aligned, so it's now much easier to
175-
understand the structure of the ``MultiIndex``. (:issue:`13480`):
175+
understand the structure of the ``MultiIndex``. Also, the output gets
176+
truncated if it's large. (:issue:`13480`):
176177

177178
.. ipython:: python
178179

179-
index1=range(1000)
180-
index2 = pd.Index(['a'] * 500 + ['abc'] * 500)
181-
pd.MultiIndex.from_arrays([index1, index2])
180+
pd.MultiIndex.from_product([['a', 'abc'], range(500)])
182181

183-
For number of rows smaller than :attr:`options.display.max_seq_items`, all
184-
values will be shown (default: 100 items). Horizontally, the output will
182+
If the number of rows is smaller than :attr:`options.display.max_seq_items`,
183+
all values will be shown (default: 100 items). Horizontally, the output will
185184
truncate if it's longer than :attr:`options.display.width` (default: 80 characters).
186-
This solves the problem with outputting large MultiIndex instances to the console.
187185

188186

189187
.. _whatsnew_0240.enhancements.other:

pandas/core/indexes/multi.py

+3-27
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp
99

10-
from pandas.compat import range, zip, lrange, lzip, map, u
10+
from pandas.compat import range, zip, lrange, lzip, map
1111
from pandas.compat.numpy import function as nv
1212
from pandas import compat
1313

@@ -619,8 +619,7 @@ def _format_attrs(self):
619619
"""
620620
Return a list of tuples of the (attr,formatted_value)
621621
"""
622-
attrs = []
623-
attrs.append(('dtype', "'{}'".format(self.dtype)))
622+
attrs = [('dtype', "'{}'".format(self.dtype))]
624623
if self.names is not None and any(self.names):
625624
attrs.append(('names', default_pprint(self.names)))
626625
max_seq_items = get_option('display.max_seq_items') or len(self)
@@ -636,30 +635,7 @@ def _format_data(self, name=None):
636635
Return the formatted data as a unicode string
637636
"""
638637
return format_object_summary(self, self._formatter_func,
639-
name=name, is_multi=True)
640-
641-
def __unicode__(self):
642-
"""
643-
Return a string representation for this MultiIndex.
644-
645-
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
646-
py2/py3.
647-
"""
648-
klass = self.__class__.__name__
649-
data = self._format_data()
650-
attrs = self._format_attrs()
651-
space = self._format_space()
652-
653-
prepr = (u(",%s") %
654-
space).join(u("%s=%s") % (k, v) for k, v in attrs)
655-
656-
# no data provided, just attributes
657-
if data is None:
658-
data = ''
659-
660-
res = u("%s(%s%s)") % (klass, data, prepr)
661-
662-
return res
638+
name=name, line_break_each_value=True)
663639

664640
def __len__(self):
665641
return len(self.labels[0])

pandas/io/formats/printing.py

+53-24
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ class TableSchemaFormatter(BaseFormatter):
269269

270270

271271
def format_object_summary(obj, formatter, is_justify=True,
272-
name=None, is_multi=False):
272+
name=None, line_break_each_value=False):
273273
"""
274274
Return the formatted obj as a unicode string
275275
@@ -283,8 +283,10 @@ def format_object_summary(obj, formatter, is_justify=True,
283283
should justify the display
284284
name : name, optional
285285
defaults to the class name of the obj
286-
is_multi : bool, default False
287-
Is ``obj`` a :class:`MultiIndex` or not
286+
line_break_each_value : bool, default False
287+
If True, inserts a line break for each value of ``obj``.
288+
If False, only break lines when a line of values gets wider
289+
than the display width
288290
289291
Returns
290292
-------
@@ -304,7 +306,11 @@ def format_object_summary(obj, formatter, is_justify=True,
304306
space2 = "\n%s" % (' ' * (len(name) + 2))
305307

306308
n = len(obj)
307-
sep = ',' if not is_multi else (',\n ' + ' ' * len(name))
309+
if not line_break_each_value:
310+
sep = ','
311+
else:
312+
# If we want to align on each value, we need a different separator.
313+
sep = (',\n ' + ' ' * len(name))
308314
max_seq_items = get_option('display.max_seq_items') or n
309315

310316
# are we a truncated display
@@ -330,10 +336,10 @@ def best_len(values):
330336

331337
if n == 0:
332338
summary = '[], '
333-
elif n == 1 and not is_multi:
339+
elif n == 1 and not line_break_each_value:
334340
first = formatter(obj[0])
335341
summary = '[%s], ' % first
336-
elif n == 2 and not is_multi:
342+
elif n == 2 and not line_break_each_value:
337343
first = formatter(obj[0])
338344
last = formatter(obj[-1])
339345
summary = '[%s, %s], ' % (first, last)
@@ -349,9 +355,15 @@ def best_len(values):
349355

350356
# adjust all values to max length if needed
351357
if is_justify:
352-
head, tail = _justify(head, tail, display_width, best_len,
353-
is_truncated, is_multi)
354-
if is_multi:
358+
if line_break_each_value:
359+
head, tail = _justify(head, tail)
360+
elif (is_truncated or not (len(', '.join(head)) < display_width and
361+
len(', '.join(tail)) < display_width)):
362+
max_length = max(best_len(head), best_len(tail))
363+
head = [x.rjust(max_length) for x in head]
364+
tail = [x.rjust(max_length) for x in tail]
365+
366+
if line_break_each_value:
355367
max_space = display_width - len(space2)
356368
item = tail[0]
357369
for i in reversed(range(1, len(item) + 1)):
@@ -384,7 +396,7 @@ def best_len(values):
384396
summary += line
385397
summary += '],'
386398

387-
if len(summary) > (display_width) or is_multi:
399+
if len(summary) > (display_width) or line_break_each_value:
388400
summary += space1
389401
else: # one row
390402
summary += ' '
@@ -395,23 +407,40 @@ def best_len(values):
395407
return summary
396408

397409

398-
def _justify(head, tail, display_width, best_len,
399-
is_truncated=False, is_multi=False):
410+
def _justify(head, tail):
400411
"""
401-
Justify each item in head and tail, so they align properly.
412+
Justify each item in each list-like in head and tail, so each item
413+
right-aligns when the two list-likes are stacked vertically.
414+
415+
Parameters
416+
----------
417+
head : list-like of list-likes of strings
418+
tail : list-like of list-likes of strings
419+
420+
Returns
421+
-------
422+
head : list of tuples of strings
423+
tail : list of tuples of strings
424+
425+
Examples
426+
--------
427+
>>> _justify([['a', 'b']], [['abc', 'abcd']])
428+
([(' a', ' b')], [('abc', 'abcd')])
402429
"""
403-
if is_multi:
404-
max_length = _max_level_item_length(head + tail)
405-
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
406-
for seq in head]
407-
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
408-
for seq in tail]
409-
elif (is_truncated or not (len(', '.join(head)) < display_width and
410-
len(', '.join(tail)) < display_width)):
411-
max_length = max(best_len(head), best_len(tail))
412-
head = [x.rjust(max_length) for x in head]
413-
tail = [x.rjust(max_length) for x in tail]
430+
combined = head + tail # type: List[str]
431+
432+
# For each position for the sequences in ``combined``,
433+
# find the length of the largest string.
434+
max_length = [0] * len(combined[0]) # type: List[int]
435+
for inner_seq in combined:
436+
length = [len(item) for item in inner_seq]
437+
max_length = [max(x, y) for x, y in zip(max_length, length)]
414438

439+
# justify each item in each list-like in head and tail using max_length
440+
head = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
441+
for seq in head]
442+
tail = [tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length))
443+
for seq in tail]
415444
return head, tail
416445

417446

pandas/tests/indexes/multi/conftest.py

+27
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import numpy as np
44
import pytest
5+
6+
import pandas as pd
57
from pandas import Index, MultiIndex
68

79

@@ -53,3 +55,28 @@ def holder():
5355
def compat_props():
5456
# a MultiIndex must have these properties associated with it
5557
return ['shape', 'ndim', 'size']
58+
59+
60+
@pytest.fixture
61+
def narrow_multi_index():
62+
"""
63+
Return a MultiIndex that is less wide than the display (<80 characters).
64+
"""
65+
n = 1000
66+
ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
67+
dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
68+
return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
69+
names=['a', 'b', 'dti'])
70+
71+
72+
@pytest.fixture
73+
def wide_multi_index():
74+
"""
75+
Return a MultiIndex that is wider than the display (>80 characters).
76+
"""
77+
n = 1000
78+
ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
79+
dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
80+
levels = [ci, ci.codes + 9, dti, dti, dti]
81+
names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
82+
return pd.MultiIndex.from_arrays(levels, names=names)

pandas/tests/indexes/multi/test_format.py

+17-19
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,13 @@ def test_repr_with_unicode_data():
5959
assert "\\u" not in repr(index) # we don't want unicode-escaped
6060

6161

62+
def test_repr_roundtrip_raises():
63+
mi = MultiIndex.from_product([list('ab'), range(3)],
64+
names=['first', 'second'])
65+
with pytest.raises(TypeError):
66+
eval(repr(mi))
67+
68+
6269
def test_unicode_string_with_unicode():
6370
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
6471
idx = pd.DataFrame(d).set_index(["a", "b"]).index
@@ -90,17 +97,6 @@ def test_repr_max_seq_item_setting(idx):
9097
@pytest.mark.skipif(PY2, reason="repr output is different for python2")
9198
class TestRepr(object):
9299

93-
def setup_class(self):
94-
n = 1000
95-
ci = pd.CategoricalIndex(list('a' * n) + (['abc'] * n))
96-
dti = pd.date_range('2000-01-01', freq='s', periods=n * 2)
97-
self.narrow_mi = pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti],
98-
names=['a', 'b', 'dti'])
99-
100-
levels = [ci, ci.codes + 9, dti, dti, dti]
101-
names = ['a', 'b', 'dti_1', 'dti_2', 'dti_3']
102-
self.wide_mi = pd.MultiIndex.from_arrays(levels, names=names)
103-
104100
def test_repr(self, idx):
105101
result = idx[:1].__repr__()
106102
expected = """MultiIndex([('foo', 'one')],
@@ -127,14 +123,15 @@ def test_repr(self, idx):
127123
dtype='object', names=['first', 'second'], length=6)"""
128124
assert result == expected
129125

130-
def test_rjust(self):
131-
result = self.narrow_mi[:1].__repr__()
126+
def test_rjust(self, narrow_multi_index):
127+
mi = narrow_multi_index
128+
result = mi[:1].__repr__()
132129
expected = """\
133130
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
134131
dtype='object', names=['a', 'b', 'dti'])"""
135132
assert result == expected
136133

137-
result = self.narrow_mi[::500].__repr__()
134+
result = mi[::500].__repr__()
138135
expected = """\
139136
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
140137
( 'a', 9, '2000-01-01 00:08:20'),
@@ -143,7 +140,7 @@ def test_rjust(self):
143140
dtype='object', names=['a', 'b', 'dti'])"""
144141
assert result == expected
145142

146-
result = self.narrow_mi.__repr__()
143+
result = mi.__repr__()
147144
expected = """\
148145
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
149146
( 'a', 9, '2000-01-01 00:00:01'),
@@ -169,13 +166,14 @@ def test_rjust(self):
169166
dtype='object', names=['a', 'b', 'dti'], length=2000)"""
170167
assert result == expected
171168

172-
def test_tuple_width(self):
173-
result = self.wide_mi[:1].__repr__()
169+
def test_tuple_width(self, wide_multi_index):
170+
mi = wide_multi_index
171+
result = mi[:1].__repr__()
174172
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
175173
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
176174
assert result == expected
177175

178-
result = self.wide_mi[:10].__repr__()
176+
result = mi[:10].__repr__()
179177
expected = """\
180178
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
181179
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
@@ -190,7 +188,7 @@ def test_tuple_width(self):
190188
dtype='object', names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
191189
assert result == expected
192190

193-
result = self.wide_mi.__repr__()
191+
result = mi.__repr__()
194192
expected = """\
195193
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
196194
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),

0 commit comments

Comments
 (0)