Skip to content

Commit 09633b8

Browse files
charlesdong1991 authored and jreback committed Feb 6, 2019
BUG: DataFrame.merge(suffixes=) does not respect None (#24819)
1 parent 776530c commit 09633b8

File tree

4 files changed

+91
-10
lines changed

4 files changed

+91
-10
lines changed
 

‎doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ Groupby/Resample/Rolling
181181
Reshaping
182182
^^^^^^^^^
183183

184+
- Bug in :func:`pandas.merge` where the string ``None`` was appended to overlapping column names when ``None`` was passed in ``suffixes``, instead of leaving the column name as-is (:issue:`24782`).
184185
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
185186
- :func:`to_records` now accepts dtypes to its ``column_dtypes`` parameter (:issue:`24895`)
186187
-

‎pandas/core/internals/managers.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -1971,16 +1971,28 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
19711971
raise ValueError('columns overlap but no suffix specified: '
19721972
'{rename}'.format(rename=to_rename))
19731973

1974-
def lrenamer(x):
1975-
if x in to_rename:
1976-
return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix)
1977-
return x
1974+
def renamer(x, suffix):
1975+
"""Rename the left and right indices.
1976+
1977+
If there is overlap, and suffix is not None, add
1978+
suffix, otherwise, leave it as-is.
1979+
1980+
Parameters
1981+
----------
1982+
x : original column name
1983+
suffix : str or None
19781984
1979-
def rrenamer(x):
1980-
if x in to_rename:
1981-
return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix)
1985+
Returns
1986+
-------
1987+
x : renamed column name
1988+
"""
1989+
if x in to_rename and suffix is not None:
1990+
return '{x}{suffix}'.format(x=x, suffix=suffix)
19821991
return x
19831992

1993+
lrenamer = partial(renamer, suffix=lsuffix)
1994+
rrenamer = partial(renamer, suffix=rsuffix)
1995+
19841996
return (_transform_index(left, lrenamer),
19851997
_transform_index(right, rrenamer))
19861998

‎pandas/core/reshape/merge.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,15 @@ def merge_ordered(left, right, on=None,
159159
left DataFrame
160160
fill_method : {'ffill', None}, default None
161161
Interpolation method for data
162-
suffixes : 2-length sequence (tuple, list, ...)
163-
Suffix to apply to overlapping column names in the left and right
164-
side, respectively
162+
suffixes : Sequence, default is ("_x", "_y")
163+
A length-2 sequence where each element is optionally a string
164+
indicating the suffix to add to overlapping column names in
165+
`left` and `right` respectively. Pass a value of `None` instead
166+
of a string to indicate that the column name from `left` or
167+
`right` should be left as-is, with no suffix. At least one of the
168+
values must not be None.
169+
170+
.. versionchanged:: 0.25.0
165171
how : {'left', 'right', 'outer', 'inner'}, default 'outer'
166172
* left: use only keys from left frame (SQL: left outer join)
167173
* right: use only keys from right frame (SQL: right outer join)

‎pandas/tests/reshape/merge/test_merge.py

+62
Original file line numberDiff line numberDiff line change
@@ -1526,3 +1526,65 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
15261526
with pytest.raises(ValueError, match=msg):
15271527
result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
15281528
left_index=left_index, right_index=right_index)
1529+
1530+
1531+
@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [
1532+
(0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]),
1533+
(0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]),
1534+
(0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]),
1535+
("a", 0, dict(suffixes=(None, "_y")), ["a", 0]),
1536+
(0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]),
1537+
("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]),
1538+
("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]),
1539+
("a", "b", dict(suffixes=("_x", None)), ["a", "b"]),
1540+
("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]),
1541+
(0, 0, dict(suffixes=["_a", None]), ["0_a", 0]),
1542+
("a", "a", dict(), ["a_x", "a_y"]),
1543+
(0, 0, dict(), ["0_x", "0_y"])
1544+
])
1545+
def test_merge_suffix(col1, col2, kwargs, expected_cols):
1546+
# issue: 24782
1547+
a = pd.DataFrame({col1: [1, 2, 3]})
1548+
b = pd.DataFrame({col2: [4, 5, 6]})
1549+
1550+
expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]],
1551+
columns=expected_cols)
1552+
1553+
result = a.merge(b, left_index=True, right_index=True, **kwargs)
1554+
tm.assert_frame_equal(result, expected)
1555+
1556+
result = pd.merge(a, b, left_index=True, right_index=True, **kwargs)
1557+
tm.assert_frame_equal(result, expected)
1558+
1559+
1560+
@pytest.mark.parametrize("col1, col2, suffixes", [
1561+
("a", "a", [None, None]),
1562+
("a", "a", (None, None)),
1563+
("a", "a", ("", None)),
1564+
(0, 0, [None, None]),
1565+
(0, 0, (None, ""))
1566+
])
1567+
def test_merge_suffix_error(col1, col2, suffixes):
1568+
# issue: 24782
1569+
a = pd.DataFrame({col1: [1, 2, 3]})
1570+
b = pd.DataFrame({col2: [3, 4, 5]})
1571+
1572+
# TODO: might reconsider current raise behaviour, see issue 24782
1573+
msg = "columns overlap but no suffix specified"
1574+
with pytest.raises(ValueError, match=msg):
1575+
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
1576+
1577+
1578+
@pytest.mark.parametrize("col1, col2, suffixes", [
1579+
("a", "a", None),
1580+
(0, 0, None)
1581+
])
1582+
def test_merge_suffix_none_error(col1, col2, suffixes):
1583+
# issue: 24782
1584+
a = pd.DataFrame({col1: [1, 2, 3]})
1585+
b = pd.DataFrame({col2: [3, 4, 5]})
1586+
1587+
# TODO: might reconsider current raise behaviour, see GH24782
1588+
msg = "iterable"
1589+
with pytest.raises(TypeError, match=msg):
1590+
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)

0 commit comments

Comments
 (0)