Skip to content

Commit 7025c59

Browse files
charlesdong1991 authored and jreback committed
ENH: Add ignore_index for df.drop_duplicates (#30405)
1 parent 0a3c1d7 commit 7025c59

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,7 @@ Other enhancements
209209
- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
210210
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
211211

212+
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
212213

213214
Build Changes
214215
^^^^^^^^^^^^^

pandas/core/frame.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -4614,6 +4614,7 @@ def drop_duplicates(
46144614
subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,
46154615
keep: Union[str, bool] = "first",
46164616
inplace: bool = False,
4617+
ignore_index: bool = False,
46174618
) -> Optional["DataFrame"]:
46184619
"""
46194620
Return DataFrame with duplicate rows removed.
@@ -4633,6 +4634,10 @@ def drop_duplicates(
46334634
- False : Drop all duplicates.
46344635
inplace : bool, default False
46354636
Whether to drop duplicates in place or to return a copy.
4637+
ignore_index : bool, default False
4638+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
4639+
4640+
.. versionadded:: 1.0.0
46364641
46374642
Returns
46384643
-------
@@ -4648,9 +4653,16 @@ def drop_duplicates(
46484653
if inplace:
46494654
(inds,) = (-duplicated)._ndarray_values.nonzero()
46504655
new_data = self._data.take(inds)
4656+
4657+
if ignore_index:
4658+
new_data.axes[1] = ibase.default_index(len(inds))
46514659
self._update_inplace(new_data)
46524660
else:
4653-
return self[-duplicated]
4661+
result = self[-duplicated]
4662+
4663+
if ignore_index:
4664+
result.index = ibase.default_index(len(result))
4665+
return result
46544666

46554667
return None
46564668

pandas/tests/frame/methods/test_drop_duplicates.py

+33
Original file line number | Diff line number | Diff line change
@@ -391,3 +391,36 @@ def test_drop_duplicates_inplace():
391391
expected = orig2.drop_duplicates(["A", "B"], keep=False)
392392
result = df2
393393
tm.assert_frame_equal(result, expected)
394+
395+
396+
@pytest.mark.parametrize(
397+
"origin_dict, output_dict, ignore_index, output_index",
398+
[
399+
({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]),
400+
({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]),
401+
({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]),
402+
({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]),
403+
],
404+
)
405+
def test_drop_duplicates_ignore_index(
406+
origin_dict, output_dict, ignore_index, output_index
407+
):
408+
# GH 30114
409+
df = DataFrame(origin_dict)
410+
expected = DataFrame(output_dict, index=output_index)
411+
412+
# Test when inplace is False
413+
result = df.drop_duplicates(ignore_index=ignore_index)
414+
tm.assert_frame_equal(result, expected)
415+
416+
# to verify original dataframe is not mutated
417+
tm.assert_frame_equal(df, DataFrame(origin_dict))
418+
419+
# Test when inplace is True
420+
copied_df = df.copy()
421+
422+
copied_df.drop_duplicates(ignore_index=ignore_index, inplace=True)
423+
tm.assert_frame_equal(copied_df, expected)
424+
425+
# to verify that input is unchanged
426+
tm.assert_frame_equal(df, DataFrame(origin_dict))

0 commit comments

Comments
 (0)