@@ -2801,8 +2801,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
2801
2801
else :
2802
2802
return result
2803
2803
2804
+ @deprecate_kwarg ('take_last' , 'take' , mapping = {True : 'last' , False : 'first' })
2804
2805
@deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2805
- def drop_duplicates (self , subset = None , take_last = False , inplace = False ):
2806
+ def drop_duplicates (self , subset = None , take = 'first' , inplace = False ):
2806
2807
"""
2807
2808
Return DataFrame with duplicate rows removed, optionally only
2808
2809
considering certain columns
@@ -2812,8 +2813,11 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2812
2813
subset : column label or sequence of labels, optional
2813
2814
Only consider certain columns for identifying duplicates, by
2814
2815
default use all of the columns
2815
- take_last : boolean, default False
2816
- Take the last observed row in a row. Defaults to the first row
2816
+ take : {'first', 'last', 'all'}, default 'first'
2817
+ - ``first`` : Keep the first occurrence of each set of duplicate rows.
2818
+ - ``last`` : Keep the last occurrence of each set of duplicate rows.
2819
+ - ``all`` : Drop every row that has a duplicate, keeping none of them.
2820
+ take_last : deprecated
2817
2821
inplace : boolean, default False
2818
2822
Whether to drop duplicates in place or to return a copy
2819
2823
cols : kwargs only argument of subset [deprecated]
@@ -2822,7 +2826,7 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2822
2826
-------
2823
2827
deduplicated : DataFrame
2824
2828
"""
2825
- duplicated = self .duplicated (subset , take_last = take_last )
2829
+ duplicated = self .duplicated (subset , take = take )
2826
2830
2827
2831
if inplace :
2828
2832
inds , = (- duplicated ).nonzero ()
@@ -2831,8 +2835,9 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
2831
2835
else :
2832
2836
return self [- duplicated ]
2833
2837
2838
+ @deprecate_kwarg ('take_last' , 'take' , mapping = {True : 'last' , False : 'first' })
2834
2839
@deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2835
- def duplicated (self , subset = None , take_last = False ):
2840
+ def duplicated (self , subset = None , take = 'first' ):
2836
2841
"""
2837
2842
Return boolean Series denoting duplicate rows, optionally only
2838
2843
considering certain columns
@@ -2842,9 +2847,11 @@ def duplicated(self, subset=None, take_last=False):
2842
2847
subset : column label or sequence of labels, optional
2843
2848
Only consider certain columns for identifying duplicates, by
2844
2849
default use all of the columns
2845
- take_last : boolean, default False
2846
- For a set of distinct duplicate rows, flag all but the last row as
2847
- duplicated. Default is for all but the first row to be flagged
2850
+ take : {'first', 'last', 'all'}, default 'first'
2851
+ - ``first`` : Flag all rows in a set of duplicates except the first.
2852
+ - ``last`` : Flag all rows in a set of duplicates except the last.
2853
+ - ``all`` : Flag every row in a set of duplicates.
2854
+ take_last : deprecated
2848
2855
cols : kwargs only argument of subset [deprecated]
2849
2856
2850
2857
Returns
@@ -2870,7 +2877,7 @@ def f(vals):
2870
2877
labels , shape = map (list , zip ( * map (f , vals )))
2871
2878
2872
2879
ids = get_group_index (labels , shape , sort = False , xnull = False )
2873
- return Series (duplicated_int64 (ids , take_last ), index = self .index )
2880
+ return Series (duplicated_int64 (ids , take ), index = self .index )
2874
2881
2875
2882
#----------------------------------------------------------------------
2876
2883
# Sorting
0 commit comments