@@ -4559,11 +4559,15 @@ def nlargest(self, n, columns, keep='first'):
4559
4559
Number of rows to return.
4560
4560
columns : label or list of labels
4561
4561
Column label(s) to order by.
4562
- keep : {'first', 'last'}, default 'first'
4562
+ keep : {'first', 'last', 'all'}, default 'first'
4563
4563
Where there are duplicate values:
4564
4564
4565
4565
- `first` : prioritize the first occurrence(s)
4566
4566
- `last` : prioritize the last occurrence(s)
4567
+ - ``all`` : do not drop any duplicates, even if it means
4568
+ selecting more than `n` items.
4569
+
4570
+ .. versionadded:: 0.24.0
4567
4571
4568
4572
Returns
4569
4573
-------
@@ -4586,42 +4590,51 @@ def nlargest(self, n, columns, keep='first'):
4586
4590
4587
4591
Examples
4588
4592
--------
4589
- >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1 ],
4590
- ... 'b': list('abdce '),
4591
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4593
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2 ],
4594
+ ... 'b': list('abdcef'),
4595
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0 ]})
4592
4596
>>> df
4593
4597
a b c
4594
4598
0 1 a 1.0
4595
4599
1 10 b 2.0
4596
4600
2 8 d NaN
4597
- 3 10 c 3.0
4598
- 4 -1 e 4.0
4601
+ 3 11 c 3.0
4602
+ 4 8 e 4.0
4603
+ 5 2 f 9.0
4599
4604
4600
4605
In the following example, we will use ``nlargest`` to select the three
4601
4606
rows having the largest values in column "a".
4602
4607
4603
4608
>>> df.nlargest(3, 'a')
4604
4609
a b c
4610
+ 3 11 c 3.0
4605
4611
1 10 b 2.0
4606
- 3 10 c 3.0
4607
4612
2 8 d NaN
4608
4613
4609
4614
When using ``keep='last'``, ties are resolved in reverse order:
4610
4615
4611
4616
>>> df.nlargest(3, 'a', keep='last')
4612
4617
a b c
4613
- 3 10 c 3.0
4618
+ 3 11 c 3.0
4619
+ 1 10 b 2.0
4620
+ 4 8 e 4.0
4621
+
4622
+ When using ``keep='all'``, all duplicate items are maintained:
4623
+ >>> df.nlargest(3, 'a', keep='all')
4624
+ a b c
4625
+ 3 11 c 3.0
4614
4626
1 10 b 2.0
4615
4627
2 8 d NaN
4628
+ 4 8 e 4.0
4616
4629
4617
4630
To order by the largest values in column "a" and then "c", we can
4618
4631
specify multiple columns like in the next example.
4619
4632
4620
4633
>>> df.nlargest(3, ['a', 'c'])
4621
4634
a b c
4622
- 3 10 c 3.0
4635
+ 4 8 e 4.0
4636
+ 3 11 c 3.0
4623
4637
1 10 b 2.0
4624
- 2 8 d NaN
4625
4638
4626
4639
Attempting to use ``nlargest`` on non-numeric dtypes will raise a
4627
4640
``TypeError``:
@@ -4645,25 +4658,73 @@ def nsmallest(self, n, columns, keep='first'):
4645
4658
Number of items to retrieve
4646
4659
columns : list or str
4647
4660
Column name or names to order by
4648
- keep : {'first', 'last'}, default 'first'
4661
+ keep : {'first', 'last', 'all'}, default 'first'
4649
4662
Where there are duplicate values:
4650
4663
- ``first`` : take the first occurrence.
4651
4664
- ``last`` : take the last occurrence.
4665
+ - ``all`` : do not drop any duplicates, even if it means
4666
+ selecting more than `n` items.
4667
+
4668
+ .. versionadded:: 0.24.0
4652
4669
4653
4670
Returns
4654
4671
-------
4655
4672
DataFrame
4656
4673
4657
4674
Examples
4658
4675
--------
4659
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
4660
- ... 'b': list('abdce'),
4661
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4676
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4677
+ ... 'b': list('abdcef'),
4678
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4679
+ >>> df
4680
+ a b c
4681
+ 0 1 a 1.0
4682
+ 1 10 b 2.0
4683
+ 2 8 d NaN
4684
+ 3 11 c 3.0
4685
+ 4 8 e 4.0
4686
+ 5 2 f 9.0
4687
+
4688
+ In the following example, we will use ``nsmallest`` to select the
4689
+ three rows having the smallest values in column "a".
4690
+
4662
4691
>>> df.nsmallest(3, 'a')
4663
- a b c
4664
- 4 -1 e 4
4665
- 0 1 a 1
4666
- 2 8 d NaN
4692
+ a b c
4693
+ 0 1 a 1.0
4694
+ 5 2 f 9.0
4695
+ 2 8 d NaN
4696
+
4697
+ When using ``keep='last'``, ties are resolved in reverse order:
4698
+
4699
+ >>> df.nsmallest(3, 'a', keep='last')
4700
+ a b c
4701
+ 0 1 a 1.0
4702
+ 5 2 f 9.0
4703
+ 4 8 e 4.0
4704
+
4705
+ When using ``keep='all'``, all duplicate items are maintained:
4706
+ >>> df.nsmallest(3, 'a', keep='all')
4707
+ a b c
4708
+ 0 1 a 1.0
4709
+ 5 2 f 9.0
4710
+ 2 8 d NaN
4711
+ 4 8 e 4.0
4712
+
4713
+ To order by the smallest values in column "a" and then "c", we can
4714
+ specify multiple columns like in the next example.
4715
+
4716
+ >>> df.nsmallest(3, ['a', 'c'])
4717
+ a b c
4718
+ 0 1 a 1.0
4719
+ 5 2 f 9.0
4720
+ 4 8 e 4.0
4721
+
4722
+ Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4723
+ ``TypeError``:
4724
+
4725
+ >>> df.nsmallest(3, 'b')
4726
+ Traceback (most recent call last):
4727
+ TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
4667
4728
"""
4668
4729
return algorithms .SelectNFrame (self ,
4669
4730
n = n ,
0 commit comments