Skip to content

Commit db978c7

Browse files
charlesdong1991 authored and WillAyd committed
ENH: Add Series.str.casefold (#25419)
1 parent ae1ab89 commit db978c7

File tree

5 files changed

+26
-7
lines changed

5 files changed

+26
-7
lines changed

doc/source/reference/series.rst

+1
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ strings and apply several methods to it. These can be accessed like
409409
:template: autosummary/accessor_method.rst
410410

411411
Series.str.capitalize
412+
Series.str.casefold
412413
Series.str.cat
413414
Series.str.center
414415
Series.str.contains

doc/source/user_guide/text.rst

+1
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ Method Summary
600600
:meth:`~Series.str.partition`;Equivalent to ``str.partition``
601601
:meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition``
602602
:meth:`~Series.str.lower`;Equivalent to ``str.lower``
603+
:meth:`~Series.str.casefold`;Equivalent to ``str.casefold``
603604
:meth:`~Series.str.upper`;Equivalent to ``str.upper``
604605
:meth:`~Series.str.find`;Equivalent to ``str.find``
605606
:meth:`~Series.str.rfind`;Equivalent to ``str.rfind``

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Other Enhancements
2222
- Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`)
2323
- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`)
2424
- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`)
25+
- ``Series.str`` has gained the :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`)
2526
- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
2627
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
2728
-

pandas/core/strings.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -2926,7 +2926,7 @@ def rindex(self, sub, start=0, end=None):
29262926

29272927
_shared_docs['casemethods'] = ("""
29282928
Convert strings in the Series/Index to %(type)s.
2929-
2929+
%(version)s
29302930
Equivalent to :meth:`str.%(method)s`.
29312931
29322932
Returns
@@ -2943,6 +2943,7 @@ def rindex(self, sub, start=0, end=None):
29432943
remaining to lowercase.
29442944
Series.str.swapcase : Converts uppercase to lowercase and lowercase to
29452945
uppercase.
2946+
Series.str.casefold : Removes all case distinctions in the string.
29462947
29472948
Examples
29482949
--------
@@ -2989,12 +2990,15 @@ def rindex(self, sub, start=0, end=None):
29892990
3 sWaPcAsE
29902991
dtype: object
29912992
""")
2992-
_shared_docs['lower'] = dict(type='lowercase', method='lower')
2993-
_shared_docs['upper'] = dict(type='uppercase', method='upper')
2994-
_shared_docs['title'] = dict(type='titlecase', method='title')
2993+
_shared_docs['lower'] = dict(type='lowercase', method='lower', version='')
2994+
_shared_docs['upper'] = dict(type='uppercase', method='upper', version='')
2995+
_shared_docs['title'] = dict(type='titlecase', method='title', version='')
29952996
_shared_docs['capitalize'] = dict(type='be capitalized',
2996-
method='capitalize')
2997-
_shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase')
2997+
method='capitalize', version='')
2998+
_shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase',
2999+
version='')
3000+
_shared_docs['casefold'] = dict(type='be casefolded', method='casefold',
3001+
version='\n .. versionadded:: 0.25.0\n')
29983002
lower = _noarg_wrapper(lambda x: x.lower(),
29993003
docstring=_shared_docs['casemethods'] %
30003004
_shared_docs['lower'])
@@ -3010,6 +3014,9 @@ def rindex(self, sub, start=0, end=None):
30103014
swapcase = _noarg_wrapper(lambda x: x.swapcase(),
30113015
docstring=_shared_docs['casemethods'] %
30123016
_shared_docs['swapcase'])
3017+
casefold = _noarg_wrapper(lambda x: x.casefold(),
3018+
docstring=_shared_docs['casemethods'] %
3019+
_shared_docs['casefold'])
30133020

30143021
_shared_docs['ismethods'] = ("""
30153022
Check whether all characters in each string are %(type)s.

pandas/tests/test_strings.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def assert_series_or_index_equal(left, right):
7676
'len', 'lower', 'lstrip', 'partition',
7777
'rpartition', 'rsplit', 'rstrip',
7878
'slice', 'slice_replace', 'split',
79-
'strip', 'swapcase', 'title', 'upper'
79+
'strip', 'swapcase', 'title', 'upper', 'casefold'
8080
], [()] * 100, [{}] * 100))
8181
ids, _, _ = zip(*_any_string_method) # use method name as fixture-id
8282

@@ -3440,3 +3440,12 @@ def test_method_on_bytes(self):
34403440
expected = Series(np.array(
34413441
['ad', 'be', 'cf'], 'S2').astype(object))
34423442
tm.assert_series_equal(result, expected)
3443+
3444+
@pytest.mark.skipif(compat.PY2, reason='not in python2')
3445+
def test_casefold(self):
3446+
# GH25405
3447+
expected = Series(['ss', NA, 'case', 'ssd'])
3448+
s = Series(['ß', NA, 'case', 'ßd'])
3449+
result = s.str.casefold()
3450+
3451+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)