Skip to content

Commit 499aa67

Browse files
[ArrowStringArray] TST: more parameterised testing - part 4 (#40963)
1 parent cc0ddf6 commit 499aa67

File tree

4 files changed

+78
-56
lines changed

4 files changed

+78
-56
lines changed

pandas/tests/series/methods/test_astype.py

+19 -9
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import pytest
1111

1212
from pandas._libs.tslibs import iNaT
13+
import pandas.util._test_decorators as td
1314

1415
from pandas import (
1516
NA,
@@ -246,25 +247,34 @@ def test_td64_series_astype_object(self):
246247
assert result.dtype == np.object_
247248

248249
@pytest.mark.parametrize(
249-
"values",
250+
"data, dtype",
250251
[
251-
Series(["x", "y", "z"], dtype="string"),
252-
Series(["x", "y", "z"], dtype="category"),
253-
Series(3 * [Timestamp("2020-01-01", tz="UTC")]),
254-
Series(3 * [Interval(0, 1)]),
252+
(["x", "y", "z"], "string"),
253+
pytest.param(
254+
["x", "y", "z"],
255+
"arrow_string",
256+
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
257+
),
258+
(["x", "y", "z"], "category"),
259+
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
260+
(3 * [Interval(0, 1)], None),
255261
],
256262
)
257263
@pytest.mark.parametrize("errors", ["raise", "ignore"])
258-
def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
264+
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
259265
# https://github.com/pandas-dev/pandas/issues/35471
266+
267+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
268+
269+
ser = Series(data, dtype=dtype)
260270
if errors == "ignore":
261-
expected = values
262-
result = values.astype(float, errors="ignore")
271+
expected = ser
272+
result = ser.astype(float, errors="ignore")
263273
tm.assert_series_equal(result, expected)
264274
else:
265275
msg = "(Cannot cast)|(could not convert)"
266276
with pytest.raises((ValueError, TypeError), match=msg):
267-
values.astype(float, errors=errors)
277+
ser.astype(float, errors=errors)
268278

269279
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
270280
def test_astype_from_float_to_str(self, dtype):

pandas/tests/series/methods/test_update.py

+27 -23
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
from pandas import (
57
CategoricalDtype,
68
DataFrame,
@@ -9,6 +11,7 @@
911
Timestamp,
1012
)
1113
import pandas._testing as tm
14+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
1215

1316

1417
class TestUpdate:
@@ -82,37 +85,38 @@ def test_update_from_non_series(self, series, other, expected):
8285
tm.assert_series_equal(series, expected)
8386

8487
@pytest.mark.parametrize(
85-
"result, target, expected",
88+
"data, other, expected, dtype",
8689
[
87-
(
88-
Series(["a", None], dtype="string"),
89-
Series([None, "b"], dtype="string"),
90-
Series(["a", "b"], dtype="string"),
91-
),
92-
(
93-
Series([1, None], dtype="Int64"),
94-
Series([None, 2], dtype="Int64"),
95-
Series([1, 2], dtype="Int64"),
90+
(["a", None], [None, "b"], ["a", "b"], "string"),
91+
pytest.param(
92+
["a", None],
93+
[None, "b"],
94+
["a", "b"],
95+
"arrow_string",
96+
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
9697
),
98+
([1, None], [None, 2], [1, 2], "Int64"),
99+
([True, None], [None, False], [True, False], "boolean"),
97100
(
98-
Series([True, None], dtype="boolean"),
99-
Series([None, False], dtype="boolean"),
100-
Series([True, False], dtype="boolean"),
101+
["a", None],
102+
[None, "b"],
103+
["a", "b"],
104+
CategoricalDtype(categories=["a", "b"]),
101105
),
102106
(
103-
Series(["a", None], dtype=CategoricalDtype(categories=["a", "b"])),
104-
Series([None, "b"], dtype=CategoricalDtype(categories=["a", "b"])),
105-
Series(["a", "b"], dtype=CategoricalDtype(categories=["a", "b"])),
106-
),
107-
(
108-
Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT]),
109-
Series([NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")]),
110-
Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2),
107+
[Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
108+
[NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
109+
[Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
110+
"datetime64[ns, Europe/London]",
111111
),
112112
],
113113
)
114-
def test_update_extension_array_series(self, result, target, expected):
115-
result.update(target)
114+
def test_update_extension_array_series(self, data, other, expected, dtype):
115+
result = Series(data, dtype=dtype)
116+
other = Series(other, dtype=dtype)
117+
expected = Series(expected, dtype=dtype)
118+
119+
result.update(other)
116120
tm.assert_series_equal(result, expected)
117121

118122
def test_update_with_categorical_type(self):

pandas/tests/strings/test_find_replace.py

+17 -15
Original file line number | Diff line number | Diff line change
@@ -364,26 +364,28 @@ def test_match():
364364

365365
def test_fullmatch():
366366
# GH 32806
367-
values = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"])
368-
result = values.str.fullmatch(".*BAD[_]+.*BAD")
369-
exp = Series([True, False, np.nan, False])
370-
tm.assert_series_equal(result, exp)
371-
372-
# Make sure that the new string arrays work
373-
string_values = Series(
374-
["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype="string"
375-
)
376-
result = string_values.str.fullmatch(".*BAD[_]+.*BAD")
377-
# Result is nullable boolean with StringDtype
378-
string_exp = Series([True, False, np.nan, False], dtype="boolean")
379-
tm.assert_series_equal(result, string_exp)
367+
ser = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"])
368+
result = ser.str.fullmatch(".*BAD[_]+.*BAD")
369+
expected = Series([True, False, np.nan, False])
370+
tm.assert_series_equal(result, expected)
380371

381-
values = Series(["ab", "AB", "abc", "ABC"])
382-
result = values.str.fullmatch("ab", case=False)
372+
ser = Series(["ab", "AB", "abc", "ABC"])
373+
result = ser.str.fullmatch("ab", case=False)
383374
expected = Series([True, True, False, False])
384375
tm.assert_series_equal(result, expected)
385376

386377

378+
def test_fullmatch_nullable_string_dtype(nullable_string_dtype):
379+
ser = Series(
380+
["fooBAD__barBAD", "BAD_BADleroybrown", None, "foo"],
381+
dtype=nullable_string_dtype,
382+
)
383+
result = ser.str.fullmatch(".*BAD[_]+.*BAD")
384+
# Result is nullable boolean
385+
expected = Series([True, False, np.nan, False], dtype="boolean")
386+
tm.assert_series_equal(result, expected)
387+
388+
387389
def test_findall():
388390
values = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"])
389391

pandas/tests/strings/test_strings.py

+15 -9
Original file line number | Diff line number | Diff line change
@@ -136,17 +136,23 @@ def test_repeat():
136136
tm.assert_series_equal(rs, xp)
137137

138138

139-
def test_repeat_with_null():
139+
def test_repeat_with_null(nullable_string_dtype, request):
140140
# GH: 31632
141-
values = Series(["a", None], dtype="string")
142-
result = values.str.repeat([3, 4])
143-
exp = Series(["aaa", None], dtype="string")
144-
tm.assert_series_equal(result, exp)
145141

146-
values = Series(["a", "b"], dtype="string")
147-
result = values.str.repeat([3, None])
148-
exp = Series(["aaa", None], dtype="string")
149-
tm.assert_series_equal(result, exp)
142+
if nullable_string_dtype == "arrow_string":
143+
reason = 'Attribute "dtype" are different'
144+
mark = pytest.mark.xfail(reason=reason)
145+
request.node.add_marker(mark)
146+
147+
ser = Series(["a", None], dtype=nullable_string_dtype)
148+
result = ser.str.repeat([3, 4])
149+
expected = Series(["aaa", None], dtype=nullable_string_dtype)
150+
tm.assert_series_equal(result, expected)
151+
152+
ser = Series(["a", "b"], dtype=nullable_string_dtype)
153+
result = ser.str.repeat([3, None])
154+
expected = Series(["aaa", None], dtype=nullable_string_dtype)
155+
tm.assert_series_equal(result, expected)
150156

151157

152158
def test_empty_str_methods():

0 commit comments

Comments
 (0)