[ArrowStringArray] TST: more parameterised testing - part 4 (#40963)

simonjayhawkins · web-flow · commit 499aa67a6e71 · 2021-04-15T21:03:57.000-04:00
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
@@ -10,6 +10,7 @@
 import pytest
 
 from pandas._libs.tslibs import iNaT
+import pandas.util._test_decorators as td
 
 from pandas import (
     NA,
@@ -246,25 +247,34 @@ def test_td64_series_astype_object(self):
         assert result.dtype == np.object_
 
     @pytest.mark.parametrize(
-        "values",
+        "data, dtype",
         [
-            Series(["x", "y", "z"], dtype="string"),
-            Series(["x", "y", "z"], dtype="category"),
-            Series(3 * [Timestamp("2020-01-01", tz="UTC")]),
-            Series(3 * [Interval(0, 1)]),
+            (["x", "y", "z"], "string"),
+            pytest.param(
+                ["x", "y", "z"],
+                "arrow_string",
+                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
+            ),
+            (["x", "y", "z"], "category"),
+            (3 * [Timestamp("2020-01-01", tz="UTC")], None),
+            (3 * [Interval(0, 1)], None),
         ],
     )
     @pytest.mark.parametrize("errors", ["raise", "ignore"])
-    def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
+    def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
         # https://github.com/pandas-dev/pandas/issues/35471
+
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
+
+        ser = Series(data, dtype=dtype)
         if errors == "ignore":
-            expected = values
-            result = values.astype(float, errors="ignore")
+            expected = ser
+            result = ser.astype(float, errors="ignore")
             tm.assert_series_equal(result, expected)
         else:
             msg = "(Cannot cast)|(could not convert)"
             with pytest.raises((ValueError, TypeError), match=msg):
-                values.astype(float, errors=errors)
+                ser.astype(float, errors=errors)
 
     @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
     def test_astype_from_float_to_str(self, dtype):
diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     CategoricalDtype,
     DataFrame,
@@ -9,6 +11,7 @@
     Timestamp,
 )
 import pandas._testing as tm
+from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
 
 
 class TestUpdate:
@@ -82,37 +85,38 @@ def test_update_from_non_series(self, series, other, expected):
         tm.assert_series_equal(series, expected)
 
     @pytest.mark.parametrize(
-        "result, target, expected",
+        "data, other, expected, dtype",
         [
-            (
-                Series(["a", None], dtype="string"),
-                Series([None, "b"], dtype="string"),
-                Series(["a", "b"], dtype="string"),
-            ),
-            (
-                Series([1, None], dtype="Int64"),
-                Series([None, 2], dtype="Int64"),
-                Series([1, 2], dtype="Int64"),
+            (["a", None], [None, "b"], ["a", "b"], "string"),
+            pytest.param(
+                ["a", None],
+                [None, "b"],
+                ["a", "b"],
+                "arrow_string",
+                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
             ),
+            ([1, None], [None, 2], [1, 2], "Int64"),
+            ([True, None], [None, False], [True, False], "boolean"),
             (
-                Series([True, None], dtype="boolean"),
-                Series([None, False], dtype="boolean"),
-                Series([True, False], dtype="boolean"),
+                ["a", None],
+                [None, "b"],
+                ["a", "b"],
+                CategoricalDtype(categories=["a", "b"]),
             ),
             (
-                Series(["a", None], dtype=CategoricalDtype(categories=["a", "b"])),
-                Series([None, "b"], dtype=CategoricalDtype(categories=["a", "b"])),
-                Series(["a", "b"], dtype=CategoricalDtype(categories=["a", "b"])),
-            ),
-            (
-                Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT]),
-                Series([NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")]),
-                Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2),
+                [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
+                [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
+                [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
+                "datetime64[ns, Europe/London]",
             ),
         ],
     )
-    def test_update_extension_array_series(self, result, target, expected):
-        result.update(target)
+    def test_update_extension_array_series(self, data, other, expected, dtype):
+        result = Series(data, dtype=dtype)
+        other = Series(other, dtype=dtype)
+        expected = Series(expected, dtype=dtype)
+
+        result.update(other)
         tm.assert_series_equal(result, expected)
 
     def test_update_with_categorical_type(self):
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -364,26 +364,28 @@ def test_match():
 
 def test_fullmatch():
     # GH 32806
-    values = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"])
-    result = values.str.fullmatch(".*BAD[_]+.*BAD")
-    exp = Series([True, False, np.nan, False])
-    tm.assert_series_equal(result, exp)
-
-    # Make sure that the new string arrays work
-    string_values = Series(
-        ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype="string"
-    )
-    result = string_values.str.fullmatch(".*BAD[_]+.*BAD")
-    # Result is nullable boolean with StringDtype
-    string_exp = Series([True, False, np.nan, False], dtype="boolean")
-    tm.assert_series_equal(result, string_exp)
+    ser = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"])
+    result = ser.str.fullmatch(".*BAD[_]+.*BAD")
+    expected = Series([True, False, np.nan, False])
+    tm.assert_series_equal(result, expected)
 
-    values = Series(["ab", "AB", "abc", "ABC"])
-    result = values.str.fullmatch("ab", case=False)
+    ser = Series(["ab", "AB", "abc", "ABC"])
+    result = ser.str.fullmatch("ab", case=False)
     expected = Series([True, True, False, False])
     tm.assert_series_equal(result, expected)
 
 
+def test_fullmatch_nullable_string_dtype(nullable_string_dtype):
+    ser = Series(
+        ["fooBAD__barBAD", "BAD_BADleroybrown", None, "foo"],
+        dtype=nullable_string_dtype,
+    )
+    result = ser.str.fullmatch(".*BAD[_]+.*BAD")
+    # Result is nullable boolean
+    expected = Series([True, False, np.nan, False], dtype="boolean")
+    tm.assert_series_equal(result, expected)
+
+
 def test_findall():
     values = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"])
 
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
@@ -136,17 +136,23 @@ def test_repeat():
     tm.assert_series_equal(rs, xp)
 
 
-def test_repeat_with_null():
+def test_repeat_with_null(nullable_string_dtype, request):
     # GH: 31632
-    values = Series(["a", None], dtype="string")
-    result = values.str.repeat([3, 4])
-    exp = Series(["aaa", None], dtype="string")
-    tm.assert_series_equal(result, exp)
 
-    values = Series(["a", "b"], dtype="string")
-    result = values.str.repeat([3, None])
-    exp = Series(["aaa", None], dtype="string")
-    tm.assert_series_equal(result, exp)
+    if nullable_string_dtype == "arrow_string":
+        reason = 'Attribute "dtype" are different'
+        mark = pytest.mark.xfail(reason=reason)
+        request.node.add_marker(mark)
+
+    ser = Series(["a", None], dtype=nullable_string_dtype)
+    result = ser.str.repeat([3, 4])
+    expected = Series(["aaa", None], dtype=nullable_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    ser = Series(["a", "b"], dtype=nullable_string_dtype)
+    result = ser.str.repeat([3, None])
+    expected = Series(["aaa", None], dtype=nullable_string_dtype)
+    tm.assert_series_equal(result, expected)
 
 
 def test_empty_str_methods():