From c697a66f9f258c3775d4f8eb545600362653cdd6 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Tue, 12 Nov 2019 15:59:26 +0530 Subject: [PATCH 1/3] Update .format(...) strings to f-expressions --- asv_bench/benchmarks/categoricals.py | 10 +++++----- asv_bench/benchmarks/gil.py | 4 ++-- asv_bench/benchmarks/index_object.py | 2 +- asv_bench/benchmarks/io/csv.py | 8 ++++---- asv_bench/benchmarks/io/excel.py | 2 +- asv_bench/benchmarks/io/hdf.py | 2 +- asv_bench/benchmarks/io/json.py | 4 ++-- asv_bench/benchmarks/io/msgpack.py | 2 +- asv_bench/benchmarks/io/pickle.py | 2 +- asv_bench/benchmarks/io/sql.py | 4 ++-- asv_bench/benchmarks/io/stata.py | 4 ++-- asv_bench/benchmarks/timedelta.py | 6 +++--- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 4384ccb7fa8b3..e21d859d18c8c 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -84,7 +84,7 @@ class ValueCounts: def setup(self, dropna): n = 5 * 10 ** 5 - arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)] + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] self.ts = pd.Series(arr).astype("category") def time_value_counts(self, dropna): @@ -102,7 +102,7 @@ def time_rendering(self): class SetCategories: def setup(self): n = 5 * 10 ** 5 - arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)] + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] self.ts = pd.Series(arr).astype("category") def time_set_categories(self): @@ -112,7 +112,7 @@ def time_set_categories(self): class RemoveCategories: def setup(self): n = 5 * 10 ** 5 - arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)] + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] self.ts = pd.Series(arr).astype("category") def time_remove_categories(self): @@ -166,7 +166,7 @@ def setup(self, dtype): sample_size = 100 arr = [i for i in np.random.randint(0, n // 10, size=n)] if dtype == "object": - arr = ["s{:04d}".format(i) for i in arr] + arr = [f"s{i:04d}" for i in arr] self.sample = np.random.choice(arr, sample_size) self.series = pd.Series(arr).astype("category") @@ -225,7 +225,7 @@ def setup(self, index): elif index == "non_monotonic": self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories) else: - raise ValueError("Invalid index param: {}".format(index)) + raise ValueError(f"Invalid index param: {index}") self.scalar = 10000 self.list = list(range(10000)) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 43c8594b8c8df..97b670ae640ac 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -252,11 +252,11 @@ def setup(self, dtype): "object": DataFrame( "foo", index=range(rows), - columns=["object%03d".format(i) for i in range(5)], + columns=[f"object%03d" for i in range(5)], ), } - self.fname = "__test_{}__.csv".format(dtype) + self.fname = f"__test_{dtype}__.csv" df = data[dtype] df.to_csv(self.fname) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index a94960d494707..f1d5209ac65ef 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -146,7 +146,7 @@ class Indexing: def setup(self, dtype): N = 10 ** 6 - self.idx = getattr(tm, "make{}Index".format(dtype))(N) + self.idx = getattr(tm, f"make{dtype}Index")(N) self.array_mask = (np.arange(N) % 3) == 0 self.series_mask = Series(self.array_mask) self.sorted = self.idx.sort_values() diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 9b8599b0a1b64..85c4bbdbefdf6 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -202,7 +202,7 @@ def setup(self, sep, thousands): data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)) df = DataFrame(data) if thousands is not None: - fmt = ":{}".format(thousands) + fmt = f":{thousands}" fmt = "{" + fmt + "}" df = df.applymap(lambda x: fmt.format(x)) df.to_csv(self.fname, sep=sep) @@ -231,7 +231,7 @@ def setup(self, sep, decimal, float_precision): floats = [ "".join(random.choice(string.digits) for _ in range(28)) for _ in range(15) ] - rows = sep.join(["0{}".format(decimal) + "{}"] * 3) + "\n" + rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n" data = rows * 5 data = data.format(*floats) * 200 # 1000 x 3 strings csv self.StringIO_input = StringIO(data) @@ -310,7 +310,7 @@ class ReadCSVCachedParseDates(StringIORewind): def setup(self, do_cache): data = ( - "\n".join("10/{}".format(year) for year in range(2000, 2100)) + "\n" + "\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n" ) * 10 self.StringIO_input = StringIO(data) @@ -336,7 +336,7 @@ class ReadCSVMemoryGrowth(BaseIO): def setup(self): with open(self.fname, "w") as f: for i in range(self.num_rows): - f.write("{i}\n".format(i=i)) + f.write(f"{i}\n") def mem_parser_chunks(self): # see gh-24805. diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index c97cf768e27d9..75d87140488e3 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -14,7 +14,7 @@ def _generate_dataframe(): C = 5 df = DataFrame( np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], + columns=[f"float{i}" for i in range(C)], index=date_range("20000101", periods=N, freq="H"), ) df["object"] = tm.makeStringIndex(N) diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index b78dc63d17130..88c1a3dc48ea4 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -115,7 +115,7 @@ def setup(self, format): C = 5 self.df = DataFrame( np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], + columns=[f"float{i}" for i in range(C)], index=date_range("20000101", periods=N, freq="H"), ) self.df["object"] = tm.makeStringIndex(N) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 5c1d39776b91c..8f037e94e0095 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -20,7 +20,7 @@ def setup(self, orient, index): } df = DataFrame( np.random.randn(N, 5), - columns=["float_{}".format(i) for i in range(5)], + columns=[f"float_{i}" for i in range(5)], index=indexes[index], ) df.to_json(self.fname, orient=orient) @@ -43,7 +43,7 @@ def setup(self, index): } df = DataFrame( np.random.randn(N, 5), - columns=["float_{}".format(i) for i in range(5)], + columns=[f"float_{i}" for i in range(5)], index=indexes[index], ) df.to_json(self.fname, orient="records", lines=True) diff --git a/asv_bench/benchmarks/io/msgpack.py b/asv_bench/benchmarks/io/msgpack.py index f5038602539ab..a5b8b81bed85b 100644 --- a/asv_bench/benchmarks/io/msgpack.py +++ b/asv_bench/benchmarks/io/msgpack.py @@ -15,7 +15,7 @@ def setup(self): C = 5 self.df = DataFrame( np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], + columns=[f"float{i}" for i in range(C)], index=date_range("20000101", periods=N, freq="H"), ) self.df["object"] = tm.makeStringIndex(N) diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py index 647e9d27dec9d..12620656dd2bf 100644 --- a/asv_bench/benchmarks/io/pickle.py +++ b/asv_bench/benchmarks/io/pickle.py @@ -13,7 +13,7 @@ def setup(self): C = 5 self.df = DataFrame( np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], + columns=[f"float{i}" for i in range(C)], index=date_range("20000101", periods=N, freq="H"), ) self.df["object"] = tm.makeStringIndex(N) diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py index fe84c869717e3..6cc7f56ae3d65 100644 --- a/asv_bench/benchmarks/io/sql.py +++ b/asv_bench/benchmarks/io/sql.py @@ -19,7 +19,7 @@ def setup(self, connection): "sqlite": sqlite3.connect(":memory:"), } self.table_name = "test_type" - self.query_all = "SELECT * FROM {}".format(self.table_name) + self.query_all = f"SELECT * FROM {self.table_name}" self.con = con[connection] self.df = DataFrame( { @@ -58,7 +58,7 @@ def setup(self, connection, dtype): "sqlite": sqlite3.connect(":memory:"), } self.table_name = "test_type" - self.query_col = "SELECT {} FROM {}".format(dtype, self.table_name) + self.query_col = f"SELECT {dtype} FROM {self.table_name}" self.con = con[connection] self.df = DataFrame( { diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py index 28829785d72e9..f3125f8598418 100644 --- a/asv_bench/benchmarks/io/stata.py +++ b/asv_bench/benchmarks/io/stata.py @@ -17,7 +17,7 @@ def setup(self, convert_dates): C = self.C = 5 self.df = DataFrame( np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], + columns=[f"float{i}" for i in range(C)], index=date_range("20000101", periods=N, freq="H"), ) self.df["object"] = tm.makeStringIndex(self.N) @@ -47,7 +47,7 @@ def setup(self, convert_dates): for i in range(10): missing_data = np.random.randn(self.N) missing_data[missing_data < 0] = np.nan - self.df["missing_{0}".format(i)] = missing_data + self.df[f"missing_{i}"] = missing_data self.df.to_stata(self.fname, self.convert_dates) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 828134b80aa3d..37418d752f833 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -14,8 +14,8 @@ def setup(self): self.str_days = [] self.str_seconds = [] for i in self.ints: - self.str_days.append("{0} days".format(i)) - self.str_seconds.append("00:00:{0:02d}".format(i)) + self.str_days.append(f"{i} days") + self.str_seconds.append(f"00:00:{i:02d}") def time_convert_int(self): to_timedelta(self.ints, unit="s") @@ -34,7 +34,7 @@ class ToTimedeltaErrors: def setup(self, errors): ints = np.random.randint(0, 60, size=10000) - self.arr = ["{0} days".format(i) for i in ints] + self.arr = [f"{i} days" for i in ints] self.arr[-1] = "apple" def time_convert(self, errors): From 4ca5c72093d0b36506a5c8afdc59bacfaf51d892 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Thu, 14 Nov 2019 09:36:00 +0530 Subject: [PATCH 2/3] Run black formatting on changed files --- asv_bench/benchmarks/gil.py | 4 +--- asv_bench/benchmarks/io/csv.py | 4 +--- pandas/core/algorithms.py | 2 +- pandas/core/groupby/grouper.py | 6 +++++- pandas/io/common.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 97b670ae640ac..4683619d145e1 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -250,9 +250,7 @@ def setup(self, dtype): np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows) ), "object": DataFrame( - "foo", - index=range(rows), - columns=[f"object%03d" for i in range(5)], + "foo", index=range(rows), columns=[f"object%03d" for i in range(5)], ), } diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 85c4bbdbefdf6..adb3dd95e3574 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -309,9 +309,7 @@ class ReadCSVCachedParseDates(StringIORewind): param_names = ["do_cache"] def setup(self, do_cache): - data = ( - "\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n" - ) * 10 + data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10 self.StringIO_input = StringIO(data) def time_read_csv_cached(self, do_cache): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b49a9d7957d51..ea75d46048e63 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1159,7 +1159,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - ns, = np.nonzero(arr <= kth_val) + (ns,) = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e6e3ee62459ca..0edc3e4a4ff3d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -288,7 +288,11 @@ def __init__( if self.name is None: self.name = index.names[level] - self.grouper, self._codes, self._group_index = index._get_grouper_for_level( # noqa: E501 + ( + self.grouper, + self._codes, + self._group_index, + ) = index._get_grouper_for_level( # noqa: E501 self.grouper, level ) diff --git a/pandas/io/common.py b/pandas/io/common.py index 0bef14e4999c7..e08fd37e65ad9 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,7 +109,7 @@ def _is_url(url) -> bool: def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -139,7 +139,7 @@ def _validate_header_arg(header) -> None: def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. From 27cdc551eb665c66fcc8d78d5b0a996c2ef068a7 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Thu, 14 Nov 2019 23:39:19 +0530 Subject: [PATCH 3/3] Remove unnecessary variable --- asv_bench/benchmarks/gil.py | 2 +- pandas/core/algorithms.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 4683619d145e1..860c6cc6192bb 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -250,7 +250,7 @@ def setup(self, dtype): np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows) ), "object": DataFrame( - "foo", index=range(rows), columns=[f"object%03d" for i in range(5)], + "foo", index=range(rows), columns=["object%03d" for _ in range(5)] ), } diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ea75d46048e63..b49a9d7957d51 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1159,7 +1159,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - (ns,) = np.nonzero(arr <= kth_val) + ns, = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all":