Commit 718a00c

chelsea-lin and tswast authored
fix: series.(to_csv|to_json) leverages bq export (#452)
Co-authored-by: Tim Sweña (Swast) <swast@google.com>
1 parent 8f6e955 commit 718a00c

5 files changed (+40, -77 lines)

bigframes/dataframe.py (+6 -6)

@@ -2588,16 +2588,16 @@ def to_json(
         if "*" not in path_or_buf:
             raise NotImplementedError(ERROR_IO_REQUIRES_WILDCARD)
 
-        if lines is True and orient != "records":
-            raise ValueError(
-                "'lines' keyword is only valid when 'orient' is 'records'."
-            )
-
         # TODO(ashleyxu) Support lines=False for small tables with arrays and TO_JSON_STRING.
         # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#to_json_string
         if lines is False:
             raise NotImplementedError(
-                f"Only newline delimited JSON format is supported. {constants.FEEDBACK_LINK}"
+                f"Only newline-delimited JSON is supported. Add `lines=True` to your function call. {constants.FEEDBACK_LINK}"
+            )
+
+        if lines is True and orient != "records":
+            raise ValueError(
+                "'lines' keyword is only valid when 'orient' is 'records'."
             )
 
         result_table = self._run_io_query(
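
The two checks trade places and the NotImplementedError message gains a concrete fix (`lines=True`). As a minimal usage sketch (hypothetical bucket name; assumes a configured BigQuery session and a readable public table), the destination must contain a `*` wildcard and only newline-delimited JSON is supported:

    import bigframes.pandas as bpd

    df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")

    # The wildcard lets BigQuery shard the export across files;
    # lines=True with orient="records" selects newline-delimited JSON.
    df.to_json(
        "gs://my-bucket/penguins-*.jsonl",  # hypothetical bucket
        orient="records",
        lines=True,
    )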

bigframes/series.py (+12 -8)

@@ -1390,9 +1390,10 @@ def to_frame(self, name: blocks.Label = None) -> bigframes.dataframe.DataFrame:
         )
         return bigframes.dataframe.DataFrame(block)
 
-    def to_csv(self, path_or_buf=None, **kwargs) -> typing.Optional[str]:
-        # TODO(b/280651142): Implement version that leverages bq export native csv support to bypass local pandas step.
-        return self.to_pandas().to_csv(path_or_buf, **kwargs)
+    def to_csv(
+        self, path_or_buf: str, sep=",", *, header: bool = True, index: bool = True
+    ) -> None:
+        return self.to_frame().to_csv(path_or_buf, sep=sep, header=header, index=index)
 
     def to_dict(self, into: type[dict] = dict) -> typing.Mapping:
         return typing.cast(dict, self.to_pandas().to_dict(into))  # type: ignore
@@ -1402,14 +1403,17 @@ def to_excel(self, excel_writer, sheet_name="Sheet1", **kwargs) -> None:
 
     def to_json(
         self,
-        path_or_buf=None,
+        path_or_buf: str,
         orient: typing.Literal[
             "split", "records", "index", "columns", "values", "table"
         ] = "columns",
-        **kwargs,
-    ) -> typing.Optional[str]:
-        # TODO(b/280651142): Implement version that leverages bq export native csv support to bypass local pandas step.
-        return self.to_pandas().to_json(path_or_buf, **kwargs)
+        *,
+        lines: bool = False,
+        index: bool = True,
+    ) -> None:
+        return self.to_frame().to_json(
+            path_or_buf=path_or_buf, orient=orient, lines=lines, index=index
+        )
 
     def to_latex(
         self, buf=None, columns=None, header=True, index=True, **kwargs
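
`Series.to_csv` and `Series.to_json` now delegate to `Series.to_frame()` and the DataFrame exporters, so a Series export runs as a BigQuery extract job instead of round-tripping through local pandas. A sketch of the new call shape, reusing the hypothetical `df` and bucket from above (column choice illustrative):

    # Both methods route through to_frame(), i.e. the same BigQuery
    # export path that DataFrame.to_csv / DataFrame.to_json use.
    s = df["body_mass_g"]
    s.to_csv("gs://my-bucket/mass-*.csv")
    s.to_json("gs://my-bucket/mass-*.jsonl", orient="records", lines=True)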

tests/system/small/test_series.py (+20 -8)

@@ -2384,18 +2384,30 @@ def test_to_frame(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)
 
 
-def test_to_json(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index["int64_col"].to_json()
-    pd_result = scalars_pandas_df_index["int64_col"].to_json()
+def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
+    path = gcs_folder + "test_series_to_json*.jsonl"
+    scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
+    gcs_df = pd.read_json(path, lines=True)
 
-    assert bf_result == pd_result
+    pd.testing.assert_series_equal(
+        gcs_df["int64_col"].astype(pd.Int64Dtype()),
+        scalars_pandas_df_index["int64_col"],
+        check_dtype=False,
+        check_index=False,
+    )
 
 
-def test_to_csv(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index["int64_col"].to_csv()
-    pd_result = scalars_pandas_df_index["int64_col"].to_csv()
+def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
+    path = gcs_folder + "test_series_to_csv*.csv"
+    scalars_df_index["int64_col"].to_csv(path)
+    gcs_df = pd.read_csv(path)
 
-    assert bf_result == pd_result
+    pd.testing.assert_series_equal(
+        gcs_df["int64_col"].astype(pd.Int64Dtype()),
+        scalars_pandas_df_index["int64_col"],
+        check_dtype=False,
+        check_index=False,
+    )
 
 
 def test_to_latex(scalars_df_index, scalars_pandas_df_index):
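
The rewritten tests exercise the real GCS round trip: write with BigFrames, read the files back with plain pandas, then compare against the in-memory pandas Series. Reading back yields NumPy dtypes and a fresh RangeIndex, hence the cast to the nullable Int64 dtype and the relaxed checks. The assertion shape in isolation (illustrative values):

    import pandas as pd

    raw = pd.Series([1, 2, 3])  # as pd.read_csv / pd.read_json returns it
    expected = pd.Series([1, 2, 3], dtype=pd.Int64Dtype())

    pd.testing.assert_series_equal(
        raw.astype(pd.Int64Dtype()),
        expected,
        check_dtype=False,
        check_index=False,  # read-back RangeIndex differs from the source index
    )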

third_party/bigframes_vendored/pandas/core/generic.py (+2 -2)

@@ -183,7 +183,7 @@ def to_json(
         *,
         index: bool = True,
         lines: bool = False,
-    ) -> str | None:
+    ) -> None:
         """Convert the object to a JSON string, written to Cloud Storage.
 
         Note NaN's and None will be converted to null and datetime objects
@@ -241,7 +241,7 @@ def to_json(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def to_csv(self, path_or_buf: str, *, index: bool = True) -> str | None:
+    def to_csv(self, path_or_buf: str, *, index: bool = True) -> None:
         """Write object to a comma-separated values (csv) file on Cloud Storage.
 
         Args:
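
The narrowed return annotations match the behavior: these exporters always write to the given Cloud Storage path and never return the result as an in-memory string, so `str | None` becomes `None`.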

third_party/bigframes_vendored/pandas/core/series.py (-53)

@@ -535,59 +535,6 @@ def to_xarray(self):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def to_json(
-        self,
-        path_or_buf=None,
-        orient: Literal[
-            "split", "records", "index", "columns", "values", "table"
-        ] = "columns",
-        **kwarg,
-    ) -> str | None:
-        """
-        Convert the object to a JSON string.
-
-        Note NaN's and None will be converted to null and datetime objects
-        will be converted to UNIX timestamps.
-
-        Args:
-            path_or_buf (str, path object, file-like object, or None, default None):
-                String, path object (implementing os.PathLike[str]), or file-like
-                object implementing a write() function. If None, the result is
-                returned as a string.
-            orient ({"split", "records", "index", "columns", "values", "table"}, default "columns"):
-                Indication of expected JSON string format.
-                'split' : dict like {{'index' -> [index], 'columns' -> [columns],'data' -> [values]}}
-                'records' : list like [{{column -> value}}, ... , {{column -> value}}]
-                'index' : dict like {{index -> {{column -> value}}}}
-                'columns' : dict like {{column -> {{index -> value}}}}
-                'values' : just the values array
-                'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}
-                    Describing the data, where data component is like ``orient='records'``.
-
-        Returns:
-            None or str: If path_or_buf is None, returns the resulting json format as a
-            string. Otherwise returns None.
-        """
-        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
-
-    def to_csv(self, path_or_buf: str, *, index: bool = True) -> str | None:
-        """
-        Write object to a comma-separated values (csv) file.
-
-        Args:
-            path_or_buf (str, path object, file-like object, or None, default None):
-                String, path object (implementing os.PathLike[str]), or file-like
-                object implementing a write() function. If None, the result is
-                returned as a string. If a non-binary file object is passed, it should
-                be opened with `newline=''`, disabling universal newlines. If a binary
-                file object is passed, `mode` might need to contain a `'b'`.
-
-        Returns:
-            None or str: If path_or_buf is None, returns the resulting csv format
-            as a string. Otherwise returns None.
-        """
-        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
-
     def agg(self, func):
         """
         Aggregate using one or more operations over the specified axis.
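
With these local overrides deleted, `Series.to_csv` and `Series.to_json` presumably pick up the Cloud Storage-oriented docstrings from `generic.py` above, leaving a single source of truth for the export documentation.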
