Skip to content

Commit e2e100a

Browse files
authored
fix: sanitize big-ints in table manager (#4468)
Fixes #4465
1 parent 34434e5 commit e2e100a

File tree

12 files changed

+285
-59
lines changed

12 files changed

+285
-59
lines changed

marimo/_output/data/data.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import base64
55
import io
6-
from typing import Union
6+
from typing import Any, Union
77

88
from marimo._plugins.core.media import is_data_empty
99
from marimo._runtime.virtual_file import (
@@ -171,3 +171,37 @@ def any_data(data: Union[str, bytes, io.BytesIO], ext: str) -> VirtualFile:
171171
return item.virtual_file
172172

173173
raise ValueError(f"Unsupported data type: {type(data)}")
174+
175+
176+
def sanitize_json_bigint(
    data: Union[str, dict[str, Any], list[dict[str, Any]]],
) -> str:
    """Serialize ``data`` to compact JSON, stringifying unsafe integers.

    JavaScript parses every JSON number as an IEEE-754 double, so integers
    outside ``[Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER]`` cannot
    be represented exactly and silently lose precision in the frontend.
    Any such integer is therefore emitted as a JSON string instead.

    Args:
        data: Either a JSON document as a string (it is parsed first) or
            already-decoded JSON data (a dict, or a list of dicts).

    Returns:
        A compact JSON string (no whitespace after separators) in which
        every integer outside the safe range, at any nesting depth, has
        been replaced by its decimal string representation.

    Raises:
        json.JSONDecodeError: If ``data`` is a string that is not valid
            JSON.
    """
    from json import dumps, loads

    # JavaScript's safe integer limits: +/-(2**53 - 1)
    MAX_SAFE_INTEGER = 9007199254740991
    MIN_SAFE_INTEGER = -9007199254740991

    def convert_bigint(obj: Any) -> Any:
        if isinstance(obj, dict):
            return {k: convert_bigint(v) for k, v in obj.items()}  # type: ignore
        # json.dumps writes tuples as JSON arrays, so they must be walked
        # as well; otherwise big ints nested in a tuple would slip through.
        if isinstance(obj, (list, tuple)):
            return [convert_bigint(item) for item in obj]  # type: ignore
        # bool is a subclass of int; exclude it explicitly so True/False
        # are never candidates for stringification.
        if (
            isinstance(obj, int)
            and not isinstance(obj, bool)
            and not (MIN_SAFE_INTEGER <= obj <= MAX_SAFE_INTEGER)
        ):
            return str(obj)
        return obj

    as_json = loads(data) if isinstance(data, str) else data

    return dumps(convert_bigint(as_json), indent=None, separators=(",", ":"))

marimo/_plugins/ui/_impl/tables/default_table.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,24 @@ def to_data(
8888
) -> JSONType:
8989
return self._normalize_data(self.apply_formatting(format_mapping).data)
9090

91-
def to_csv(self, format_mapping: Optional[FormatMapping] = None) -> bytes:
91+
def to_csv_str(
92+
self, format_mapping: Optional[FormatMapping] = None
93+
) -> str:
9294
if isinstance(self.data, dict) and not self.is_column_oriented:
93-
return DefaultTableManager(self._normalize_data(self.data)).to_csv(
94-
format_mapping
95-
)
95+
return DefaultTableManager(
96+
self._normalize_data(self.data)
97+
).to_csv_str(format_mapping)
9698

97-
return self._as_table_manager().to_csv(format_mapping)
99+
return self._as_table_manager().to_csv_str(format_mapping)
98100

99-
def to_json(self, format_mapping: Optional[FormatMapping] = None) -> bytes:
101+
def to_json_str(
102+
self, format_mapping: Optional[FormatMapping] = None
103+
) -> str:
100104
if isinstance(self.data, dict) and not self.is_column_oriented:
101105
return DefaultTableManager(
102106
self._normalize_data(self.data)
103-
).to_json(format_mapping)
104-
return self._as_table_manager().to_json(format_mapping)
107+
).to_json_str(format_mapping)
108+
return self._as_table_manager().to_json_str(format_mapping)
105109

106110
def select_rows(self, indices: list[int]) -> DefaultTableManager:
107111
if isinstance(self.data, dict):

marimo/_plugins/ui/_impl/tables/ibis_table.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ def create() -> type[TableManager[Any]]:
4040
class IbisTableManager(TableManager[ibis.Table]):
4141
type = "ibis"
4242

43-
def to_csv(
43+
def to_csv_str(
4444
self, format_mapping: Optional[FormatMapping] = None
45-
) -> bytes:
46-
return self._as_table_manager().to_csv(format_mapping)
45+
) -> str:
46+
return self._as_table_manager().to_csv_str(format_mapping)
4747

48-
def to_json(
48+
def to_json_str(
4949
self, format_mapping: Optional[FormatMapping] = None
50-
) -> bytes:
51-
return self._as_table_manager().to_json(format_mapping)
50+
) -> str:
51+
return self._as_table_manager().to_json_str(format_mapping)
5252

5353
def supports_download(self) -> bool:
5454
return False

marimo/_plugins/ui/_impl/tables/narwhals_table.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Copyright 2024 Marimo. All rights reserved.
22
from __future__ import annotations
33

4-
import json
54
from functools import cached_property
65
from typing import Any, Optional, Union, cast
76

@@ -11,6 +10,7 @@
1110
from marimo import _loggers
1211
from marimo._data.models import ColumnSummary, ExternalDataType
1312
from marimo._dependencies.dependencies import DependencyManager
13+
from marimo._output.data.data import sanitize_json_bigint
1414
from marimo._plugins.core.media import io_to_data_url
1515
from marimo._plugins.ui._impl.tables.format import (
1616
FormatMapping,
@@ -60,28 +60,28 @@ def with_new_data(
6060
# of the subclass with the native data.
6161
return self.__class__(data.to_native())
6262

63-
def to_csv(
63+
def to_csv_str(
6464
self,
6565
format_mapping: Optional[FormatMapping] = None,
66-
) -> bytes:
66+
) -> str:
6767
_data = self.apply_formatting(format_mapping).as_frame()
68-
return dataframe_to_csv(_data).encode("utf-8")
68+
return dataframe_to_csv(_data)
6969

70-
def to_json(self, format_mapping: Optional[FormatMapping] = None) -> bytes:
70+
def to_json_str(
71+
self, format_mapping: Optional[FormatMapping] = None
72+
) -> str:
7173
try:
72-
csv_str = self.to_csv(format_mapping=format_mapping).decode(
73-
"utf-8"
74-
)
74+
csv_str = self.to_csv_str(format_mapping=format_mapping)
7575
except Exception as e:
7676
LOGGER.debug(
7777
f"Failed to use format mapping: {str(e)}, falling back to default"
7878
)
79-
csv_str = self.to_csv().decode("utf-8")
79+
csv_str = self.to_csv_str()
8080

8181
import csv
8282

8383
csv_reader = csv.DictReader(csv_str.splitlines())
84-
return json.dumps([row for row in csv_reader]).encode("utf-8")
84+
return sanitize_json_bigint([row for row in csv_reader])
8585

8686
def apply_formatting(
8787
self, format_mapping: Optional[FormatMapping]

marimo/_plugins/ui/_impl/tables/pandas_table.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from marimo import _loggers
1111
from marimo._data.models import ExternalDataType
12+
from marimo._output.data.data import sanitize_json_bigint
1213
from marimo._plugins.ui._impl.tables.format import (
1314
FormatMapping,
1415
format_value,
@@ -44,30 +45,28 @@ def __init__(self, data: pd.DataFrame) -> None:
4445
def schema(self) -> pd.Series[Any]:
4546
return self._original_data.dtypes # type: ignore
4647

47-
# We override narwhals's to_csv to handle pandas
48+
# We override narwhals's to_csv_str to handle pandas
4849
# headers
49-
def to_csv(
50+
def to_csv_str(
5051
self, format_mapping: Optional[FormatMapping] = None
51-
) -> bytes:
52+
) -> str:
5253
has_headers = len(self.get_row_headers()) > 0
5354
# Pandas omits H:M:S for datetimes when H:M:S is identically
5455
# 0; this doesn't play well with our frontend table component,
5556
# so we use an explicit date format.
56-
return (
57-
self.apply_formatting(format_mapping)
58-
._original_data.to_csv(
59-
# By adding %H:%M:%S and %z, we ensure that the
60-
# datetime is displayed in the frontend with the
61-
# correct timezone.
62-
index=has_headers,
63-
date_format="%Y-%m-%d %H:%M:%S%z",
64-
)
65-
.encode("utf-8")
57+
return self.apply_formatting(
58+
format_mapping
59+
)._original_data.to_csv(
60+
# By adding %H:%M:%S and %z, we ensure that the
61+
# datetime is displayed in the frontend with the
62+
# correct timezone.
63+
index=has_headers,
64+
date_format="%Y-%m-%d %H:%M:%S%z",
6665
)
6766

68-
def to_json(
67+
def to_json_str(
6968
self, format_mapping: Optional[FormatMapping] = None
70-
) -> bytes:
69+
) -> str:
7170
from pandas.api.types import (
7271
is_complex_dtype,
7372
is_object_dtype,
@@ -98,7 +97,9 @@ def to_json(
9897
"Error handling complex or timedelta64 dtype",
9998
exc_info=e,
10099
)
101-
return result.to_json(orient="records").encode("utf-8")
100+
return sanitize_json_bigint(
101+
result.to_json(orient="records")
102+
)
102103

103104
# Flatten row multi-index
104105
if isinstance(result.index, pd.MultiIndex) or (
@@ -120,10 +121,9 @@ def to_json(
120121
"Indexes with more than one level are not supported properly, call reset_index() to flatten"
121122
)
122123

123-
return result.to_json(
124-
orient="records",
125-
date_format="iso",
126-
).encode("utf-8")
124+
return sanitize_json_bigint(
125+
result.to_json(orient="records", date_format="iso")
126+
)
127127

128128
def to_arrow_ipc(self) -> bytes:
129129
out = io.BytesIO()

marimo/_plugins/ui/_impl/tables/polars_table.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from marimo._data.models import (
1212
ExternalDataType,
1313
)
14+
from marimo._output.data.data import sanitize_json_bigint
1415
from marimo._plugins.ui._impl.tables.format import (
1516
FormatMapping,
1617
format_value,
@@ -64,13 +65,13 @@ def to_arrow_ipc(self) -> bytes:
6465

6566
# We override narwhals's to_csv_str to handle polars
6667
# nested data types.
67-
def to_csv(
68+
def to_csv_str(
6869
self,
6970
format_mapping: Optional[FormatMapping] = None,
70-
) -> bytes:
71+
) -> str:
7172
_data = self.apply_formatting(format_mapping).collect()
7273
try:
73-
return _data.write_csv().encode("utf-8")
74+
return _data.write_csv()
7475
except pl.exceptions.ComputeError:
7576
# Likely CSV format does not support nested data or objects
7677
# Try to convert columns to json or strings
@@ -99,11 +100,11 @@ def to_csv(
99100
result = self._convert_time_to_string(
100101
result, column
101102
)
102-
return result.write_csv().encode("utf-8")
103+
return result.write_csv()
103104

104-
def to_json(
105+
def to_json_str(
105106
self, format_mapping: Optional[FormatMapping] = None
106-
) -> bytes:
107+
) -> str:
107108
result = self.apply_formatting(format_mapping).collect()
108109
try:
109110
for column in result.get_columns():
@@ -112,7 +113,7 @@ def to_json(
112113
result = self._convert_time_to_string(
113114
result, column
114115
)
115-
return result.write_json().encode("utf-8")
116+
return sanitize_json_bigint(result.write_json())
116117
except (
117118
BaseException
118119
): # Sometimes, polars throws a generic exception
@@ -140,7 +141,7 @@ def to_json(
140141
result, column
141142
)
142143

143-
return result.write_json().encode("utf-8")
144+
return sanitize_json_bigint(result.write_json())
144145

145146
def _convert_time_to_string(
146147
self, result: pl.DataFrame, column: pl.Series

marimo/_plugins/ui/_impl/tables/table_manager.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,19 +94,30 @@ def sort_values(
9494
pass
9595

9696
@abc.abstractmethod
97+
def to_csv_str(
98+
self,
99+
format_mapping: Optional[FormatMapping] = None,
100+
) -> str:
101+
pass
102+
97103
def to_csv(
98104
self,
99105
format_mapping: Optional[FormatMapping] = None,
100106
) -> bytes:
101-
pass
107+
return self.to_csv_str(format_mapping).encode("utf-8")
102108

103109
def to_arrow_ipc(self) -> bytes:
104110
raise NotImplementedError("Arrow format not supported")
105111

106112
@abc.abstractmethod
107-
def to_json(self, format_mapping: Optional[FormatMapping] = None) -> bytes:
113+
def to_json_str(
114+
self, format_mapping: Optional[FormatMapping] = None
115+
) -> str:
108116
pass
109117

118+
def to_json(self, format_mapping: Optional[FormatMapping] = None) -> bytes:
119+
return self.to_json_str(format_mapping).encode("utf-8")
120+
110121
@abc.abstractmethod
111122
def select_rows(self, indices: list[int]) -> TableManager[Any]:
112123
pass
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
3+
import marimo
4+
5+
__generated_with = "0.12.7"
6+
app = marimo.App(width="medium")
7+
8+
9+
@app.cell(hide_code=True)
10+
def _(mo):
11+
mo.md(r"""## Big Ints""")
12+
return
13+
14+
15+
@app.cell(hide_code=True)
16+
def _():
17+
data = {
18+
"int": [1, 2, 3],
19+
"bigint_1": [
20+
1000000000000000000,
21+
1000000000000000001,
22+
1000000000000000002,
23+
],
24+
"bigint_2": [
25+
2000000000000000000,
26+
2000000000000000001,
27+
2000000000000000002,
28+
],
29+
"bigint_3": [
30+
3000000000000000000,
31+
3000000000000000001,
32+
3000000000000000002,
33+
],
34+
}
35+
return (data,)
36+
37+
38+
@app.cell
39+
def _(data):
40+
import pandas as pd
41+
42+
pd.DataFrame(data)
43+
return
44+
45+
46+
@app.cell
47+
def _(data):
48+
import polars as pl
49+
50+
pl.DataFrame(data)
51+
return
52+
53+
54+
@app.cell
55+
def _():
56+
import marimo as mo
57+
return (mo,)
58+
59+
60+
if __name__ == "__main__":
61+
app.run()

0 commit comments

Comments
 (0)