|
29 | 29 |
|
30 | 30 | import bigframes.constants as constants
|
31 | 31 | import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as gcb3p_pandas_helpers
|
| 32 | +import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops |
32 | 33 |
|
33 | 34 | # Type hints for Pandas dtypes supported by BigQuery DataFrame
|
34 | 35 | Dtype = Union[
|
|
96 | 97 | ibis_dtypes.Timestamp(timezone="UTC"),
|
97 | 98 | pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
|
98 | 99 | ),
|
| 100 | + (ibis_dtypes.binary, pd.ArrowDtype(pa.binary())), |
| 101 | + ( |
| 102 | + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), |
| 103 | + pd.ArrowDtype(pa.decimal128(38, 9)), |
| 104 | + ), |
| 105 | + ( |
| 106 | + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), |
| 107 | + pd.ArrowDtype(pa.decimal256(76, 38)), |
| 108 | + ), |
99 | 109 | )
|
100 | 110 |
|
101 | 111 | BIGFRAMES_TO_IBIS: Dict[Dtype, ibis_dtypes.DataType] = {
|
|
111 | 121 | ibis_dtypes.time: pa.time64("us"),
|
112 | 122 | ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"),
|
113 | 123 | ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"),
|
| 124 | + ibis_dtypes.binary: pd.ArrowDtype(pa.binary()), |
| 125 | + ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): pd.ArrowDtype( |
| 126 | + pa.decimal128(38, 9) |
| 127 | + ), |
| 128 | + ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): pd.ArrowDtype( |
| 129 | + pa.decimal256(76, 38) |
| 130 | + ), |
114 | 131 | }
|
115 | 132 |
|
116 | 133 | ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()}
|
|
124 | 141 | )
|
125 | 142 | IBIS_TO_BIGFRAMES.update(
|
126 | 143 | {
|
127 |
| - ibis_dtypes.binary: np.dtype("O"), |
128 |
| - ibis_dtypes.json: np.dtype("O"), |
129 |
| - ibis_dtypes.Decimal(precision=38, scale=9, nullable=True): np.dtype("O"), |
130 |
| - ibis_dtypes.Decimal(precision=76, scale=38, nullable=True): np.dtype("O"), |
131 | 144 | ibis_dtypes.GeoSpatial(
|
132 | 145 | geotype="geography", srid=4326, nullable=True
|
133 | 146 | ): gpd.array.GeometryDtype(),
|
@@ -177,7 +190,7 @@ def ibis_dtype_to_bigframes_dtype(
|
177 | 190 | # our IO returns them as objects. Eventually, we should support them as
|
178 | 191 | # ArrowDType (and update the IO accordingly)
|
179 | 192 | if isinstance(ibis_dtype, ibis_dtypes.Array):
|
180 |
| - return np.dtype("O") |
| 193 | + return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) |
181 | 194 |
|
182 | 195 | if isinstance(ibis_dtype, ibis_dtypes.Struct):
|
183 | 196 | return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype))
|
@@ -223,21 +236,13 @@ def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value:
|
223 | 236 | This is useful in cases where multiple types correspond to the same BigFrames dtype.
|
224 | 237 | """
|
225 | 238 | ibis_type = value.type()
|
| 239 | + name = value.get_name() |
| 240 | + if ibis_type.is_json(): |
| 241 | + value = vendored_ibis_ops.ToJsonString(value).to_expr() |
| 242 | + return value.name(name) |
226 | 243 | # Allow REQUIRED fields to be joined with NULLABLE fields.
|
227 | 244 | nullable_type = ibis_type.copy(nullable=True)
|
228 |
| - return value.cast(nullable_type).name(value.get_name()) |
229 |
| - |
230 |
| - |
231 |
| -def ibis_table_to_canonical_types(table: ibis_types.Table) -> ibis_types.Table: |
232 |
| - """Converts an Ibis table expression to canonical types. |
233 |
| -
|
234 |
| - This is useful in cases where multiple types correspond to the same BigFrames dtype. |
235 |
| - """ |
236 |
| - casted_columns = [] |
237 |
| - for column_name in table.columns: |
238 |
| - column = typing.cast(ibis_types.Value, table[column_name]) |
239 |
| - casted_columns.append(ibis_value_to_canonical_type(column)) |
240 |
| - return table.select(*casted_columns) |
| 245 | + return value.cast(nullable_type).name(name) |
241 | 246 |
|
242 | 247 |
|
243 | 248 | def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType:
|
|
0 commit comments