|
24 | 24 | import dataclasses
|
25 | 25 | import functools
|
26 | 26 | import itertools
|
| 27 | +import os |
27 | 28 | import random
|
28 | 29 | import typing
|
29 | 30 | from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple
|
|
41 | 42 | import bigframes.core.guid as guid
|
42 | 43 | import bigframes.core.join_def as join_defs
|
43 | 44 | import bigframes.core.ordering as ordering
|
| 45 | +import bigframes.core.schema as bf_schema |
44 | 46 | import bigframes.core.tree_properties as tree_properties
|
45 | 47 | import bigframes.core.utils
|
46 | 48 | import bigframes.core.utils as utils
|
47 | 49 | import bigframes.dtypes
|
| 50 | +import bigframes.features |
48 | 51 | import bigframes.operations as ops
|
49 | 52 | import bigframes.operations.aggregations as agg_ops
|
50 | 53 | import bigframes.session._io.pandas
|
@@ -411,7 +414,32 @@ def _to_dataframe(self, result) -> pd.DataFrame:
|
411 | 414 | """Convert BigQuery data to pandas DataFrame with specific dtypes."""
|
412 | 415 | dtypes = dict(zip(self.index_columns, self.index.dtypes))
|
413 | 416 | dtypes.update(zip(self.value_columns, self.dtypes))
|
414 |
| - return self.session._rows_to_dataframe(result, dtypes) |
| 417 | + result_dataframe = self.session._rows_to_dataframe(result, dtypes) |
| 418 | + # Runs strict validations to ensure internal type predictions and ibis are completely in sync |
| 419 | + # Do not execute these validations outside of testing suite. |
| 420 | + if "PYTEST_CURRENT_TEST" in os.environ: |
| 421 | + self._validate_result_schema(result_dataframe) |
| 422 | + return result_dataframe |
| 423 | + |
def _validate_result_schema(self, result_df: pd.DataFrame) -> None:
    """Check that a materialized result matches the predicted schemas.

    A schema is built from the column names and dtypes of ``result_df`` and
    compared, in order, against the expression node's internal schema and
    the expression's compiled (ibis) schema.

    The check is skipped entirely when
    ``bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable`` is
    falsy.

    Args:
        result_df: DataFrame produced from the query result.

    Raises:
        ValueError: If the internal schema, or failing that the ibis
            schema, differs from the schema observed in ``result_df``.
    """
    # Bail out before building any schema: nothing is compared in this
    # case, so computing the three schemas first would be wasted work.
    if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable:
        return

    ibis_schema = self.expr._compiled_schema
    internal_schema = self.expr.node.schema
    # Schema as actually observed: one item per (column label, dtype) pair.
    actual_schema = bf_schema.ArraySchema(
        tuple(
            bf_schema.SchemaItem(name, dtype)  # type: ignore
            for name, dtype in result_df.dtypes.items()
        )
    )

    if internal_schema != actual_schema:
        raise ValueError(
            f"This error should only occur while testing. BigFrames internal schema: {internal_schema} does not match actual schema: {actual_schema}"
        )
    if ibis_schema != actual_schema:
        raise ValueError(
            f"This error should only occur while testing. Ibis schema: {ibis_schema} does not match actual schema: {actual_schema}"
        )
415 | 443 |
|
416 | 444 | def to_pandas(
|
417 | 445 | self,
|
@@ -1204,7 +1232,7 @@ def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp
|
1204 | 1232 | # TODO: annotate aggregations themself with this information
|
1205 | 1233 | dtype = self.expr.get_column_type(column_id)
|
1206 | 1234 | stats: list[agg_ops.UnaryAggregateOp] = [agg_ops.count_op]
|
1207 |
| - if dtype not in bigframes.dtypes.UNORDERED_DTYPES: |
| 1235 | + if bigframes.dtypes.is_orderable(dtype): |
1208 | 1236 | stats += [agg_ops.min_op, agg_ops.max_op]
|
1209 | 1237 | if dtype in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE:
|
1210 | 1238 | # Notable exclusions:
|
|
0 commit comments