Skip to content

Commit ae4ff8d

Browse files
refactor: Add type constraints to internal op definitions. (#532)
1 parent ae528d7 commit ae4ff8d

File tree

5 files changed

+472
-147
lines changed

5 files changed

+472
-147
lines changed

bigframes/core/nodes.py

+6
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,12 @@ def __hash__(self):
318318
class ProjectionNode(UnaryNode):
319319
assignments: typing.Tuple[typing.Tuple[ex.Expression, str], ...]
320320

321+
def __post_init__(self):
    """Type-check every assigned expression against the child's schema.

    Each expression in ``self.assignments`` is asked for its output type
    given the child's column types. The computed types are discarded; the
    calls are made only so that an invalid expression fails at node
    construction (the original note says this throws ``TypeError``).
    """
    child_types = self.child.schema._mapping
    for expr, _name in self.assignments:
        # Called for validation only: throws TypeError if invalid.
        expr.output_type(child_types)
326+
321327
def __hash__(self):
    """Return the hash value held in ``self._node_hash``."""
    return self._node_hash
323329

bigframes/dtypes.py

+45-2
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,19 @@
4747
# None represents the type of a None scalar.
4848
ExpressionType = typing.Optional[Dtype]
4949

50-
# Used when storing Null expressions
51-
DEFAULT_DTYPE = pd.Float64Dtype()
5250

5351
INT_DTYPE = pd.Int64Dtype()
5452
FLOAT_DTYPE = pd.Float64Dtype()
5553
BOOL_DTYPE = pd.BooleanDtype()
5654
STRING_DTYPE = pd.StringDtype(storage="pyarrow")
55+
BYTES_DTYPE = pd.ArrowDtype(pa.binary())
56+
DATE_DTYPE = pd.ArrowDtype(pa.date32())
57+
TIME_DTYPE = pd.ArrowDtype(pa.time64("us"))
58+
DATETIME_DTYPE = pd.ArrowDtype(pa.timestamp("us"))
59+
TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
60+
61+
# Used when storing Null expressions
62+
DEFAULT_DTYPE = FLOAT_DTYPE
5763

5864
# On BQ side, ARRAY, STRUCT, GEOGRAPHY, JSON are not orderable
5965
UNORDERED_DTYPES = [gpd.array.GeometryDtype()]
@@ -100,6 +106,43 @@
100106
pd.ArrowDtype(pa.decimal256(76, 38)),
101107
]
102108

109+
110+
## dtype predicates - use these to maintain consistency
111+
def is_datetime_like(type: ExpressionType) -> bool:
    """Return True when ``type`` is DATETIME_DTYPE or TIMESTAMP_DTYPE."""
    datetime_dtypes = (DATETIME_DTYPE, TIMESTAMP_DTYPE)
    return type in datetime_dtypes
113+
114+
115+
def is_date_like(type: ExpressionType) -> bool:
    """Return True when ``type`` is DATETIME_DTYPE, TIMESTAMP_DTYPE or DATE_DTYPE."""
    date_bearing_dtypes = (DATETIME_DTYPE, TIMESTAMP_DTYPE, DATE_DTYPE)
    return type in date_bearing_dtypes
117+
118+
119+
def is_time_like(type: ExpressionType) -> bool:
    """Return True when ``type`` is DATETIME_DTYPE, TIMESTAMP_DTYPE or TIME_DTYPE."""
    time_bearing_dtypes = (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
    return type in time_bearing_dtypes
121+
122+
123+
def is_binary_like(type: ExpressionType) -> bool:
    """Return True when ``type`` is BOOL_DTYPE, BYTES_DTYPE or INT_DTYPE."""
    binary_like_dtypes = (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
    return type in binary_like_dtypes
125+
126+
127+
def is_string_like(type: ExpressionType) -> bool:
    """Return True when ``type`` is STRING_DTYPE or BYTES_DTYPE."""
    string_like_dtypes = (STRING_DTYPE, BYTES_DTYPE)
    return type in string_like_dtypes
129+
130+
131+
def is_array_like(type: ExpressionType) -> bool:
    """Return True for Arrow list dtypes and for STRING_DTYPE / BYTES_DTYPE.

    A dtype qualifies either because it is a ``pd.ArrowDtype`` wrapping a
    ``pa.ListType``, or because it is one of the string/bytes dtypes.
    """
    is_arrow_list = isinstance(type, pd.ArrowDtype) and isinstance(
        type.pyarrow_dtype, pa.ListType
    )
    return is_arrow_list or type in (STRING_DTYPE, BYTES_DTYPE)
136+
137+
138+
def is_numeric(type: ExpressionType) -> bool:
    """Return True when ``type`` is listed in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE."""
    numeric_dtypes = NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
    return type in numeric_dtypes
140+
141+
142+
def is_comparable(type: ExpressionType) -> bool:
    """Return True when ``type`` is not None and not listed in UNORDERED_DTYPES."""
    if type is None:
        # None (the type of a None scalar) is never comparable.
        return False
    return type not in UNORDERED_DTYPES
144+
145+
103146
# Type hints for Ibis data types that can be read to Python objects by BigQuery DataFrame
104147
ReadOnlyIbisDtype = Union[
105148
ibis_dtypes.Binary,

0 commit comments

Comments
 (0)