Skip to content

Commit e9fe815

Browse files
authored
feat: add GeoSeries.difference() and bigframes.bigquery.st_difference() (#1471)
* feat: add GeoSeries.difference() * add st_difference method and test cases for geo.difference and st_difference * update method and tests * update method and tests * update test to cover different inputs * update test assertion * update testing with single geometry object and add notebook tag to ignore the exception * fix docstrings * modify geo_difference to make it available for use and update tests and notebook * fix mypy type error
1 parent 08ea02c commit e9fe815

File tree

11 files changed

+758
-100
lines changed

11 files changed

+758
-100
lines changed

bigframes/bigquery/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
unix_millis,
2828
unix_seconds,
2929
)
30-
from bigframes.bigquery._operations.geo import st_area
30+
from bigframes.bigquery._operations.geo import st_area, st_difference
3131
from bigframes.bigquery._operations.json import (
3232
json_extract,
3333
json_extract_array,
@@ -48,6 +48,7 @@
4848
"array_to_string",
4949
# geo ops
5050
"st_area",
51+
"st_difference",
5152
# json ops
5253
"json_set",
5354
"json_extract",

bigframes/bigquery/_operations/geo.py

+120
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
from bigframes import operations as ops
18+
import bigframes.dtypes
1819
import bigframes.geopandas
1920
import bigframes.series
2021

@@ -91,3 +92,122 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
9192
series = series._apply_unary_op(ops.geo_area_op)
9293
series.name = None
9394
return series
95+
96+
97+
def st_difference(
98+
series: bigframes.series.Series, other: bigframes.series.Series
99+
) -> bigframes.series.Series:
100+
"""
101+
Returns a GEOGRAPHY that represents the point set difference of
102+
`series` and `other`. Therefore, the result consists of the part
103+
of `series` that doesn't intersect with `other`.
104+
105+
If `series` is completely contained in `other`, then ST_DIFFERENCE
106+
returns an empty GEOGRAPHY.
107+
108+
.. note::
109+
BigQuery's Geography functions, like `st_difference`, interpret the geometry
110+
data type as a point set on the Earth's surface. A point set is a set
111+
of points, lines, and polygons on the WGS84 reference spheroid, with
112+
geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data
113+
114+
**Examples:**
115+
116+
>>> import bigframes as bpd
117+
>>> import bigframes.bigquery as bbq
118+
>>> import bigframes.geopandas
119+
>>> from shapely.geometry import Polygon, LineString, Point
120+
>>> bpd.options.display.progress_bar = None
121+
122+
We can check two GeoSeries against each other, row by row.
123+
124+
>>> s1 = bigframes.geopandas.GeoSeries(
125+
... [
126+
... Polygon([(0, 0), (2, 2), (0, 2)]),
127+
... Polygon([(0, 0), (2, 2), (0, 2)]),
128+
... LineString([(0, 0), (2, 2)]),
129+
... LineString([(2, 0), (0, 2)]),
130+
... Point(0, 1),
131+
... ],
132+
... )
133+
>>> s2 = bigframes.geopandas.GeoSeries(
134+
... [
135+
... Polygon([(0, 0), (1, 1), (0, 1)]),
136+
... LineString([(1, 0), (1, 3)]),
137+
... LineString([(2, 0), (0, 2)]),
138+
... Point(1, 1),
139+
... Point(0, 1),
140+
... ],
141+
... index=range(1, 6),
142+
... )
143+
144+
>>> s1
145+
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
146+
1 POLYGON ((0 0, 2 2, 0 2, 0 0))
147+
2 LINESTRING (0 0, 2 2)
148+
3 LINESTRING (2 0, 0 2)
149+
4 POINT (0 1)
150+
dtype: geometry
151+
152+
>>> s2
153+
1 POLYGON ((0 0, 1 1, 0 1, 0 0))
154+
2 LINESTRING (1 0, 1 3)
155+
3 LINESTRING (2 0, 0 2)
156+
4 POINT (1 1)
157+
5 POINT (0 1)
158+
dtype: geometry
159+
160+
>>> bbq.st_difference(s1, s2)
161+
0 None
162+
1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1))
163+
2 LINESTRING (0 0, 1 1.00046, 2 2)
164+
3 GEOMETRYCOLLECTION EMPTY
165+
4 POINT (0 1)
166+
5 None
167+
dtype: geometry
168+
169+
We can also check the difference of two GeoSeries that each contain a single geometry:
170+
171+
>>> sbq1 = bigframes.geopandas.GeoSeries(
172+
... [
173+
... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
174+
... ]
175+
... )
176+
>>> sbq2 = bigframes.geopandas.GeoSeries(
177+
... [
178+
... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])
179+
... ]
180+
... )
181+
182+
>>> sbq1
183+
0 POLYGON ((0 0, 10 0, 10 10, 0 0))
184+
dtype: geometry
185+
186+
>>> sbq2
187+
0 POLYGON ((4 2, 6 2, 8 6, 4 2))
188+
dtype: geometry
189+
190+
>>> bbq.st_difference(sbq1, sbq2)
191+
0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4...
192+
dtype: geometry
193+
194+
Additionally, we can check the difference of a GeoSeries against a GeoSeries containing a single geometry (rows with no aligned counterpart yield None):
195+
196+
>>> bbq.st_difference(s1, sbq2)
197+
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
198+
1 None
199+
2 None
200+
3 None
201+
4 None
202+
dtype: geometry
203+
204+
Args:
205+
other (bigframes.series.Series or geometric object):
206+
The GeoSeries (elementwise) or geometric object to find the difference to.
207+
208+
Returns:
209+
bigframes.series.Series:
210+
A GeoSeries of the points in each aligned geometry that are not
211+
in other.
212+
"""
213+
return series._apply_binary_op(other, ops.geo_st_difference_op)

bigframes/core/compile/scalar_op_compiler.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -1001,11 +1001,6 @@ def normalize_op_impl(x: ibis_types.Value):
10011001

10021002

10031003
# Geo Ops
1004-
@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
1005-
def geo_st_boundary_op_impl(x: ibis_types.Value):
1006-
return st_boundary(x)
1007-
1008-
10091004
@scalar_op_compiler.register_unary_op(ops.geo_area_op)
10101005
def geo_area_op_impl(x: ibis_types.Value):
10111006
return typing.cast(ibis_types.GeoSpatialValue, x).area()
@@ -1016,6 +1011,18 @@ def geo_st_astext_op_impl(x: ibis_types.Value):
10161011
return typing.cast(ibis_types.GeoSpatialValue, x).as_text()
10171012

10181013

1014+
@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
1015+
def geo_st_boundary_op_impl(x: ibis_types.Value):
1016+
return st_boundary(x)
1017+
1018+
1019+
@scalar_op_compiler.register_binary_op(ops.geo_st_difference_op, pass_op=False)
1020+
def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value):
1021+
return typing.cast(ibis_types.GeoSpatialValue, x).difference(
1022+
typing.cast(ibis_types.GeoSpatialValue, y)
1023+
)
1024+
1025+
10191026
@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op)
10201027
def geo_st_geogfromtext_op_impl(x: ibis_types.Value):
10211028
# Ibis doesn't seem to provide a dedicated method to cast from string to geography,

bigframes/geopandas/geoseries.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore
6262
6363
Raises:
6464
NotImplementedError:
65-
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.
65+
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.
6666
"""
6767
raise NotImplementedError(
6868
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
@@ -93,3 +93,6 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series:
9393
series = self._apply_unary_op(ops.geo_st_astext_op)
9494
series.name = None
9595
return series
96+
97+
def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
98+
return self._apply_binary_op(other, ops.geo_st_difference_op)

bigframes/operations/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
geo_area_op,
9191
geo_st_astext_op,
9292
geo_st_boundary_op,
93+
geo_st_difference_op,
9394
geo_st_geogfromtext_op,
9495
geo_st_geogpoint_op,
9596
geo_x_op,
@@ -366,6 +367,7 @@
366367
# Geo ops
367368
"geo_area_op",
368369
"geo_st_boundary_op",
370+
"geo_st_difference_op",
369371
"geo_st_astext_op",
370372
"geo_st_geogfromtext_op",
371373
"geo_st_geogpoint_op",

bigframes/operations/geo_ops.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,17 @@
3737
),
3838
)
3939

40+
geo_st_difference_op = base_ops.create_binary_op(
41+
name="geo_st_difference", type_signature=op_typing.BinaryGeo()
42+
)
43+
4044
geo_st_geogfromtext_op = base_ops.create_unary_op(
4145
name="geo_st_geogfromtext",
4246
type_signature=op_typing.FixedOutputType(
4347
dtypes.is_string_like, dtypes.GEO_DTYPE, description="string-like"
4448
),
4549
)
4650

47-
4851
geo_st_geogpoint_op = base_ops.create_binary_op(
4952
name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo()
5053
)

bigframes/operations/type.py

+14
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ def output_type(
122122

123123

124124
@dataclasses.dataclass
125+
@dataclasses.dataclass
126+
class BinaryGeo(BinaryTypeSignature):
127+
"""Type signature for geo functions like difference that can map geo to geo."""
128+
129+
def output_type(
130+
self, left_type: ExpressionType, right_type: ExpressionType
131+
) -> ExpressionType:
132+
if (left_type is not None) and not bigframes.dtypes.is_geo_like(left_type):
133+
raise TypeError(f"Type {left_type} is not geo")
134+
if (right_type is not None) and not bigframes.dtypes.is_geo_like(right_type):
135+
raise TypeError(f"Type {right_type} is not geo")
136+
return bigframes.dtypes.GEO_DTYPE
137+
138+
125139
class BinaryNumericGeo(BinaryTypeSignature):
126140
"""Type signature for geo functions like from_xy that can map ints to ints."""
127141

0 commit comments

Comments
 (0)