Skip to content

Commit ac74a34

Browse files
authored Dec 19, 2023
fix: Avoid implicit join when using join with unnest (#924)
* fix: avoid implicit join when using join with unnest. When using JOIN with an UNNEST statement and then creating a SELECT statement based on it, the UNNESTed table appears twice in the FROM clause, causing an implicit join of the table with itself. * Add safety checks. * Add tests and fix coverage.
1 parent 3960ac3 commit ac74a34

File tree

2 files changed

+173
-1
lines changed

2 files changed

+173
-1
lines changed
 

‎sqlalchemy_bigquery/base.py

+8
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,14 @@ def _known_tables(self):
269269
if table is not None:
270270
known_tables.add(table.name)
271271

272+
# If we have the table in the `from` of our parent, do not add the alias
273+
# as this will add the table twice and cause an implicit JOIN for that
274+
# table on itself
275+
asfrom_froms = self.stack[-1].get("asfrom_froms", [])
276+
for from_ in asfrom_froms:
277+
if isinstance(from_, Table):
278+
known_tables.add(from_.name)
279+
272280
return known_tables
273281

274282
def visit_column(

‎tests/unit/test_compiler.py

+165-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
import sqlalchemy.exc
2222

2323
from .conftest import setup_table
24-
from .conftest import sqlalchemy_1_4_or_higher
24+
from .conftest import sqlalchemy_1_4_or_higher, sqlalchemy_before_1_4
2525

2626

2727
def test_constraints_are_ignored(faux_conn, metadata):
@@ -114,3 +114,167 @@ def test_no_alias_for_known_tables_cte(faux_conn, metadata):
114114
)
115115
found_cte_sql = q.compile(faux_conn).string
116116
assert found_cte_sql == expected_cte_sql
117+
118+
119+
def prepare_implicit_join_base_query(
120+
faux_conn, metadata, select_from_table2, old_syntax
121+
):
122+
table1 = setup_table(
123+
faux_conn, "table1", metadata, sqlalchemy.Column("foo", sqlalchemy.Integer)
124+
)
125+
table2 = setup_table(
126+
faux_conn,
127+
"table2",
128+
metadata,
129+
sqlalchemy.Column("foos", sqlalchemy.ARRAY(sqlalchemy.Integer)),
130+
sqlalchemy.Column("bar", sqlalchemy.Integer),
131+
)
132+
F = sqlalchemy.func
133+
134+
unnested_col_name = "unnested_foos"
135+
unnested_foos = F.unnest(table2.c.foos).alias(unnested_col_name)
136+
unnested_foo_col = sqlalchemy.Column(unnested_col_name)
137+
138+
# Set up initial query
139+
cols = [table1.c.foo, table2.c.bar] if select_from_table2 else [table1.c.foo]
140+
q = sqlalchemy.select(cols) if old_syntax else sqlalchemy.select(*cols)
141+
q = q.select_from(unnested_foos.join(table1, table1.c.foo == unnested_foo_col))
142+
return q
143+
144+
145+
@sqlalchemy_before_1_4
146+
def test_no_implicit_join_asterix_for_inner_unnest_before_1_4(faux_conn, metadata):
147+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
148+
q = prepare_implicit_join_base_query(faux_conn, metadata, True, True)
149+
expected_initial_sql = (
150+
"SELECT `table1`.`foo`, `table2`.`bar` \n"
151+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
152+
)
153+
found_initial_sql = q.compile(faux_conn).string
154+
assert found_initial_sql == expected_initial_sql
155+
156+
q = sqlalchemy.select(["*"]).select_from(q)
157+
158+
expected_outer_sql = (
159+
"SELECT * \n"
160+
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
161+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)"
162+
)
163+
found_outer_sql = q.compile(faux_conn).string
164+
assert found_outer_sql == expected_outer_sql
165+
166+
167+
@sqlalchemy_1_4_or_higher
168+
def test_no_implicit_join_asterix_for_inner_unnest(faux_conn, metadata):
169+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
170+
q = prepare_implicit_join_base_query(faux_conn, metadata, True, False)
171+
expected_initial_sql = (
172+
"SELECT `table1`.`foo`, `table2`.`bar` \n"
173+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
174+
)
175+
found_initial_sql = q.compile(faux_conn).string
176+
assert found_initial_sql == expected_initial_sql
177+
178+
q = q.subquery()
179+
q = sqlalchemy.select("*").select_from(q)
180+
181+
expected_outer_sql = (
182+
"SELECT * \n"
183+
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
184+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
185+
)
186+
found_outer_sql = q.compile(faux_conn).string
187+
assert found_outer_sql == expected_outer_sql
188+
189+
190+
@sqlalchemy_before_1_4
191+
def test_no_implicit_join_for_inner_unnest_before_1_4(faux_conn, metadata):
192+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
193+
q = prepare_implicit_join_base_query(faux_conn, metadata, True, True)
194+
expected_initial_sql = (
195+
"SELECT `table1`.`foo`, `table2`.`bar` \n"
196+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
197+
)
198+
found_initial_sql = q.compile(faux_conn).string
199+
assert found_initial_sql == expected_initial_sql
200+
201+
q = sqlalchemy.select([q.c.foo]).select_from(q)
202+
203+
expected_outer_sql = (
204+
"SELECT `foo` \n"
205+
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
206+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`)"
207+
)
208+
found_outer_sql = q.compile(faux_conn).string
209+
assert found_outer_sql == expected_outer_sql
210+
211+
212+
@sqlalchemy_1_4_or_higher
213+
def test_no_implicit_join_for_inner_unnest(faux_conn, metadata):
214+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
215+
q = prepare_implicit_join_base_query(faux_conn, metadata, True, False)
216+
expected_initial_sql = (
217+
"SELECT `table1`.`foo`, `table2`.`bar` \n"
218+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
219+
)
220+
found_initial_sql = q.compile(faux_conn).string
221+
assert found_initial_sql == expected_initial_sql
222+
223+
q = q.subquery()
224+
q = sqlalchemy.select(q.c.foo).select_from(q)
225+
226+
expected_outer_sql = (
227+
"SELECT `anon_1`.`foo` \n"
228+
"FROM (SELECT `table1`.`foo` AS `foo`, `table2`.`bar` AS `bar` \n"
229+
"FROM `table2`, unnest(`table2`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
230+
)
231+
found_outer_sql = q.compile(faux_conn).string
232+
assert found_outer_sql == expected_outer_sql
233+
234+
235+
@sqlalchemy_1_4_or_higher
236+
def test_no_implicit_join_asterix_for_inner_unnest_no_table2_column(
237+
faux_conn, metadata
238+
):
239+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
240+
q = prepare_implicit_join_base_query(faux_conn, metadata, False, False)
241+
expected_initial_sql = (
242+
"SELECT `table1`.`foo` \n"
243+
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
244+
)
245+
found_initial_sql = q.compile(faux_conn).string
246+
assert found_initial_sql == expected_initial_sql
247+
248+
q = q.subquery()
249+
q = sqlalchemy.select("*").select_from(q)
250+
251+
expected_outer_sql = (
252+
"SELECT * \n"
253+
"FROM (SELECT `table1`.`foo` AS `foo` \n"
254+
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
255+
)
256+
found_outer_sql = q.compile(faux_conn).string
257+
assert found_outer_sql == expected_outer_sql
258+
259+
260+
@sqlalchemy_1_4_or_higher
261+
def test_no_implicit_join_for_inner_unnest_no_table2_column(faux_conn, metadata):
262+
# See: https://github.com/googleapis/python-bigquery-sqlalchemy/issues/368
263+
q = prepare_implicit_join_base_query(faux_conn, metadata, False, False)
264+
expected_initial_sql = (
265+
"SELECT `table1`.`foo` \n"
266+
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`"
267+
)
268+
found_initial_sql = q.compile(faux_conn).string
269+
assert found_initial_sql == expected_initial_sql
270+
271+
q = q.subquery()
272+
q = sqlalchemy.select(q.c.foo).select_from(q)
273+
274+
expected_outer_sql = (
275+
"SELECT `anon_1`.`foo` \n"
276+
"FROM (SELECT `table1`.`foo` AS `foo` \n"
277+
"FROM `table2` `table2_1`, unnest(`table2_1`.`foos`) AS `unnested_foos` JOIN `table1` ON `table1`.`foo` = `unnested_foos`) AS `anon_1`"
278+
)
279+
found_outer_sql = q.compile(faux_conn).string
280+
assert found_outer_sql == expected_outer_sql

0 commit comments

Comments
 (0)