@@ -505,40 +505,42 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR:
505
505
506
506
def explode (self , column_ids : typing .Sequence [str ]) -> UnorderedIR :
507
507
table = self ._to_ibis_expr ()
508
- other_columns = [
509
- column_id for column_id in self ._column_names if column_id not in column_ids
510
- ]
511
508
512
- if len (column_ids ) == 1 :
513
- unnested_column = table [column_ids [0 ]].unnest ().name (column_ids [0 ])
514
- table_w_unnest = table .select (
515
- unnested_column ,
516
- * other_columns ,
517
- )
518
- else :
519
- zip_array_id = bigframes .core .guid .generate_guid ("zip_array_" )
520
- zip_array = (
521
- table [column_ids [0 ]]
522
- .zip (* [table [column_id ] for column_id in column_ids [1 :]])
523
- .name (zip_array_id )
524
- )
525
- table_w_zip_array = table .select (
526
- zip_array ,
527
- * self ._column_names ,
509
+ # The offset array ensures null represents empty arrays after unnesting.
510
+ offset_array_id = bigframes .core .guid .generate_guid ("offset_array_" )
511
+ offset_array = (
512
+ vendored_ibis_ops .GenerateArray (
513
+ ibis .greatest (
514
+ 0 ,
515
+ ibis .least (
516
+ * [table [column_id ].length () - 1 for column_id in column_ids ]
517
+ ),
518
+ )
528
519
)
520
+ .to_expr ()
521
+ .name (offset_array_id ),
522
+ )
523
+ table_w_offset_array = table .select (
524
+ offset_array ,
525
+ * self ._column_names ,
526
+ )
529
527
530
- unnest_array_id = bigframes .core .guid .generate_guid ("unnest_array_" )
531
- unnest_array = (
532
- table_w_zip_array [zip_array_id ].unnest ().name (unnest_array_id )
533
- )
534
- unnested_columns = [
535
- unnest_array [f"f{ index + 1 } " ].name (column_id )
536
- for index , column_id in zip (range (len (column_ids )), column_ids )
537
- ]
538
- table_w_unnest = table_w_zip_array .select (
539
- * unnested_columns ,
540
- * other_columns ,
541
- )
528
+ unnest_offset_id = bigframes .core .guid .generate_guid ("unnest_offset_" )
529
+ unnest_offset = (
530
+ table_w_offset_array [offset_array_id ].unnest ().name (unnest_offset_id )
531
+ )
532
+ table_w_offset = table_w_offset_array .select (
533
+ unnest_offset ,
534
+ * self ._column_names ,
535
+ )
536
+
537
+ unnested_columns = [
538
+ table_w_offset [column_id ][table_w_offset [unnest_offset_id ]].name (column_id )
539
+ if column_id in column_ids
540
+ else table_w_offset [column_id ]
541
+ for column_id in self ._column_names
542
+ ]
543
+ table_w_unnest = table_w_offset .select (* unnested_columns )
542
544
543
545
columns = [table_w_unnest [column_name ] for column_name in self ._column_names ]
544
546
return UnorderedIR (
@@ -779,39 +781,32 @@ def explode(self, column_ids: typing.Sequence[str]) -> OrderedIR:
779
781
.to_expr ()
780
782
.name (offset_array_id ),
781
783
)
782
- table_w_offset = table .select (
784
+ table_w_offset_array = table .select (
783
785
offset_array ,
784
786
* self ._column_names ,
785
787
* self ._hidden_ordering_column_names ,
786
788
)
787
789
788
- zip_array_id = bigframes .core .guid .generate_guid ("zip_array_" )
789
- zip_array = (
790
- table_w_offset [offset_array_id ]
791
- .zip (* [table_w_offset [column_id ] for column_id in column_ids ])
792
- .name (zip_array_id )
790
+ unnest_offset_id = bigframes .core .guid .generate_guid ("unnest_offset_" )
791
+ unnest_offset = (
792
+ table_w_offset_array [offset_array_id ].unnest ().name (unnest_offset_id )
793
793
)
794
- table_w_zip_array = table_w_offset .select (
795
- zip_array ,
794
+ table_w_offset = table_w_offset_array .select (
795
+ unnest_offset ,
796
796
* self ._column_names ,
797
797
* self ._hidden_ordering_column_names ,
798
798
)
799
799
800
- unnest_array_id = bigframes .core .guid .generate_guid ("unnest_array_" )
801
- unnest_offset_id = bigframes .core .guid .generate_guid ("unnest_offset_" )
802
-
803
- unnest_array = table_w_zip_array [zip_array_id ].unnest ().name (unnest_array_id )
804
800
unnested_columns = [
805
- unnest_array [f"f{ index + 2 } " ].name (column_id )
806
- for index , column_id in zip (range (len (column_ids )), column_ids )
801
+ table_w_offset [column_id ][table_w_offset [unnest_offset_id ]].name (column_id )
802
+ if column_id in column_ids
803
+ else table_w_offset [column_id ]
804
+ for column_id in self ._column_names
807
805
]
808
- other_columns = [
809
- column_id for column_id in self ._column_names if column_id not in column_ids
810
- ]
811
- table_w_unnest = table_w_zip_array .select (
812
- unnest_array ["f1" ].name (unnest_offset_id ),
806
+
807
+ table_w_unnest = table_w_offset .select (
808
+ table_w_offset [unnest_offset_id ],
813
809
* unnested_columns ,
814
- * other_columns ,
815
810
* self ._hidden_ordering_column_names ,
816
811
)
817
812
0 commit comments