--echo #
--echo # WL#15257 Enhanced performance for set operations: INTERSECT,
--echo # EXCEPT i.e. hashed implementation of EXCEPT, INTERSECT with
--echo # spill to disk (chunk files) if hash table can't fit in memory
--echo # and fallback to de-duplication via keyed temporary table as
--echo # last resort. The latter is tested with error injection in
--echo # query_expression_debug.
--echo #
--echo # The size of the VARCHAR column is an argument ($char_type) so
--echo # we can test short varchar fields as well as blobs.
--echo #
eval CREATE TABLE t(i INT, d DATE, c $char_type CHARSET latin1) ENGINE=innodb;

# Build ~32K distinct rows via a recursive CTE; needs a raised recursion cap.
set @@cte_max_recursion_depth = 100000;
INSERT INTO t
   WITH RECURSIVE cte AS (
      SELECT 0 AS i, '2022-04-30' AS d, 'abracadabra' as c
      UNION
      SELECT 1 AS i, '2022-04-30' AS d, 'rabarbra' as c
      UNION
      SELECT i+2, d, c FROM cte
      WHERE i+2 < 65536/2
   )
   SELECT i,d,c FROM cte;
set @@cte_max_recursion_depth = default;

# insert one duplicate of each row
INSERT INTO t select i, d, c FROM t;
ANALYZE TABLE t;


SELECT COUNT(*) FROM t;
SELECT COUNT(*) FROM (SELECT DISTINCT i,d,c FROM t) derived;

# so we can get reliable Created_tmp_files counts below
FLUSH STATUS;
SHOW STATUS LIKE 'Created_tmp_files%';

SET SESSION optimizer_switch = 'hash_set_operations=off';
SELECT * FROM (SELECT * FROM t INTERSECT SELECT * FROM t) AS derived ORDER BY i LIMIT 20;
SHOW STATUS LIKE 'Created_tmp_files%';

SET SESSION optimizer_switch = 'hash_set_operations=default';
SELECT * FROM (SELECT * FROM t INTERSECT SELECT * FROM t) AS derived ORDER BY i LIMIT 20;
SHOW STATUS LIKE 'Created_tmp_files%';
FLUSH STATUS;
SET SESSION setop_hash_buffer_size = 16384;
--echo # The number of Created_tmp_files will be 386, which is
--echo # 128*2 (build, probe chunks) for left operand + 128 (probe) for right operand
--echo # + 2 (REMAININGINPUT for left and right operand) = 386
--echo # The last 128 (probe chunk files for right operand), could have been avoided
--echo # if we had a way to reset IO_CACHE files; now we do a close and open, cf.
--echo # HashJoinChunk::Init.
SELECT * FROM (SELECT * FROM t INTERSECT SELECT * FROM t) AS derived ORDER BY i LIMIT 20;
SHOW STATUS LIKE 'Created_tmp_files%';

SET SESSION setop_hash_buffer_size = default;

--echo # Test spill correctness and secondary overflow, the latter
--echo # using injection. This query with setop_hash_buffer_size ==
--echo # 16384 will give 128 chunk files. With default setting it does
--echo # not spill to disk.
let $query = SELECT * FROM t INTERSECT SELECT * FROM t;

SET SESSION optimizer_switch = 'hash_set_operations=off';
eval CREATE TABLE no_hashing AS $query;

SET SESSION optimizer_switch = 'hash_set_operations=default';
eval CREATE TABLE hashing_no_spill AS $query;

--echo # Compare old approach (no hashing) with hashing
SET SESSION optimizer_switch = 'hash_set_operations=off';
SELECT COUNT(*) FROM (SELECT * FROM no_hashing EXCEPT ALL SELECT * FROM hashing_no_spill) derived;
SELECT COUNT(*) FROM (SELECT * FROM hashing_no_spill EXCEPT ALL SELECT * FROM no_hashing) derived;
SET SESSION optimizer_switch = 'hash_set_operations=default';

SET SESSION setop_hash_buffer_size = 16384;
eval CREATE TABLE hashing_spill AS $query;

--echo # Compare old approach (no hashing) with hashing w/spill
SET SESSION optimizer_switch = 'hash_set_operations=off';
SELECT COUNT(*) FROM (SELECT * FROM no_hashing EXCEPT ALL SELECT * FROM hashing_spill) derived;
SELECT COUNT(*) FROM (SELECT * FROM hashing_spill EXCEPT ALL SELECT * FROM no_hashing) derived;

SET SESSION optimizer_switch = 'hash_set_operations=default';
SET SESSION setop_hash_buffer_size = default;

DROP TABLE no_hashing, hashing_no_spill, hashing_spill;

--echo #
--echo # Test overflow in resulting tmp table
--echo #
SET SESSION optimizer_trace="enabled=on";

let $show_trace=
  SELECT JSON_PRETTY(JSON_EXTRACT(trace,"$.steps[*].join_execution"))
  FROM information_schema.optimizer_trace;

# Elide plan costs/rows plus spill statistics that vary between runs,
# so the recorded trace output is deterministic.
let $pattern=$elide_trace_costs_and_rows;
# elide some sorting statistics:
let $pattern=$pattern /num_initial_chunks_spilled_to_disk\": [0-9.]+/num_initial_chunks_spilled_to_disk\": "elided"/;
let $pattern=$pattern /peak_memory_used\": [0-9.]+/peak_memory_used\": "elided"/;

--echo # a) When we spill
SET SESSION tmp_table_size=100000;
SELECT * FROM (SELECT * FROM t INTERSECT SELECT * FROM t) AS derived ORDER BY i LIMIT 20;
--replace_regex $pattern
--skip_if_hypergraph
eval $show_trace;

--echo # b) With secondary overflow (part of query_expression_debug
--echo # since it requires error injection)

--echo # c) When we can hash in memory
SET SESSION setop_hash_buffer_size=2621440;
SELECT * FROM (SELECT * FROM t INTERSECT SELECT * FROM t) AS derived ORDER BY i LIMIT 20;
--replace_regex $pattern
--skip_if_hypergraph
eval $show_trace;

SET SESSION tmp_table_size=default;
SET SESSION setop_hash_buffer_size=default;
SET SESSION optimizer_trace="enabled=default";

DROP TABLE t;