WL#14672: Enable the hypergraph optimizer for UPDATE [8/8, hash join]

kahatlen · kahatlen · commit 5b10c71da062 · 2022-04-08T16:48:41.000+02:00
Enable use of hash join for UPDATE statements when using the
hypergraph optimizer.

With hash joins, the row IDs used for delayed update cannot be
retrieved from the underlying scan, because the hash join iterator
does not guarantee that the underlying scans are positioned on the
correct row. Instead, the hash join iterator is instructed to store
row IDs of the underlying tables in the join buffer.

The existing execution code assumes that UPDATE uses nested loop joins
only, and enables semi-consistent reads on the outermost table when
the isolation level is READ COMMITTED or lower. Hash joins are not yet
prepared for doing semi-consistent reads, so this patch disables
semi-consistent reads when the outermost table is involved in a hash
join.

Change-Id: I8e712cca62f3e9c3beee5c93ddbd84ebcc61aeb1
diff --git a/mysql-test/t/multi_update.test b/mysql-test/t/multi_update.test
@@ -230,6 +230,10 @@ create table t2 (n int(10), d int(10));
 insert into t1 values(1,1),(3,2);
 insert into t2 values(1,10),(1,20);
 UPDATE t1,t2 SET t1.d=t2.d,t2.d=30 WHERE t1.n=t2.n;
+# It is unspecified which order the assignments are performed in, and
+# in which order the rows from t2 are read, so for n=1 the value of t1.d
+# can end up as 10, 20 or 30, depending on the plan chosen.
+--replace_result 20 10 30 10
 select * from t1;
 select * from t2;
 UPDATE t1 a ,t2 b SET a.d=b.d,b.d=30 WHERE a.n=b.n;
diff --git a/sql/iterators/delete_rows_iterator.h b/sql/iterators/delete_rows_iterator.h
@@ -60,12 +60,15 @@ class DeleteRowsIterator final : public RowIterator {
   table_map m_tables_to_delete_from;
   /// The tables to delete from immediately while scanning the join result.
   table_map m_immediate_tables;
-  /// The target tables whose row IDs are stored in the hash join buffer.
-  /// This means all target tables that are below a hash join path.
-  /// Such tables will already have the row ID available in handler::ref, and
-  /// calling handler::position() will put an incorrect row ID (most likely the
-  /// last row read from the table) into handler::ref.
-  table_map m_tables_with_rowid_in_hash_join_buffer;
+  /// All the tables that are part of a hash join. We use this map to find out
+  /// how to get the row ID from a table when buffering row IDs for delayed
+  /// delete. For those tables that are part of a hash join, the row ID will
+  /// already be available in handler::ref, and calling handler::position() will
+  /// overwrite it with an incorrect row ID (most likely the last row read from
+  /// the table). For those that are not part of a hash join,
+  /// handler::position() must be called to get the current row ID from the
+  /// underlying scan.
+  table_map m_hash_join_tables;
   /// The target tables that live in transactional storage engines.
   table_map m_transactional_tables{0};
   /// The target tables that have before delete triggers.
diff --git a/sql/iterators/update_rows_iterator.h b/sql/iterators/update_rows_iterator.h
@@ -28,6 +28,7 @@
 
 #include "my_alloc.h"
 #include "my_base.h"
+#include "my_table_map.h"
 #include "sql/iterators/row_iterator.h"
 #include "sql/sql_list.h"
 
@@ -50,8 +51,9 @@ class UpdateRowsIterator final : public RowIterator {
                      List<TABLE> unupdated_check_opt_tables,
                      COPY_INFO **update_operations,
                      mem_root_deque<Item *> **fields_for_table,
-                     mem_root_deque<Item *> **values_for_table);
-  ~UpdateRowsIterator() override = default;
+                     mem_root_deque<Item *> **values_for_table,
+                     table_map tables_with_rowid_in_buffer);
+  ~UpdateRowsIterator() override;
   bool Init() override;
   int Read() override;
   void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
@@ -95,6 +97,15 @@ class UpdateRowsIterator final : public RowIterator {
   ha_rows m_found_rows{0};
   /// The number of rows actually updated.
   ha_rows m_updated_rows{0};
+  /// All the tables that are part of a hash join. We use this map to find out
+  /// how to get the row ID from a table when buffering row IDs for delayed
+  /// update. For those tables that are part of a hash join, the row ID will
+  /// already be available in handler::ref, and calling handler::position() will
+  /// overwrite it with an incorrect row ID (most likely the last row read from
+  /// the table). For those that are not part of a hash join,
+  /// handler::position() must be called to get the current row ID from the
+  /// underlying scan.
+  table_map m_hash_join_tables;
 
   /// Perform all the immediate updates for the current row returned by the
   /// join, and buffer row IDs for the non-immediate tables.
diff --git a/sql/join_optimizer/access_path.cc b/sql/join_optimizer/access_path.cc
@@ -1344,16 +1344,16 @@ void ExpandFilterAccessPaths(THD *thd, AccessPath *path_arg, const JOIN *join,
                   });
 }
 
-table_map GetTablesWithRowIDsInHashJoin(AccessPath *path) {
+table_map GetHashJoinTables(AccessPath *path) {
   table_map tables = 0;
-  WalkAccessPaths(path, /*join=*/nullptr,
-                  WalkAccessPathPolicy::STOP_AT_MATERIALIZATION,
-                  [&tables](AccessPath *subpath, const JOIN *) {
-                    if (subpath->type == AccessPath::HASH_JOIN &&
-                        subpath->hash_join().store_rowids) {
-                      tables |= subpath->hash_join().tables_to_get_rowid_for;
-                    }
-                    return false;
-                  });
+  WalkAccessPaths(
+      path, /*join=*/nullptr, WalkAccessPathPolicy::STOP_AT_MATERIALIZATION,
+      [&tables](AccessPath *subpath, const JOIN *) {
+        if (subpath->type == AccessPath::HASH_JOIN) {
+          tables |= GetUsedTableMap(subpath, /*include_pruned_tables=*/true);
+          return true;
+        }
+        return false;
+      });
   return tables;
 }
diff --git a/sql/join_optimizer/access_path.h b/sql/join_optimizer/access_path.h
@@ -1785,7 +1785,7 @@ void ExpandSingleFilterAccessPath(THD *thd, AccessPath *path, const JOIN *join,
                                   const Mem_root_array<Predicate> &predicates,
                                   unsigned num_where_predicates);
 
-/// Returns the tables that have stored row IDs in the hash join result.
-table_map GetTablesWithRowIDsInHashJoin(AccessPath *path);
+/// Returns a map of all the tables that are part of a hash join under `path`.
+table_map GetHashJoinTables(AccessPath *path);
 
 #endif  // SQL_JOIN_OPTIMIZER_ACCESS_PATH_H
diff --git a/sql/join_optimizer/join_optimizer.cc b/sql/join_optimizer/join_optimizer.cc
@@ -3028,12 +3028,6 @@ void CostingReceiver::ProposeHashJoin(
   // to update or delete. The same applies to rows from the outer side, if the
   // hash join spills to disk, so we need to store row IDs for both sides.
   if (Overlaps(m_update_delete_target_nodes, left | right)) {
-    if (m_thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
-        m_thd->lex->sql_command == SQLCOM_UPDATE) {
-      // TODO(khatlen): Consider enabling hash join for UPDATE too. Must
-      // probably disable semi-consistent reads in that case.
-      return;
-    }
     FindTablesToGetRowidFor(&join_path);
   }
 
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
@@ -921,10 +921,9 @@ DeleteRowsIterator::DeleteRowsIterator(
       m_tables_to_delete_from(tables_to_delete_from),
       m_immediate_tables(immediate_tables),
       // The old optimizer does not use hash join in DELETE statements.
-      m_tables_with_rowid_in_hash_join_buffer(
-          thd->lex->using_hypergraph_optimizer
-              ? GetTablesWithRowIDsInHashJoin(join->root_access_path())
-              : 0),
+      m_hash_join_tables(thd->lex->using_hypergraph_optimizer
+                             ? GetHashJoinTables(join->root_access_path())
+                             : 0),
       m_tempfiles(thd->mem_root),
       m_delayed_tables(thd->mem_root) {
   for (const TABLE_LIST *tr = join->query_block->leaf_tables; tr != nullptr;
@@ -1076,7 +1075,10 @@ bool DeleteRowsIterator::DoImmediateDeletesAndBufferRowIds() {
     // Check if using outer join and no row found, or row is already deleted
     if (table->has_null_row() || table->has_deleted_row()) continue;
 
-    if (!Overlaps(map, m_tables_with_rowid_in_hash_join_buffer)) {
+    // Hash joins have already copied the row ID from the join buffer into
+    // table->file->ref. Nested loop joins have not, so we call position() to
+    // get the row ID from the handler.
+    if (!Overlaps(map, m_hash_join_tables)) {
       table->file->position(table->record[0]);
     }
 
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
@@ -67,6 +67,7 @@
 #include "sql/iterators/timing_iterator.h"
 #include "sql/iterators/update_rows_iterator.h"
 #include "sql/join_optimizer/access_path.h"
+#include "sql/join_optimizer/bit_utils.h"
 #include "sql/join_optimizer/walk_access_paths.h"
 #include "sql/key.h"  // is_key_used
 #include "sql/key_spec.h"
@@ -2114,8 +2115,15 @@ static bool AddRowIdAsTempTableField(THD *thd, TABLE *table,
 /// @param table The table to get a row ID from.
 /// @param tmp_table The temporary table in which to store the row ID.
 /// @param field_num The field of tmp_table in which to store the row ID.
-static void StoreRowId(TABLE *table, TABLE *tmp_table, int field_num) {
-  table->file->position(table->record[0]);
+/// @param hash_join_tables A map of all tables that are part of a hash join.
+static void StoreRowId(TABLE *table, TABLE *tmp_table, int field_num,
+                       table_map hash_join_tables) {
+  // Hash joins have already copied the row ID from the join buffer into
+  // table->file->ref. Nested loop joins have not, so we call position() to get
+  // the row ID from the handler.
+  if (!Overlaps(hash_join_tables, table->pos_in_table_list->map())) {
+    table->file->position(table->record[0]);
+  }
   tmp_table->visible_field_ptr()[field_num]->store(
       pointer_cast<const char *>(table->file->ref), table->file->ref_length,
       &my_charset_bin);
@@ -2252,14 +2260,6 @@ bool Query_result_update::optimize() {
     if (thd->lex->is_ignore()) table->file->ha_extra(HA_EXTRA_IGNORE_DUP_KEY);
     if (table == main_table)  // First table in join
     {
-      // As it's the first table in the join, and we're doing a nested loop
-      // join thanks to SELECT_NO_JOIN_CACHE, the table is the left argument
-      // of that NL join; thus, we can ask for semi-consistent read.
-      // It's a bit early to ask for it here, because we're before
-      // rnd_init/index_init; but cannot do it later, as we soon
-      // hand control over to iterators.
-      table->file->try_semi_consistent_read(true);
-
       if (table == table_to_update) {
         assert(bitmap_is_clear_all(&table->tmp_set));
         table->mark_columns_needed_for_update(
@@ -2397,10 +2397,6 @@ void Query_result_update::cleanup(THD *thd) {
   }
   tmp_table_param = nullptr;
   thd->check_for_truncated_fields = CHECK_FIELD_IGNORE;  // Restore this setting
-
-  if (main_table != nullptr && main_table->is_created()) {
-    main_table->file->try_semi_consistent_read(false);
-  }
   main_table = nullptr;
   // Reset state and statistics members:
   unupdated_check_opt_tables.clear();
@@ -2533,9 +2529,9 @@ bool UpdateRowsIterator::DoImmediateUpdatesAndBufferRowIds(
        rowids of tables used in the CHECK OPTION condition.
       */
       int field_num = 0;
-      StoreRowId(table, tmp_table, field_num++);
+      StoreRowId(table, tmp_table, field_num++, m_hash_join_tables);
       for (TABLE &tbl : m_unupdated_check_opt_tables) {
-        StoreRowId(&tbl, tmp_table, field_num++);
+        StoreRowId(&tbl, tmp_table, field_num++, m_hash_join_tables);
       }
 
       /*
@@ -2835,7 +2831,26 @@ bool UpdateRowsIterator::DoDelayedUpdates(bool *trans_safe,
   return true;
 }
 
-bool UpdateRowsIterator::Init() { return m_source->Init(); }
+bool UpdateRowsIterator::Init() {
+  if (m_source->Init()) return true;
+
+  if (m_outermost_table != nullptr &&
+      !Overlaps(m_hash_join_tables,
+                m_outermost_table->pos_in_table_list->map())) {
+    // The outermost table is not part of a hash join, so it is the left
+    // argument of a nested loop join and we can ask for semi-consistent read.
+    // Hash joins are not prepared for semi-consistent reads.
+    m_outermost_table->file->try_semi_consistent_read(true);
+  }
+
+  return false;
+}
+
+UpdateRowsIterator::~UpdateRowsIterator() {
+  if (m_outermost_table != nullptr && m_outermost_table->is_created()) {
+    m_outermost_table->file->try_semi_consistent_read(false);
+  }
+}
 
 int UpdateRowsIterator::Read() {
   bool local_error = false;
@@ -3021,7 +3036,8 @@ UpdateRowsIterator::UpdateRowsIterator(
     TABLE **tmp_tables, Copy_field *copy_fields,
     List<TABLE> unupdated_check_opt_tables, COPY_INFO **update_operations,
     mem_root_deque<Item *> **fields_for_table,
-    mem_root_deque<Item *> **values_for_table)
+    mem_root_deque<Item *> **values_for_table,
+    table_map tables_with_rowid_in_buffer)
     : RowIterator(thd),
       m_source(std::move(source)),
       m_outermost_table(outermost_table),
@@ -3032,7 +3048,8 @@ UpdateRowsIterator::UpdateRowsIterator(
       m_unupdated_check_opt_tables(unupdated_check_opt_tables),
       m_update_operations(update_operations),
       m_fields_for_table(fields_for_table),
-      m_values_for_table(values_for_table) {}
+      m_values_for_table(values_for_table),
+      m_hash_join_tables(tables_with_rowid_in_buffer) {}
 
 unique_ptr_destroy_only<RowIterator> CreateUpdateRowsIterator(
     THD *thd, MEM_ROOT *mem_root, JOIN *join,
@@ -3046,5 +3063,9 @@ unique_ptr_destroy_only<RowIterator> Query_result_update::create_iterator(
   return NewIterator<UpdateRowsIterator>(
       thd, mem_root, std::move(source), main_table, table_to_update,
       update_tables, tmp_tables, copy_field, unupdated_check_opt_tables,
-      update_operations, fields_for_table, values_for_table);
+      update_operations, fields_for_table, values_for_table,
+      // The old optimizer does not use hash join in UPDATE statements.
+      thd->lex->using_hypergraph_optimizer
+          ? GetHashJoinTables(unit->root_access_path())
+          : 0);
 }
diff --git a/unittest/gunit/hypergraph_optimizer-t.cc b/unittest/gunit/hypergraph_optimizer-t.cc
@@ -4695,6 +4695,44 @@ TEST_F(HypergraphOptimizerTest, UpdatePreferImmediate) {
   EXPECT_STREQ("t2", nested_loop_join.inner->eq_ref().table->alias);
 }
 
+TEST_F(HypergraphOptimizerTest, UpdateHashJoin) {
+  Query_block *query_block =
+      ParseAndResolve("UPDATE t1, t2 SET t1.x = 1, t2.x = 2 WHERE t1.y = t2.y",
+                      /*nullable=*/false);
+  ASSERT_NE(nullptr, query_block);
+
+  // Size the tables so that a hash join is preferable to a nested loop join.
+  Fake_TABLE *t1 = m_fake_tables["t1"];
+  t1->file->stats.records = 100000;
+  t1->file->stats.data_file_length = 1e6;
+  Fake_TABLE *t2 = m_fake_tables["t2"];
+  t2->file->stats.records = 10000;
+  t2->file->stats.data_file_length = 1e5;
+
+  string trace;
+  AccessPath *root = FindBestQueryPlan(m_thd, query_block, &trace);
+  SCOPED_TRACE(trace);  // Prints out the trace on failure.
+  ASSERT_NE(nullptr, root);
+  // Prints out the query plan on failure.
+  SCOPED_TRACE(PrintQueryPlan(0, root, query_block->join,
+                              /*is_root_of_join=*/true));
+
+  ASSERT_EQ(AccessPath::UPDATE_ROWS, root->type);
+  // Both tables are updated.
+  EXPECT_EQ(t1->pos_in_table_list->map() | t2->pos_in_table_list->map(),
+            root->update_rows().tables_to_update);
+  // No immediate update with hash join.
+  EXPECT_EQ(0, root->update_rows().immediate_tables);
+
+  // Expect a hash join with the smaller table (t2) on the inner side.
+  ASSERT_EQ(AccessPath::HASH_JOIN, root->update_rows().child->type);
+  const auto &hash_join = root->update_rows().child->hash_join();
+  ASSERT_EQ(AccessPath::TABLE_SCAN, hash_join.outer->type);
+  EXPECT_EQ(t1, hash_join.outer->table_scan().table);
+  ASSERT_EQ(AccessPath::TABLE_SCAN, hash_join.inner->type);
+  EXPECT_EQ(t2, hash_join.inner->table_scan().table);
+}
+
 // An alias for better naming.
 using HypergraphSecondaryEngineTest = HypergraphOptimizerTest;