Skip to content

Commit db5c874

Browse files
author
Kapil Agrawal
committed
Bug#31636339 MTS IS UNUSABLE; ERRNO 3030, AND WORKER HA_ERR_FOUND_DUPP_KEY OR HA_ERR_LOCK_WAIT_TIMEOUT
This patch is for mysql-5.7. Background ---------- In the case of binlog_row_image=minimum, we put a minimum number of fields in the read set and only the modified fields in write sets. As these read sets and write sets are used in generating the write set hashes and writset hashes are used to compute the transaction dependencies. Problem ------- In this test case, we have a primary key and a secondary key (Unique key) so it adds only the primary key in the readset and only the changed field in the writeset such that the secondary key is not included in the writeset hashes since it is not there in either of read and write sets. Due to this transaction dependencies are not calculated correctly on source and we get an error while executing these transactions parallelly on replica in a multi-threaded environment. Solution --------- If we include secondary key (unique) in the writeset hashes without including it in the before and after image then transaction dependencies will be computed correctly and there would not be any failures on replica due to this. Fix --- 1. Take temporary backup of readset and writeset and restore these bitmaps after setting the bits for unique key (step 2) in order to remove 1-1 correspondence between readset and before-image. And this is being done in the Binlog_log_row_cleanup class. 2. In case of BINLOG_ROW_IMAGE=MINIMAL, mark all unique key bits in the readset and writesets to calculate the transaction dependencies correctly. 3. In the function add_pke, pointer arithmetic code where we are calculating the writeset hashes is also modifies same as Mysql-8.0. RB#25966
1 parent 4e26fa4 commit db5c874

6 files changed

+293
-49
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
include/only_with_option.inc [GLOBAL.slave_transaction_retries > 2]
2+
#
3+
# 1. Create source-replica topology.
4+
include/master-slave.inc
5+
Warnings:
6+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
7+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
8+
[connection master]
9+
#
10+
# 2. Save the current variable values.
11+
SET @old_binlog_row_image= @@binlog_row_image;
12+
[connection master]
13+
# 3. Setup neccesary variables on source.
14+
# 3.1 Set the binlog_row_image to MINIMAL on source.
15+
CON: 'master', IMG: 'MINIMAL', RESTART SLAVE: 'N'
16+
SET SESSION binlog_row_image= 'MINIMAL';
17+
SET GLOBAL binlog_row_image= 'MINIMAL';
18+
FLUSH TABLES;
19+
SHOW VARIABLES LIKE 'binlog_row_image';
20+
Variable_name Value
21+
binlog_row_image MINIMAL
22+
SET GLOBAL TRANSACTION_WRITE_SET_EXTRACTION = 'XXHASH64';
23+
SET SESSION TRANSACTION_WRITE_SET_EXTRACTION = 'XXHASH64';
24+
SET GLOBAL BINLOG_TRANSACTION_DEPENDENCY_TRACKING = WRITESET;
25+
#
26+
# 4. Create table and procedure on source.
27+
CREATE TABLE t( a tinyint unsigned primary key, b tinyint, c int, d bigint,
28+
f char(10), g char(255), h text, i longtext, unique key(g,f),
29+
unique key(f,c), key(c,d,b), key(i(10),f(10),b)) ENGINE=InnoDB;
30+
drop procedure if exists p;
31+
Warnings:
32+
Note 1305 PROCEDURE test.p does not exist
33+
create procedure p(p_i bigint)
34+
begin
35+
declare v_i bigint default 0;
36+
repeat
37+
replace into t values(
38+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5),
39+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5)
40+
),
41+
(
42+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5),
43+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5)
44+
);
45+
set v_i=v_i+1;
46+
until v_i > p_i end repeat;
47+
end|
48+
#
49+
#
50+
# 5. Switch to source and call procedure.
51+
[connection master]
52+
call p(1000);
53+
include/sync_slave_sql_with_master.inc
54+
#
55+
# 6. Clear system variables on source.
56+
#
57+
[connection master]
58+
SET GLOBAL binlog_row_image= @old_binlog_row_image;
59+
SET SESSION binlog_row_image= @old_binlog_row_image;
60+
SET GLOBAL BINLOG_TRANSACTION_DEPENDENCY_TRACKING = OLD_TRX_TRACKER;
61+
SET GLOBAL TRANSACTION_WRITE_SET_EXTRACTION = OLD_TRX_WRITE_SET_EXTRACTION_GLOBAL;
62+
SET SESSION TRANSACTION_WRITE_SET_EXTRACTION = OLD_TRX_WRITE_SET_EXTRACTION_SESSION;
63+
[connection slave]
64+
CALL mtr.add_suppression("Worker .* failed executing transaction.*");
65+
[connection master]
66+
DROP TABLE t;
67+
DROP PROCEDURE p;
68+
include/rpl_end.inc
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# ==== Purpose ====
2+
#
3+
# This test verifies that when binlog_row_image=minimum and a table has
4+
# both unique + primary key then there should not be any conflict running
5+
# the transactions on this table on the soure and replica.
6+
#
7+
# ==== Requirements ====
8+
# R1. In case of binlog_row_image=minimum, when generating read_sets and
9+
# writesets, we should include unique key also such that transaction dependencies
10+
# are calculated correctly and transactions do not fail on replica while running
11+
# in parallel.
12+
#
13+
# ==== Implementation ====
14+
#
15+
# 1) Create source-replica topology.
16+
# 2) Save the current variable values.
17+
# 2.1) Save current binlog_row_image.
18+
# 2.2) Save current tracking mode and extraction algorithm.
19+
# 3) Setup neccesary variables on source.
20+
# 3.1) Set the binlog_row_image to MINIMAL on source.
21+
# 3.2) Set the global transaction extraction algorithm to XXHASH64
22+
# and the tracking mode to WRITESET.
23+
# 4) Create table and procedure on source.
24+
# 5) Switch to source and call procedure.
25+
# 6) Clear system variables on source.
26+
# 7) Cleanup.
27+
#
28+
# ==== References ====
29+
#
30+
# Bug#31636339 MTS IS UNUSABLE; ERRNO 3030, AND WORKER HA_ERR_FOUND_DUPP_KEY OR HA_ERR_LOCK_WAIT_TIMEOUT
31+
#
32+
--source include/have_binlog_format_row.inc
33+
--source include/only_mts_slave_parallel_type_logical_clock.inc
34+
--source include/only_mts_slave_parallel_workers.inc
35+
--let $option_name = slave_transaction_retries
36+
--let $option_operator = >
37+
--let $option_value = 2
38+
--source include/only_with_option.inc
39+
40+
--echo #
41+
--echo # 1. Create source-replica topology.
42+
--source include/master-slave.inc
43+
44+
--echo #
45+
--echo # 2. Save the current variable values.
46+
# 2.1. Save binlog_row_image value.
47+
SET @old_binlog_row_image= @@binlog_row_image;
48+
--source include/rpl_connection_master.inc
49+
50+
# 2.2. Save the current tracking mode and extraction algorithm.
51+
--let $old_trx_tracker = `SELECT @@GLOBAL.BINLOG_TRANSACTION_DEPENDENCY_TRACKING`
52+
--let $old_trx_write_set_extraction_session = `SELECT @@SESSION.TRANSACTION_WRITE_SET_EXTRACTION`
53+
--let $old_trx_write_set_extraction_global = `SELECT @@GLOBAL.TRANSACTION_WRITE_SET_EXTRACTION`
54+
55+
--echo # 3. Setup neccesary variables on source.
56+
--echo # 3.1 Set the binlog_row_image to MINIMAL on source.
57+
--let $row_img_set=master:MINIMAL:N
58+
--source include/rpl_row_img_set.inc
59+
60+
# 3.2 Set the global transaction extraction algorithm and tracking mode.
61+
SET GLOBAL TRANSACTION_WRITE_SET_EXTRACTION = 'XXHASH64';
62+
SET SESSION TRANSACTION_WRITE_SET_EXTRACTION = 'XXHASH64';
63+
SET GLOBAL BINLOG_TRANSACTION_DEPENDENCY_TRACKING = WRITESET;
64+
65+
--echo #
66+
--echo # 4. Create table and procedure on source.
67+
CREATE TABLE t( a tinyint unsigned primary key, b tinyint, c int, d bigint,
68+
f char(10), g char(255), h text, i longtext, unique key(g,f),
69+
unique key(f,c), key(c,d,b), key(i(10),f(10),b)) ENGINE=InnoDB;
70+
71+
drop procedure if exists p;
72+
delimiter |;
73+
create procedure p(p_i bigint)
74+
begin
75+
declare v_i bigint default 0;
76+
repeat
77+
replace into t values(
78+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5),
79+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5)
80+
),
81+
(
82+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5),
83+
floor(rand()*5),floor(rand()*5),floor(rand()*5),floor(rand()*5)
84+
);
85+
set v_i=v_i+1;
86+
until v_i > p_i end repeat;
87+
end|
88+
delimiter ;|
89+
90+
--echo #
91+
--echo #
92+
--echo # 5. Switch to source and call procedure.
93+
--source include/rpl_connection_master.inc
94+
call p(1000);
95+
# Sync replica's sql thread with source.
96+
--source include/sync_slave_sql_with_master.inc
97+
98+
--echo #
99+
--echo # 6. Clear system variables on source.
100+
--echo #
101+
--source include/rpl_connection_master.inc
102+
SET GLOBAL binlog_row_image= @old_binlog_row_image;
103+
SET SESSION binlog_row_image= @old_binlog_row_image;
104+
--replace_result $old_trx_tracker OLD_TRX_TRACKER
105+
--eval SET GLOBAL BINLOG_TRANSACTION_DEPENDENCY_TRACKING = $old_trx_tracker
106+
--replace_result $old_trx_write_set_extraction_global OLD_TRX_WRITE_SET_EXTRACTION_GLOBAL
107+
--eval SET GLOBAL TRANSACTION_WRITE_SET_EXTRACTION = $old_trx_write_set_extraction_global
108+
--replace_result $old_trx_write_set_extraction_session OLD_TRX_WRITE_SET_EXTRACTION_SESSION
109+
--eval SET SESSION TRANSACTION_WRITE_SET_EXTRACTION = $old_trx_write_set_extraction_session
110+
111+
# Supress the error warning on slave.
112+
--source include/rpl_connection_slave.inc
113+
CALL mtr.add_suppression("Worker .* failed executing transaction.*");
114+
115+
# 7. Cleanup.
116+
--source include/rpl_connection_master.inc
117+
DROP TABLE t;
118+
DROP PROCEDURE p;
119+
--source include/rpl_end.inc
120+

sql/handler.cc

+78-29
Original file line numberDiff line numberDiff line change
@@ -7915,10 +7915,57 @@ static int write_locked_table_maps(THD *thd)
79157915
DBUG_RETURN(0);
79167916
}
79177917

7918-
79197918
typedef bool Log_func(THD*, TABLE*, bool,
79207919
const uchar*, const uchar*);
79217920

7921+
/**
7922+
7923+
The purpose of an instance of this class is to :
7924+
7925+
1) Given a TABLE instance, backup the given TABLE::read_set, TABLE::write_set
7926+
and restore those members upon this instance disposal.
7927+
7928+
2) Store a reference to a dynamically allocated buffer and dispose of it upon
7929+
this instance disposal.
7930+
*/
7931+
7932+
class Binlog_log_row_cleanup
7933+
{
7934+
public:
7935+
/**
7936+
This constructor aims to create temporary copies of readset and writeset.
7937+
@param table A pointer to TABLE object
7938+
@param temp_read_bitmap Temporary BITMAP to store read_set.
7939+
@param temp_write_bitmap Temporary BITMAP to store write_set.
7940+
*/
7941+
Binlog_log_row_cleanup(TABLE &table, MY_BITMAP &temp_read_bitmap,
7942+
MY_BITMAP &temp_write_bitmap)
7943+
: m_cleanup_table(table),
7944+
m_cleanup_read_bitmap(temp_read_bitmap),
7945+
m_cleanup_write_bitmap(temp_write_bitmap)
7946+
{
7947+
bitmap_copy(&this->m_cleanup_read_bitmap, this->m_cleanup_table.read_set);
7948+
bitmap_copy(&this->m_cleanup_write_bitmap, this->m_cleanup_table.write_set);
7949+
}
7950+
7951+
/**
7952+
This destructor aims to restore the original readset and writeset and
7953+
delete the temporary copies.
7954+
*/
7955+
virtual ~Binlog_log_row_cleanup()
7956+
{
7957+
bitmap_copy(this->m_cleanup_table.read_set, &this->m_cleanup_read_bitmap);
7958+
bitmap_copy(this->m_cleanup_table.write_set, &this->m_cleanup_write_bitmap);
7959+
bitmap_free(&this->m_cleanup_read_bitmap);
7960+
bitmap_free(&this->m_cleanup_write_bitmap);
7961+
}
7962+
7963+
private:
7964+
TABLE &m_cleanup_table; // Creating a TABLE to get access to its members.
7965+
MY_BITMAP &m_cleanup_read_bitmap; // Temporary bitmap to store read_set.
7966+
MY_BITMAP &m_cleanup_write_bitmap; // Temporary bitmap to store write_set.
7967+
};
7968+
79227969
int binlog_log_row(TABLE* table,
79237970
const uchar *before_record,
79247971
const uchar *after_record,
@@ -7933,40 +7980,42 @@ int binlog_log_row(TABLE* table,
79337980
{
79347981
try
79357982
{
7936-
if (before_record && after_record)
7983+
MY_BITMAP save_read_set;
7984+
MY_BITMAP save_write_set;
7985+
if (bitmap_init(&save_read_set, NULL, table->s->fields, false) ||
7986+
bitmap_init(&save_write_set, NULL, table->s->fields, false))
79377987
{
7938-
size_t length= table->s->reclength;
7939-
uchar* temp_image=(uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
7940-
length,
7941-
MYF(MY_WME));
7942-
if (!temp_image)
7943-
{
7944-
sql_print_error("Out of memory on transaction write set extraction");
7945-
return 1;
7946-
}
7947-
if (add_pke(table, thd))
7948-
{
7949-
my_free(temp_image);
7950-
return HA_ERR_RBR_LOGGING_FAILED;
7951-
}
7952-
7953-
memcpy(temp_image, table->record[0],(size_t) table->s->reclength);
7954-
memcpy(table->record[0],table->record[1],(size_t) table->s->reclength);
7988+
my_error(ER_OUT_OF_RESOURCES, MYF(0));
7989+
return HA_ERR_RBR_LOGGING_FAILED;
7990+
}
79557991

7956-
if (add_pke(table, thd))
7992+
Binlog_log_row_cleanup cleanup_sentry(*table, save_read_set,
7993+
save_write_set);
7994+
if (thd->variables.binlog_row_image == 0)
7995+
{
7996+
for (uint key_number= 0; key_number < table->s->keys; ++key_number)
79577997
{
7958-
my_free(temp_image);
7959-
return HA_ERR_RBR_LOGGING_FAILED;
7998+
if (((table->key_info[key_number].flags & (HA_NOSAME)) ==
7999+
HA_NOSAME))
8000+
{
8001+
table->mark_columns_used_by_index_no_reset(key_number,
8002+
table->read_set);
8003+
table->mark_columns_used_by_index_no_reset(key_number,
8004+
table->write_set);
8005+
}
79608006
}
7961-
7962-
memcpy(table->record[0], temp_image, (size_t) table->s->reclength);
7963-
7964-
my_free(temp_image);
79658007
}
7966-
else
8008+
const uchar *records[]= {after_record, before_record};
8009+
8010+
for (int record= 0; record < 2; ++record)
79678011
{
7968-
if (add_pke(table, thd))
7969-
return HA_ERR_RBR_LOGGING_FAILED;
8012+
if (records[record] != NULL)
8013+
{
8014+
assert(records[record] == table->record[0] ||
8015+
records[record] == table->record[1]);
8016+
bool res= add_pke(table, thd, records[record]);
8017+
if (res) return HA_ERR_RBR_LOGGING_FAILED;
8018+
}
79708019
}
79718020
}
79728021
catch (const std::bad_alloc &)

sql/rpl_rli_pdb.cc

-13
Original file line numberDiff line numberDiff line change
@@ -1980,19 +1980,6 @@ bool Slave_worker::retry_transaction(uint start_relay_number,
19801980
{
19811981
error= ER_LOCK_DEADLOCK;
19821982
}
1983-
#ifndef NDEBUG
1984-
else
1985-
{
1986-
/*
1987-
The non-debug binary will not retry this transactions, stopping the
1988-
SQL thread because of the non-temporary error. But, as this situation
1989-
is not supposed to happen as described in the comment above, we will
1990-
fail an assert to ease the issue investigation when it happens.
1991-
*/
1992-
if (DBUG_EVALUATE_IF("rpl_fake_cod_deadlock", 0, 1))
1993-
assert(false);
1994-
}
1995-
#endif
19961983
}
19971984

19981985
if ((!has_temporary_error(thd, error, &silent) ||

0 commit comments

Comments
 (0)