Skip to content

Commit 81de505

Browse files
committed
Bug#25407335 SLAVE DON'T REPORT SECONDS_BEHIND_MASTER WHEN RUNNING SLAVE_PARALLEL_WORKERS > 0
Problem: -------- Slave does not report Seconds_Behind_Master when running with MTS. How Seconds_Behind_Master is calculated? ---------------------------------------- When the slave is actively processing updates, Seconds_Behind_Master shows the difference between the current timestamp on the slave and the original timestamp logged on the master for the event currently being processed on the slave. Seconds_Behind_Master= Current timestamp - mi->rli->last_master_timestamp - mi->clock_diff_with_master In some cases, slave's clock can be different from master's clock. In order to support non-identical clocks, 'clock_diff_with_master' is also taken into consideration so that SBM is correct. Description: ------------ Coordinator thread, when it finds out that worker queue(GAQ) is empty, sets rli->last_master_timestamp to zero. For any further events, rli->last_master_timestamp is not updated and thus SBM doesn't show replication lag. There is also another issue associated with this bug. When a worker commits a transaction, mts_checkpoint_routine calls rli->reset_notified_checkpoint function where it sets the rli->last_master_timestamp. While doing that, instead of fetching the timestamp from the first group of the Slave_committed_queue, it fetches the timestamp from the last checkpoint and assigns it to rli->last_master_timestamp. This needs to be fixed as it results in inaccurate SBM. Fix: ---- rli->last_master_timestamp is assigned from the first event from the master when all slave workers are waiting for events from the Coordinator. When a worker commits a transaction, the coordinator thread fetches the timestamp from the first group of the Slave_committed_queue and assigns to rli->last_master_timestamp.
1 parent 02077e8 commit 81de505

10 files changed

+955
-21
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
# ==== Purpose ====
2+
#
3+
# Verify that Seconds_Behind_Master is calculated correctly on a
4+
# Multi-threaded Slave when slave_parallel_type = 'LOGICAL_CLOCK'.
5+
# The test covers these scenarios:
6+
#
7+
# 1) With two workers, check if Seconds_Behind_Master is calculated
8+
# correctly when two statements are applied in parallel on one database
9+
# and both the workers are blocked to to lock.
10+
#
11+
# 2) With two workers, check if Seconds_Behind_Master is calculated
12+
# correctly when the first worker is blocked.
13+
#
14+
# 3) With three workers, check if Seconds_Behind_Master is calculated
15+
# correctly when two workers working on table from the same database and
16+
# 3rd worker working on another database.
17+
18+
--source include/start_slave.inc
19+
20+
###############################################################################
21+
# Scenario 1: On slave, two statements are applied in parallel on one database.
22+
# Both workers are blocked.
23+
###############################################################################
24+
--echo # Scenario 1: With two workers, check if Seconds_Behind_Master is
25+
--echo # calculated correctly when two statements are applied in parallel on
26+
--echo # one database and both the workers are blocked to to lock.
27+
connect (master2, localhost, root, , );
28+
29+
connection slave;
30+
31+
connection master;
32+
USE test;
33+
CREATE TABLE t1 (a int);
34+
CREATE TABLE t2 (a int);
35+
36+
# We need one statement to be commited so that the parallel
37+
# statements have a common parent commit
38+
INSERT INTO t1 SET a=1;
39+
--source include/sync_slave_sql_with_master.inc
40+
41+
connection slave1;
42+
LOCK TABLE test.t1 WRITE, test.t2 WRITE;
43+
44+
--connection master1
45+
let $start= `SELECT UNIX_TIMESTAMP()`;
46+
BEGIN;
47+
INSERT INTO t1 SET a=2;
48+
49+
--connection master2
50+
BEGIN;
51+
INSERT INTO t2 SET a=4;
52+
53+
--connection master1
54+
COMMIT;
55+
--connection master2
56+
COMMIT;
57+
58+
connection master;
59+
--source include/sync_slave_io_with_master.inc
60+
61+
# Wait until all workers are blocked by locks. It implies that all
62+
# transactions are registered into the order commit queue.
63+
let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock';
64+
--source include/wait_condition.inc
65+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
66+
OR State = 'update';
67+
--source include/wait_condition.inc
68+
69+
# Sleep on slave so we were at least 3 seconds behind the master
70+
--real_sleep 3
71+
--source include/wait_for_mts_checkpoint.inc
72+
73+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
74+
let $stop= `SELECT UNIX_TIMESTAMP()`;
75+
let $upper_bound= `SELECT $stop - $start`;
76+
let $assert_text= Seconds_Behind_Master must be between 3 and upper_bound;
77+
let $assert_cond= 3 <= $sbm AND $sbm <= $upper_bound;
78+
--source include/assert.inc
79+
80+
# Unlock tables and allow both workers to complete
81+
connection slave1;
82+
UNLOCK TABLES;
83+
84+
connection master;
85+
--source include/sync_slave_sql_with_master.inc
86+
87+
let $wait_condition= SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for an event from Coordinator';
88+
--source include/wait_condition.inc
89+
--source include/wait_for_mts_checkpoint.inc
90+
91+
# Now both workers have completed. Thus Seconds_Behind_Master shoud be equal to 0.
92+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
93+
let $assert_text= Seconds_Behind_Master must be 0;
94+
let $assert_cond= $sbm = 0;
95+
--source include/assert.inc
96+
97+
###############################################################################
98+
# Scenario 2: Now only the first worker is blocked. Make sure there is a parent
99+
# commit
100+
###############################################################################
101+
--echo # Scenario 2: With two workers, check if Seconds_Behind_Master is
102+
--echo # calculated correctly when the first worker is blocked.
103+
104+
connection master;
105+
INSERT INTO t1 SET a=1;
106+
--source include/sync_slave_sql_with_master.inc
107+
108+
connection slave1;
109+
LOCK TABLE test.t1 WRITE;
110+
111+
connection slave2;
112+
LOCK TABLE test.t2 WRITE;
113+
114+
--connection master1
115+
let $start= `SELECT UNIX_TIMESTAMP()`;
116+
117+
BEGIN;
118+
INSERT INTO t1 SET a=2;
119+
120+
--connection master2
121+
BEGIN;
122+
INSERT INTO t2 SET a=3;
123+
124+
--connection master1
125+
COMMIT;
126+
--connection master2
127+
COMMIT;
128+
129+
connection master;
130+
--source include/sync_slave_io_with_master.inc
131+
132+
# Wait until both workers are queued and blocked.
133+
let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock';
134+
--source include/wait_condition.inc
135+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
136+
OR State = 'update';
137+
--source include/wait_condition.inc
138+
139+
# Now release one worker
140+
connection slave1;
141+
UNLOCK TABLES;
142+
143+
# Wait until released worker completes its taks
144+
let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock';
145+
--source include/wait_condition.inc
146+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
147+
OR State = 'update';
148+
--source include/wait_condition.inc
149+
150+
# Sleep for 2 seconds so Seconds_Behind_Master was at least 2
151+
--real_sleep 2
152+
153+
--source include/wait_for_mts_checkpoint.inc
154+
155+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
156+
let $stop= `SELECT UNIX_TIMESTAMP()`;
157+
let $upper_bound= `SELECT $stop - $start`;
158+
let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound;
159+
let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound;
160+
--source include/assert.inc
161+
162+
connection slave2;
163+
UNLOCK TABLES;
164+
165+
--connection master
166+
--source include/sync_slave_sql_with_master.inc
167+
168+
# All workers should be done by now.
169+
let $wait_condition= SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for an event from Coordinator';
170+
--source include/wait_condition.inc
171+
--source include/wait_for_mts_checkpoint.inc
172+
173+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
174+
let $assert_text= Seconds_Behind_Master must be 0;
175+
let $assert_cond= $sbm = 0;
176+
--source include/assert.inc
177+
178+
###############################################################################
179+
# Scenario 3: Now there are 3 workers working in pararell. Two workers working
180+
# on table from the same database and a 3rd worker working on another database
181+
###############################################################################
182+
--echo # Scenario 3: With three workers, check if Seconds_Behind_Master is
183+
--echo # calculated correctly when two workers working on table from the same
184+
--echo # database and 3rd worker working on another database.
185+
186+
connection master;
187+
CREATE DATABASE second_test;
188+
CREATE TABLE second_test.t3 (f1 INT);
189+
190+
# Make sure there is a parent commit
191+
INSERT INTO test.t1 SET a=1;
192+
--sync_slave_with_master
193+
194+
# Lock all tables to block all workers.
195+
connection slave1;
196+
LOCK TABLE test.t1 WRITE;
197+
198+
connection slave2;
199+
LOCK TABLE test.t2 WRITE;
200+
201+
connection slave3;
202+
LOCK TABLE second_test.t3 WRITE;
203+
204+
connect (master_second_test, localhost, root, , );
205+
206+
--connection master_second_test
207+
BEGIN;
208+
INSERT INTO second_test.t3 VALUES (1);
209+
210+
--connection master1
211+
let $start= `SELECT UNIX_TIMESTAMP()`;
212+
BEGIN;
213+
INSERT INTO test.t1 SET a=2;
214+
215+
--connection master2
216+
BEGIN;
217+
INSERT INTO test.t2 SET a=3;
218+
219+
--connection master_second_test
220+
COMMIT;
221+
--connection master1
222+
COMMIT;
223+
--connection master2
224+
COMMIT;
225+
226+
--source include/sync_slave_io_with_master.inc
227+
228+
# Wait for all workers to be queued
229+
# There should be two workers waiting for locks on test database
230+
let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'
231+
AND Info LIKE 'INSERT INTO test%';
232+
--source include/wait_condition.inc
233+
234+
# And one worker waiting for the lock on second_database
235+
let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'
236+
AND Info LIKE 'INSERT INTO second_test%';
237+
--source include/wait_condition.inc
238+
239+
# Make sure that there are 3 overall at the same point in time
240+
let $wait_condition= SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock';
241+
--source include/wait_condition.inc
242+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
243+
OR State = 'update';
244+
--source include/wait_condition.inc
245+
246+
# Wait for 2 seconds so Second_Behind_Master is at least 2
247+
--real_sleep 2
248+
--source include/wait_for_mts_checkpoint.inc
249+
250+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
251+
let $stop= `SELECT UNIX_TIMESTAMP()`;
252+
let $upper_bound= `SELECT $stop - $start`;
253+
let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound;
254+
let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound;
255+
--source include/assert.inc
256+
257+
# Lets finish concurrent write on second_test and check if workers on test report
258+
# correct Seconds_Behind_Master
259+
connection slave3;
260+
UNLOCK TABLES;
261+
262+
--real_sleep 1
263+
264+
connection slave;
265+
# Wait till worker on second_test db finishes
266+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'
267+
AND Info LIKE 'INSERT INTO second_test%';
268+
--source include/wait_condition.inc
269+
270+
# There should be the remaining two workers hanging on test db
271+
let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'
272+
AND Info LIKE 'INSERT INTO test%';
273+
--source include/wait_condition.inc
274+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
275+
OR State = 'update';
276+
--source include/wait_condition.inc
277+
--source include/wait_for_mts_checkpoint.inc
278+
279+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
280+
let $stop= `SELECT UNIX_TIMESTAMP()`;
281+
let $upper_bound= `SELECT $stop - $start`;
282+
let $assert_text= Seconds_Behind_Master must be between 3 and upper_bound;
283+
let $assert_cond= 3 <= $sbm AND $sbm <= $upper_bound;
284+
--source include/assert.inc
285+
286+
# Lets finish worker writing to t1
287+
connection slave1;
288+
UNLOCK TABLES;
289+
290+
# There should be only one worker left
291+
let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock';
292+
--source include/wait_condition.inc
293+
let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event'
294+
OR State = 'update';
295+
--source include/wait_condition.inc
296+
297+
--real_sleep 1
298+
299+
--source include/wait_for_mts_checkpoint.inc
300+
301+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
302+
let $stop= `SELECT UNIX_TIMESTAMP()`;
303+
let $upper_bound= `SELECT $stop - $start`;
304+
let $assert_text= Seconds_Behind_Master must be between 4 and upper_bound;
305+
let $assert_cond= 4 <= $sbm AND $sbm <= $upper_bound;
306+
--source include/assert.inc
307+
308+
# Release last worker
309+
connection slave2;
310+
UNLOCK TABLE;
311+
312+
# Wait for all workers to be done
313+
let $wait_condition= SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for an event from Coordinator';
314+
--source include/wait_condition.inc
315+
316+
# All catched up Seconds_Behind_Master should be 0. Lets wait some more to see if it does not increase
317+
# Seconds_Behind_Master
318+
319+
--real_sleep 1
320+
--source include/wait_for_mts_checkpoint.inc
321+
322+
let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1);
323+
let $assert_text= Seconds_Behind_Master must be 0;
324+
let $assert_cond= $sbm = 0;
325+
--source include/assert.inc
326+
327+
#
328+
# Cleanup
329+
#
330+
connection master;
331+
DROP TABLE test.t1;
332+
DROP TABLE test.t2;
333+
DROP DATABASE second_test;
334+
335+
--source include/sync_slave_sql_with_master.inc
336+
--source include/stop_slave.inc

0 commit comments

Comments
 (0)