Skip to content

Commit 78f25d2

Browse files
committed
Bug#26997096: RELAY_LOG_SPACE IS INACCURATE AND LEAKS
Problem ------- The Relay_Log_Space variable shown in SHOW SLAVE STATUS is sometimes much higher than the actual disk space used by relay logs. Analysis -------- This is because we are not writing to Relay_log_info::log_space_total in a synchronized manner. i.e, no lock is being taken by the IO thread while updating the variable. Fix --- The Relay_log_info::log_space_total is now guarded by the Relay_log_info::log_space_lock and this protects concurrent update on Relay_log_info::log_space_total.
1 parent c1a7c10 commit 78f25d2

8 files changed

+136
-28
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
CREATE TABLE t1 (i INT);
7+
include/sync_slave_sql_with_master.inc
8+
include/stop_slave_sql.inc
9+
SET GLOBAL debug='+d,wait_in_purge_index_entry';
10+
FLUSH LOCAL RELAY LOGS;
11+
START SLAVE SQL_THREAD;
12+
SET DEBUG_SYNC="now WAIT_FOR in_purge_index_entry";
13+
[connection master]
14+
INSERT INTO t1 VALUES (1);
15+
[connection slave]
16+
include/assert.inc [IO Thread is waiting for Relay_log_info::log_space_lock.]
17+
SET DEBUG_SYNC="now SIGNAL go_ahead_sql";
18+
[connection master]
19+
DROP TABLE t1;
20+
include/sync_slave_sql_with_master.inc
21+
SET GLOBAL debug='-d,wait_in_purge_index_entry';
22+
SET DEBUG_SYNC="RESET";
23+
include/rpl_end.inc
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# === Purpose ===
2+
#
3+
# This test verifies that SQL and IO threads update the
4+
# rli->relay_log_space_total variable in a synchronized manner.
5+
#
6+
# === Implementation ===
7+
#
8+
# 1. On Slave, make the SQL thread to purge relay logs and halt
9+
# the SQL thread in MYSQL_BIN_LOG::purge_index_entry function
10+
# using debug sync utility.
11+
# 2. Do a DML on master so that IO thread calls queue_event and
12+
# updates rli->relay_log_space_total.
13+
# 3. Verify that IO thread is waiting rli->log_space_lock until it is released
14+
# by SQL thread.
15+
#
16+
# === References ===
17+
#
18+
# Bug#26997096 RELAY_LOG_SPACE IS INACCURATE AND LEAKS
19+
20+
# This test case is binlog_format agnostic
21+
--source include/have_binlog_format_row.inc
22+
# This test case uses debug_sync
23+
--source include/have_debug.inc
24+
--source include/have_debug_sync.inc
25+
--source include/master-slave.inc
26+
27+
CREATE TABLE t1 (i INT);
28+
29+
--source include/sync_slave_sql_with_master.inc
30+
--source include/stop_slave_sql.inc
31+
SET GLOBAL debug='+d,wait_in_purge_index_entry';
32+
33+
# Create a new relay log so the START SLAVE SQL_THREAD tries
34+
# to purge the old relay logs and hits the debug point.
35+
FLUSH LOCAL RELAY LOGS;
36+
START SLAVE SQL_THREAD;
37+
SET DEBUG_SYNC="now WAIT_FOR in_purge_index_entry";
38+
39+
# Do a DML on master so that IO thread calls queue_event and
40+
# updates rli->relay_log_space_total.
41+
--source include/rpl_connection_master.inc
42+
INSERT INTO t1 VALUES (1);
43+
--source include/rpl_connection_slave.inc
44+
45+
# Wait until IO thread tries to take a log_space_lock.
46+
--let $io_thread_id = `SELECT THREAD_ID FROM performance_schema.threads WHERE NAME like '%slave_io%'`
47+
--let $wait_condition= SELECT EVENT_NAME= 'wait/synch/mutex/sql/Relay_log_info::log_space_lock' FROM performance_schema.events_waits_current WHERE THREAD_ID=$io_thread_id
48+
--source include/wait_condition.inc
49+
50+
# Since SQL thread has taken the rli->log_space_lock, IO thread should wait until
51+
# the lock is released. Assert that IO thread is waiting for rli->log_space_lock.
52+
--let $assert_text= IO Thread is waiting for Relay_log_info::log_space_lock.
53+
--let $assert_cond= "[SELECT EVENT_NAME FROM performance_schema.events_waits_current WHERE THREAD_ID=$io_thread_id]" = "wait/synch/mutex/sql/Relay_log_info::log_space_lock"
54+
--source include/assert.inc
55+
56+
SET DEBUG_SYNC="now SIGNAL go_ahead_sql";
57+
58+
# Cleanup
59+
--source include/rpl_connection_master.inc
60+
DROP TABLE t1;
61+
--source include/sync_slave_sql_with_master.inc
62+
SET GLOBAL debug='-d,wait_in_purge_index_entry';
63+
SET DEBUG_SYNC="RESET";
64+
--source include/rpl_end.inc

sql/binlog.cc

+25
Original file line numberDiff line numberDiff line change
@@ -4843,6 +4843,13 @@ int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
48434843
DBUG_PRINT("info",("purging %s",log_info.log_file_name));
48444844
if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
48454845
{
4846+
DBUG_EXECUTE_IF("wait_in_purge_index_entry",
4847+
{
4848+
const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
4849+
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
4850+
DBUG_SET("-d,wait_in_purge_index_entry");
4851+
};);
4852+
48464853
if (decrease_log_space)
48474854
*decrease_log_space-= s.st_size;
48484855
}
@@ -6412,6 +6419,24 @@ void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
64126419
DBUG_VOID_RETURN;
64136420
}
64146421

6422+
void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
6423+
{
6424+
#ifndef DBUG_OFF
6425+
char buf1[22],buf2[22];
6426+
#endif
6427+
DBUG_ENTER("harvest_bytes_written");
6428+
if (need_log_space_lock)
6429+
mysql_mutex_lock(&rli->log_space_lock);
6430+
else
6431+
mysql_mutex_assert_owner(&rli->log_space_lock);
6432+
rli->log_space_total+= bytes_written;
6433+
DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s",
6434+
llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
6435+
bytes_written=0;
6436+
if (need_log_space_lock)
6437+
mysql_mutex_unlock(&rli->log_space_lock);
6438+
DBUG_VOID_RETURN;
6439+
}
64156440

64166441
void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
64176442
{

sql/binlog.h

+1-12
Original file line numberDiff line numberDiff line change
@@ -543,18 +543,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
543543
{
544544
bytes_written = 0;
545545
}
546-
void harvest_bytes_written(ulonglong* counter)
547-
{
548-
#ifndef DBUG_OFF
549-
char buf1[22],buf2[22];
550-
#endif
551-
DBUG_ENTER("harvest_bytes_written");
552-
(*counter)+=bytes_written;
553-
DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1),
554-
llstr(bytes_written,buf2)));
555-
bytes_written=0;
556-
DBUG_VOID_RETURN;
557-
}
546+
void harvest_bytes_written(Relay_log_info *rli, bool need_log_space_lock);
558547
void set_max_size(ulong max_size_arg);
559548
void signal_update();
560549
int wait_for_update_relay_log(THD* thd, const struct timespec * timeout);

sql/rpl_rli.cc

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -344,6 +344,7 @@ static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo)
344344
{
345345
MY_STAT s;
346346
DBUG_ENTER("add_relay_log");
347+
mysql_mutex_assert_owner(&rli->log_space_lock);
347348
if (!mysql_file_stat(key_file_relaylog,
348349
linfo->log_file_name, &s, MYF(0)))
349350
{
@@ -363,23 +364,29 @@ int Relay_log_info::count_relay_log_space()
363364
{
364365
LOG_INFO flinfo;
365366
DBUG_ENTER("Relay_log_info::count_relay_log_space");
367+
mysql_mutex_lock(&log_space_lock);
366368
log_space_total= 0;
367369
if (relay_log.find_log_pos(&flinfo, NullS, 1))
368370
{
369371
sql_print_error("Could not find first log while counting relay log space.");
372+
mysql_mutex_unlock(&log_space_lock);
370373
DBUG_RETURN(1);
371374
}
372375
do
373376
{
374377
if (add_relay_log(this, &flinfo))
378+
{
379+
mysql_mutex_unlock(&log_space_lock);
375380
DBUG_RETURN(1);
381+
}
376382
} while (!relay_log.find_next_log(&flinfo, 1));
377383
/*
378384
As we have counted everything, including what may have written in a
379385
preceding write, we must reset bytes_written, or we may count some space
380386
twice.
381387
*/
382388
relay_log.reset_bytes_written();
389+
mysql_mutex_unlock(&log_space_lock);
383390
DBUG_RETURN(0);
384391
}
385392

@@ -1995,7 +2002,7 @@ void Relay_log_info::end_info()
19952002
relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT,
19962003
true/*need_lock_log=true*/,
19972004
true/*need_lock_index=true*/);
1998-
relay_log.harvest_bytes_written(&log_space_total);
2005+
relay_log.harvest_bytes_written(this, true/*need_log_space_lock=true*/);
19992006
/*
20002007
Delete the slave's temporary tables from memory.
20012008
In the future there will be other actions than this, to ensure persistance

sql/rpl_slave.cc

+11-11
Original file line numberDiff line numberDiff line change
@@ -2807,7 +2807,7 @@ static bool wait_for_relay_log_space(Relay_log_info* rli)
28072807
if (rli->sql_force_rotate_relay)
28082808
{
28092809
mysql_mutex_lock(&mi->data_lock);
2810-
rotate_relay_log(mi);
2810+
rotate_relay_log(mi, false/*need_log_space_lock=false*/);
28112811
mysql_mutex_unlock(&mi->data_lock);
28122812
rli->sql_force_rotate_relay= false;
28132813
}
@@ -2861,7 +2861,7 @@ static int write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
28612861
"failed to write a Rotate event"
28622862
" to the relay log, SHOW SLAVE STATUS may be"
28632863
" inaccurate");
2864-
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
2864+
rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
28652865
if (flush_master_info(mi, TRUE))
28662866
{
28672867
error= 1;
@@ -6560,7 +6560,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
65606560
"error writing Exec_load event to relay log");
65616561
goto err;
65626562
}
6563-
mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
6563+
mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
65646564
break;
65656565
}
65666566
if (unlikely(cev_not_written))
@@ -6575,7 +6575,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
65756575
goto err;
65766576
}
65776577
cev_not_written=0;
6578-
mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
6578+
mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
65796579
}
65806580
else
65816581
{
@@ -6589,7 +6589,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
65896589
"error writing Append_block event to relay log");
65906590
goto err;
65916591
}
6592-
mi->rli->relay_log.harvest_bytes_written(&mi->rli->log_space_total);
6592+
mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
65936593
}
65946594
}
65956595
}
@@ -6659,7 +6659,7 @@ static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
66596659
Rotate the relay log makes binlog format detection easier (at next slave
66606660
start or mysqlbinlog)
66616661
*/
6662-
int ret= rotate_relay_log(mi);
6662+
int ret= rotate_relay_log(mi, true/*need_log_space_lock=true*/);
66636663
DBUG_RETURN(ret);
66646664
}
66656665

@@ -6773,7 +6773,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
67736773
delete ev;
67746774
DBUG_RETURN(1);
67756775
}
6776-
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6776+
rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
67776777
}
67786778
delete ev;
67796779
mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
@@ -6831,7 +6831,7 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
68316831
delete ev;
68326832
DBUG_RETURN(1);
68336833
}
6834-
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6834+
rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
68356835
delete ev;
68366836
mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
68376837
err:
@@ -7365,7 +7365,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
73657365
{
73667366
mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
73677367
DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7368-
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7368+
rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
73697369
}
73707370
else
73717371
{
@@ -8248,7 +8248,7 @@ event(errno: %d cur_log->error: %d)",
82488248
is void).
82498249
*/
82508250

8251-
int rotate_relay_log(Master_info* mi)
8251+
int rotate_relay_log(Master_info* mi, bool need_log_space_lock)
82528252
{
82538253
DBUG_ENTER("rotate_relay_log");
82548254

@@ -8286,7 +8286,7 @@ int rotate_relay_log(Master_info* mi)
82868286
If the log is closed, then this will just harvest the last writes, probably
82878287
0 as they probably have been harvested.
82888288
*/
8289-
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
8289+
rli->relay_log.harvest_bytes_written(rli, need_log_space_lock);
82908290
end:
82918291
DBUG_RETURN(error);
82928292
}

sql/rpl_slave.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -326,7 +326,7 @@ void init_thread_mask(int* mask,Master_info* mi,bool inverse);
326326
void set_slave_thread_options(THD* thd);
327327
void set_slave_thread_default_charset(THD *thd, Relay_log_info const *rli);
328328
int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli);
329-
int rotate_relay_log(Master_info* mi);
329+
int rotate_relay_log(Master_info* mi, bool need_log_space_lock);
330330

331331
pthread_handler_t handle_slave_io(void *arg);
332332
pthread_handler_t handle_slave_sql(void *arg);

sql/sql_reload.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
177177
if (active_mi != NULL)
178178
{
179179
mysql_mutex_lock(&active_mi->data_lock);
180-
if (rotate_relay_log(active_mi))
180+
if (rotate_relay_log(active_mi, true/*need_log_space_lock=true*/))
181181
*write_to_binlog= -1;
182182
mysql_mutex_unlock(&active_mi->data_lock);
183183
}

0 commit comments

Comments
 (0)