Skip to content

Commit 0950bcb

Browse files
author
Venkatesh Duggirala
committed
Bug #17026898 PREVIOUS GTID EVENT IS NOT WRITTEN WHEN BINLOG IS ROTATED VIA
SIGHUP Problem: When the binlog is rotated via the SIGHUP signal, the newly generated binlog does not contain a previous gtid event, which is very important for processing that binlog's gtid events later. If there are any transactions written to this binlog, then on the next restart, while the server is processing the available binary logs, it fails with the following error: "The first global transaction identifier was read, but no other information regarding identifiers existing on the previous log files was found." and the server refuses to start. Or, if the new GTID transactions which were written to this new binlog are replicated, the slave gets confused after seeing a GTID event without a previous_gtid_event and enters into a "Fatal 1236" error. Analysis: The SIGHUP signal causes the server to reload the grant tables and to flush tables, logs, the thread cache, and the host cache. As part of flushing logs, the server rotates the binary log as well. When the server receives the SIGHUP signal, it calls reload_acl_and_cache, which eventually executes the following code to write PREVIOUS_GTID_EVENT. if (current_thd && gtid_mode > 0) { /* write previous gtid event */ } And current_thd is NULL when the server reaches this code through the signal handler. Hence the newly generated binary log does not contain a previous gtid event, which resulted in the reported issue at the time of restart. Fix: If reload_acl_and_cache() is called from the SIGHUP handler, then allocate a temporary THD before execution of the binary log rotation function. The same problem can be seen with the relay log as well. Hence this temporary THD is allocated before the relay log rotation function too. The THD object is deleted after finishing the task.
1 parent 3fad85b commit 0950bcb

File tree

4 files changed

+214
-24
lines changed

4 files changed

+214
-24
lines changed

mysql-test/include/wait_show_condition.inc

+6-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#
1010
# USAGE
1111
#
12-
# All rows of the result must fulfil the condition if $all_rows_fulfil is 1
12+
# All rows of the result must fulfil the condition if $wait_for_all is 1
1313
# else at least one row of the result must fulfil the condition.
1414
# let $wait_for_all= 1;
1515
# let $show_statement= SHOW PROCESSLIST;
@@ -87,6 +87,11 @@ if ($wait_for_all != 1)
8787
# We are behind the last row of the result set.
8888
let $process_result= 0;
8989
let $do_loop= 0;
90+
# For string fields and for "!=" comparison, it is possible that
91+
# above if condition (i.e., SELECT '$field_value' $condition) is
92+
# true and the current if condition is also true. So set $found=0
93+
# here explicitly
94+
let $found= 0;
9095
}
9196
inc $rowno;
9297
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
include/master-slave.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection master]
6+
CREATE TABLE pid_table(pid_no INT);
7+
[connection server_1]
8+
LOAD DATA LOCAL INFILE 'pid_file' INTO TABLE pid_table;
9+
DELETE FROM pid_table;
10+
include/sync_slave_sql_with_master.inc
11+
[connection server_1]
12+
include/show_binlog_events.inc
13+
Log_name Pos Event_type Server_id End_log_pos Info
14+
master-bin.000002 # Previous_gtids # # Gtid_set
15+
master-bin.000002 # Gtid # # SET @@SESSION.GTID_NEXT= 'Gtid_set'
16+
master-bin.000002 # Query # # BEGIN
17+
master-bin.000002 # Query # # use `test`; DELETE FROM pid_table
18+
master-bin.000002 # Query # # COMMIT
19+
include/rpl_stop_server.inc [server_number=1]
20+
include/rpl_start_server.inc [server_number=1 gtids=on]
21+
[connection server_2]
22+
LOAD DATA LOCAL INFILE 'pid_file' INTO TABLE pid_table;
23+
[connection server_1]
24+
DELETE FROM pid_table;
25+
include/sync_slave_sql_with_master.inc
26+
include/show_relaylog_events.inc
27+
Log_name Pos Event_type Server_id End_log_pos Info
28+
slave-relay-bin.000006 # Previous_gtids # # Gtid_set
29+
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
30+
slave-relay-bin.000006 # Gtid # # SET @@SESSION.GTID_NEXT= 'Gtid_set'
31+
slave-relay-bin.000006 # Query # # BEGIN
32+
slave-relay-bin.000006 # Query # # use `test`; DELETE FROM pid_table
33+
slave-relay-bin.000006 # Query # # COMMIT
34+
include/rpl_stop_server.inc [server_number=2]
35+
include/rpl_start_server.inc [server_number=2 gtids=on]
36+
include/rpl_start_slaves.inc
37+
[connection server_1]
38+
DROP TABLE pid_table;
39+
include/sync_slave_sql_with_master.inc
40+
include/rpl_end.inc
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
###############################################################################
2+
# Bug #17026898 PREVIOUS GTID EVENT IS NOT WRITTEN WHEN BINLOG IS ROTATED VIA
3+
# SIGHUP
4+
5+
# Problem: When Bin(Relay)log is rotated via SIGHUP signal, the newly generated
6+
# binlog does not contain previous gtid event which is very important
7+
# for processing that bin(relay)log's gtid events later.
8+
9+
# Fix: If reload_acl_and_cache() (which does rotation of Bin(Relay) log)
10+
# is called from SIGHUP handler, then allocate temporary THD for
11+
# execution of rotate bin(relay)log.
12+
13+
# Steps to reproduce the issue:
14+
# 1) Get the server pid
15+
# 2) Send kill -1 (SIGHUP) signal to server pid
16+
# 3) Wait until rotation is done
17+
# 4) Verify the newly generated log contains prev_gtid_event
18+
# 5) Restart the server to see the processing of the new log is not an issue.
19+
# 6) Verify that Replication works fine at the end of the scenario
20+
21+
# Execute the same steps on both Master and Slave to prove that
22+
# a) no problem in binary log rotation (i.e., prev_gtids_event exists)
23+
# b) no problem in relay log rotation (i.e., prev_gtids event exists)
24+
# c) no problem in restarting master
25+
# d) no problem in restarting slave
26+
# e) Replication works fine after receiving SIGHUP.
27+
###############################################################################
28+
29+
# Test involves sending SIGHUP signal using kill linux cmd
30+
--source include/linux.inc
31+
32+
# Problem appears only with gtid
33+
--source include/have_gtid.inc
34+
35+
# Testing SIGHUP behaviour with one mode is enough
36+
--source include/have_binlog_format_statement.inc
37+
--source include/master-slave.inc
38+
CREATE TABLE pid_table(pid_no INT);
39+
40+
41+
# Execute above mentioned steps in two iterations
42+
# Iteration 1 : Master and Iteration 2: Slave
43+
--let $_rpl_server= 1
44+
while ($_rpl_server <= 2)
45+
{
46+
--let $rpl_connection_name= server_$_rpl_server
47+
--source include/rpl_connection.inc
48+
--let $pid_file=`SELECT @@pid_file`
49+
--replace_result $pid_file pid_file
50+
--eval LOAD DATA LOCAL INFILE '$pid_file' INTO TABLE pid_table
51+
# Step1: Get server pid
52+
--let $server_pid=`SELECT pid_no FROM pid_table`
53+
54+
# Get current binary log name (master) or relay log name (slave)
55+
if ($_rpl_server == 1)
56+
{
57+
--let $before_sighup_log_name=query_get_value(SHOW MASTER STATUS, File, 1)
58+
}
59+
if ($_rpl_server == 2)
60+
{
61+
--let $before_sighup_log_name=query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1)
62+
}
63+
64+
# Step2: send SIGHUP(signal 1) to server which will rotate bin(relay)log
65+
--exec kill -1 $server_pid
66+
67+
# Step3: Wait until signal handler does required work
68+
# (i.e., completes rotation of binary/relay log) after receiving SIGHUP signal.
69+
if ($_rpl_server == 1)
70+
{
71+
--let $show_statement=SHOW MASTER STATUS
72+
--let $field=File
73+
}
74+
if ($_rpl_server == 2)
75+
{
76+
--let $show_statement=SHOW SLAVE STATUS
77+
--let $field=Relay_Log_File
78+
}
79+
--let $condition= <> '$before_sighup_log_name'
80+
--source include/wait_show_condition.inc
81+
82+
# Write something to newly generated binary log/relay log
83+
if ($_rpl_server == 2)
84+
{
85+
--let $rpl_connection_name= server_1
86+
--source include/rpl_connection.inc
87+
}
88+
DELETE FROM pid_table;
89+
--source include/sync_slave_sql_with_master.inc
90+
if ($_rpl_server == 1)
91+
{
92+
--let $rpl_connection_name= server_1
93+
--source include/rpl_connection.inc
94+
}
95+
# Step 4: Show that the newly generated binary/relay log has a previous gtid event as well
96+
--let $keep_gtid_events= 1
97+
--let $binlog_file= LAST
98+
if ($_rpl_server == 1)
99+
{
100+
--source include/show_binlog_events.inc
101+
}
102+
if ($_rpl_server == 2)
103+
{
104+
--source include/show_relaylog_events.inc
105+
}
106+
# Step5: Restart server to make sure that
107+
# newly generated binary log/relay log does not cause any issues
108+
--let $rpl_server_number= $_rpl_server
109+
--source include/rpl_stop_server.inc
110+
--let $rpl_start_with_gtids= 1
111+
--source include/rpl_start_server.inc
112+
if ($_rpl_server == 2)
113+
{
114+
--source include/rpl_start_slaves.inc
115+
}
116+
--inc $_rpl_server
117+
}
118+
119+
# Now just make sure replication works fine
120+
--let $rpl_connection_name= server_1
121+
--source include/rpl_connection.inc
122+
DROP TABLE pid_table;
123+
--source include/sync_slave_sql_with_master.inc
124+
125+
--source include/rpl_end.inc

sql/sql_reload.cc

+43-23
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -139,35 +139,55 @@ bool reload_acl_and_cache(THD *thd, unsigned long options,
139139
if (options & REFRESH_ENGINE_LOG)
140140
if (ha_flush_logs(NULL))
141141
result= 1;
142-
143-
if (options & REFRESH_BINARY_LOG)
142+
if ((options & REFRESH_BINARY_LOG) || (options & REFRESH_RELAY_LOG ))
144143
{
145144
/*
146-
Writing this command to the binlog may result in infinite loops
147-
when doing mysqlbinlog|mysql, and anyway it does not really make
148-
sense to log it automatically (would cause more trouble to users
149-
than it would help them)
150-
*/
151-
tmp_write_to_binlog= 0;
152-
if (mysql_bin_log.is_open())
145+
If reload_acl_and_cache() is called from SIGHUP handler we have to
146+
allocate temporary THD for execution of binlog/relay log rotation.
147+
*/
148+
THD *tmp_thd= 0;
149+
if (!thd && (thd= (tmp_thd= new THD)))
153150
{
154-
if (mysql_bin_log.rotate_and_purge(true))
155-
*write_to_binlog= -1;
151+
thd->thread_stack= (char *) (&tmp_thd);
152+
thd->store_globals();
156153
}
157-
}
158-
if (options & REFRESH_RELAY_LOG)
159-
{
160-
#ifdef HAVE_REPLICATION
161-
mysql_mutex_lock(&LOCK_active_mi);
162-
if (active_mi != NULL)
154+
155+
if (options & REFRESH_BINARY_LOG)
163156
{
164-
mysql_mutex_lock(&active_mi->data_lock);
165-
if (rotate_relay_log(active_mi))
166-
*write_to_binlog= -1;
167-
mysql_mutex_unlock(&active_mi->data_lock);
157+
/*
158+
Writing this command to the binlog may result in infinite loops
159+
when doing mysqlbinlog|mysql, and anyway it does not really make
160+
sense to log it automatically (would cause more trouble to users
161+
than it would help them)
162+
*/
163+
tmp_write_to_binlog= 0;
164+
if (mysql_bin_log.is_open())
165+
{
166+
if (mysql_bin_log.rotate_and_purge(true))
167+
*write_to_binlog= -1;
168+
}
168169
}
169-
mysql_mutex_unlock(&LOCK_active_mi);
170+
if (options & REFRESH_RELAY_LOG)
171+
{
172+
#ifdef HAVE_REPLICATION
173+
mysql_mutex_lock(&LOCK_active_mi);
174+
if (active_mi != NULL)
175+
{
176+
mysql_mutex_lock(&active_mi->data_lock);
177+
if (rotate_relay_log(active_mi))
178+
*write_to_binlog= -1;
179+
mysql_mutex_unlock(&active_mi->data_lock);
180+
}
181+
mysql_mutex_unlock(&LOCK_active_mi);
170182
#endif
183+
}
184+
if (tmp_thd)
185+
{
186+
delete tmp_thd;
187+
/* Remember that we don't have a THD */
188+
my_pthread_setspecific_ptr(THR_THD, 0);
189+
thd= 0;
190+
}
171191
}
172192
#ifdef HAVE_QUERY_CACHE
173193
if (options & REFRESH_QUERY_CACHE_FREE)

0 commit comments

Comments
 (0)