Skip to content

Commit 5e924e0

Browse files
BUG#26886646 - SEGMENTATION FAULT AT TASK_DELETE() IN LIBMYSQLGCS/SRC/BINDINGS/XCOM/XCOM/TASK.C
Problem ---------------------------- A Segmentation fault was encountered on a daily-trunk pushbuild run for the test 'group_replication.gr_ssl_mode_verify_identity_error' when starting group replication with the below symptoms - group_replication.gr_ssl_mode_verify_identity_error w5 [ fail ] Test ended at 2017-09-27 17:37:18 CURRENT_TEST: group_replication.gr_ssl_mode_verify_identity_error mysqltest: At line 58: query 'START GROUP_REPLICATION' failed with wrong errno 2013: 'Lost connection to MySQL server during query', instead of 3092 Analysis ----------------------------- The test that fails sporadically is Step #2 of gr_ssl_mode_verify_identity_error. That step causes a global SSL comunication failure, in which not even the local connections can be established, thus causing the join operation to fail. When that happens, GCS will request the XCom thread termination. Since it does not have socket access, it will contact it directly calling xcom_fsm(exit), but this is a well-known non-thread safe operation, hence causing this crash. Fix ----------------------------- Create a new callback for GCS called should_exit, which is tested within task_loop, in order to check if we should exit the XCom thread.
1 parent 1591fab commit 5e924e0

File tree

9 files changed

+72
-7
lines changed

9 files changed

+72
-7
lines changed

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ static void *xcom_taskmain_startup(void *ptr)
8484
Gcs_xcom_proxy *proxy= gcs_ctrl->get_xcom_proxy();
8585
xcom_port port= gcs_ctrl->get_node_address()->get_member_port();
8686

87+
proxy->set_should_exit(false);
8788
proxy->xcom_init(port);
8889

8990
My_xp_thread_util::exit(0);

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc

+9
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ void cb_xcom_comms(int status);
7676
void cb_xcom_ready(int status);
7777
void cb_xcom_exit(int status);
7878
synode_no cb_xcom_get_app_snap(blob *gcs_snap);
79+
int cb_xcom_get_should_exit();
7980
void cb_xcom_handle_app_snap(blob *gcs_snap);
8081
int cb_xcom_socket_accept(int fd);
8182

@@ -881,6 +882,7 @@ initialize_xcom(const Gcs_interface_parameters &interface_params)
881882
::set_xcom_global_view_receiver(cb_xcom_receive_global_view);
882883
::set_port_matcher(cb_xcom_match_port);
883884
::set_app_snap_handler(cb_xcom_handle_app_snap);
885+
::set_should_exit_getter(cb_xcom_get_should_exit);
884886
::set_app_snap_getter(cb_xcom_get_app_snap);
885887
::set_xcom_run_cb(cb_xcom_ready);
886888
::set_xcom_comms_cb(cb_xcom_comms);
@@ -1579,6 +1581,13 @@ synode_no cb_xcom_get_app_snap(blob *gcs_snap MY_ATTRIBUTE((unused)))
15791581
return null_synode;
15801582
}
15811583

1584+
int cb_xcom_get_should_exit()
1585+
{
1586+
if (xcom_proxy)
1587+
return (int)xcom_proxy->get_should_exit();
1588+
else
1589+
return 0;
1590+
}
15821591

15831592
void cb_xcom_ready(int status MY_ATTRIBUTE((unused)))
15841593
{

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_utils.cc

+18-3
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,8 @@ int Gcs_xcom_proxy_impl::xcom_exit(bool xcom_handlers_open)
298298
else if (!xcom_handlers_open)
299299
{
300300
/* The handlers were not yet open, so use basic xcom stop */
301-
::xcom_fsm(xa_exit, int_arg(0));
301+
this->set_should_exit(1);
302+
302303
res= false;
303304
}
304305

@@ -506,7 +507,8 @@ Gcs_xcom_proxy_impl::Gcs_xcom_proxy_impl()
506507
m_crl_file(),
507508
m_crl_path(),
508509
m_cipher(),
509-
m_tls_version()
510+
m_tls_version(),
511+
m_should_exit(false)
510512
{
511513
m_xcom_handlers= new Xcom_handler *[m_xcom_handlers_size];
512514

@@ -550,7 +552,8 @@ Gcs_xcom_proxy_impl::Gcs_xcom_proxy_impl(unsigned int wt)
550552
m_crl_file(),
551553
m_crl_path(),
552554
m_cipher(),
553-
m_tls_version()
555+
m_tls_version(),
556+
m_should_exit(false)
554557
{
555558
m_xcom_handlers= new Xcom_handler *[m_xcom_handlers_size];
556559

@@ -866,6 +869,18 @@ Gcs_xcom_proxy_impl::xcom_signal_comms_status_changed(int status)
866869
m_lock_xcom_comms_status.unlock();
867870
}
868871

872+
bool
873+
Gcs_xcom_proxy_impl::get_should_exit()
874+
{
875+
return m_should_exit.load(std::memory_order_relaxed);
876+
}
877+
878+
void
879+
Gcs_xcom_proxy_impl::set_should_exit(bool should_exit)
880+
{
881+
m_should_exit.store(should_exit, std::memory_order_relaxed);
882+
}
883+
869884
void Gcs_xcom_app_cfg::init()
870885
{
871886
::init_cfg_app_xcom();

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_utils.h

+18
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#ifndef GCS_XCOM_UTILS_INCLUDED
1717
#define GCS_XCOM_UTILS_INCLUDED
1818

19+
#include <atomic>
1920
#include <string>
2021
#include <vector>
2122

@@ -636,6 +637,20 @@ class Gcs_xcom_proxy
636637

637638
virtual int xcom_force_nodes(Gcs_xcom_nodes &nodes,
638639
unsigned int group_id_hash)= 0;
640+
641+
/**
642+
Function that retrieves the value that signals that XCom
643+
must be forcefully stopped.
644+
645+
@return 1 if XCom needs to forcefully exit. 0 otherwise.
646+
*/
647+
virtual bool get_should_exit()= 0;
648+
649+
/**
650+
Function that sets the value that signals that XCom
651+
must be forcefully stopped.
652+
*/
653+
virtual void set_should_exit(bool should_exit)= 0;
639654
};
640655

641656

@@ -759,6 +774,8 @@ class Gcs_xcom_proxy_impl : public Gcs_xcom_proxy_base
759774
int xcom_client_force_config(node_list *nl, uint32_t group_id);
760775
int xcom_client_force_config(connection_descriptor *fd, node_list *nl,
761776
uint32_t group_id);
777+
bool get_should_exit();
778+
void set_should_exit(bool should_exit);
762779
private:
763780
/* A pointer to the next local XCom connection to use. */
764781
int m_xcom_handlers_cursor;
@@ -805,6 +822,7 @@ class Gcs_xcom_proxy_impl : public Gcs_xcom_proxy_base
805822
const char *m_cipher;
806823
const char *m_tls_version;
807824

825+
std::atomic_bool m_should_exit;
808826

809827
/*
810828
Disabling the copy constructor and assignment operator.

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c

+16-3
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_net.h"
7272
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_os.h"
7373
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_cfg.h"
74+
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/site_def.h"
75+
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.h"
7476

7577
#ifndef _WIN32
7678
#include <poll.h>
@@ -935,11 +937,22 @@ static int msdiff(double time) {
935937
return (int)(1000.5 * (first_delayed()->time - time));
936938
}
937939

940+
static should_exit_getter get_should_exit;
941+
942+
void set_should_exit_getter(should_exit_getter x) { get_should_exit = x; }
943+
938944
static double idle_time = 0.0;
939945
void task_loop() {
946+
task_env *t = 0;
940947
/* While there are tasks */
941948
for (;;) {
942-
task_env *t = first_runnable();
949+
//check forced exit callback
950+
if(get_should_exit())
951+
{
952+
xcom_fsm(xa_exit, int_arg(0));
953+
}
954+
955+
t = first_runnable();
943956
/* While runnable tasks */
944957
while (runnable_tasks()) {
945958
task_env *next = next_task(t);
@@ -1023,9 +1036,9 @@ static int init_sockaddr(char *server, struct sockaddr_in *sock_addr,
10231036
socklen_t *sock_size, xcom_port port) {
10241037
/* Get address of server */
10251038
struct addrinfo *addr = 0;
1026-
1039+
10271040
checked_getaddrinfo(server, 0, 0, &addr);
1028-
1041+
10291042
if (!addr) return 0;
10301043
/* Copy first address */
10311044
memcpy(sock_addr, addr->ai_addr, addr->ai_addrlen);

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.h

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/x_platform.h"
2727
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_common.h"
2828

29+
2930
#ifdef __cplusplus
3031
extern "C" {
3132
#endif

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ extern "C" {
2727
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_os_layer.h"
2828
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_os_layer.h"
2929
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xdr_utils.h"
30-
#include "plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xdr_utils.h"
3130

3231
#define XCOM_THREAD_DEBUG 1
3332

@@ -183,6 +182,9 @@ void set_xcom_run_cb(xcom_state_change_cb x);
183182
void set_xcom_terminate_cb(xcom_state_change_cb x);
184183
void set_xcom_exit_cb(xcom_state_change_cb x);
185184

185+
typedef int (*should_exit_getter)();
186+
void set_should_exit_getter(should_exit_getter x);
187+
186188
app_data_ptr init_config_with_group(app_data *a, node_list *nl, cargo_type type,
187189
uint32_t group_id);
188190

rapid/unittest/gunit/libmysqlgcs/xcom/gcs_xcom_communication_interface-t.cc

+3
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ class mock_gcs_xcom_proxy : public Gcs_xcom_proxy_base
126126
uint32_t group_id));
127127
MOCK_METHOD2(xcom_client_force_config, int(node_list *nl,
128128
uint32_t group_id));
129+
130+
MOCK_METHOD0(get_should_exit, bool());
131+
MOCK_METHOD1(set_should_exit,void(bool should_exit));
129132
};
130133

131134

rapid/unittest/gunit/libmysqlgcs/xcom/gcs_xcom_group_management-t.cc

+3
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ class mock_gcs_xcom_proxy : public Gcs_xcom_proxy_base
9191
uint32_t group_id));
9292
MOCK_METHOD2(xcom_client_force_config, int(node_list *nl,
9393
uint32_t group_id));
94+
95+
MOCK_METHOD0(get_should_exit, bool());
96+
MOCK_METHOD1(set_should_exit,void(bool should_exit));
9497
};
9598

9699
class XcomGroupManagementTest : public GcsBaseTest

0 commit comments

Comments
 (0)