Skip to content

Commit bac0b46

Browse files
committed
Improves pgxc_ctl behavior as follows:
1) When issuing pg_ctl start, the -w option is now added for better synchronization. 2) When pinging a coordinator/datanode, if the ping does not detect the node, the ping is attempted up to three times at 100-millisecond intervals, just in case.
1 parent 6f57d03 commit bac0b46

File tree

3 files changed

+42
-18
lines changed

3 files changed

+42
-18
lines changed

contrib/pgxc_ctl/coord_cmd.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,12 @@ cmd_t *prepare_initCoordinatorSlave(char *nodeName)
275275
* a master which can handle the request. So GTM should be running. We can test all of them by
276276
* single 'select 1' command.
277277
*/
278-
if (pingNode(aval(VAR_coordMasterServers)[idx], aval(VAR_coordPorts)[idx]) != 0)
278+
if (pingNode(aval(VAR_coordMasterServers)[idx], aval(VAR_coordPorts)[idx]) != 0)
279279
{
280280
/* Master is not running. Must start it first */
281281
appendCmdEl(cmdBuildDir, (cmdStartMaster = initCmd(aval(VAR_coordMasterServers)[idx])));
282282
snprintf(newCommand(cmdStartMaster), MAXLINE,
283-
"pg_ctl start -Z coordinator -D %s -o -i",
283+
"pg_ctl start -Z coordinator -D %s -o -i -w",
284284
aval(VAR_coordMasterDirs)[idx]);
285285
}
286286
/*
@@ -977,7 +977,7 @@ int add_coordinatorMaster(char *name, char *host, int port, int pooler, char *di
977977
aval(VAR_coordPorts)[0], aval(VAR_coordMasterServers)[0], pgdumpall_out);
978978

979979
/* Start the new coordinator */
980-
doImmediate(host, NULL, "pg_ctl start -Z restoremode -D %s -o -i", dir);
980+
doImmediate(host, NULL, "pg_ctl start -Z restoremode -D %s -o -i -w", dir);
981981

982982
/* Restore the backup */
983983
doImmediateRaw("psql -h %s -p %d -d %s -f %s", host, port, sval(VAR_defaultDatabase), pgdumpall_out);
@@ -1151,7 +1151,7 @@ int add_coordinatorSlave(char *name, char *host, char *dir, char *archDir)
11511151
doImmediate(aval(VAR_coordMasterServers)[idx], NULL,
11521152
"pg_ctl stop -Z coordinator -D %s -m fast", aval(VAR_coordMasterDirs)[idx]);
11531153
doImmediate(aval(VAR_coordMasterServers)[idx], NULL,
1154-
"pg_ctl start -Z coordinator -D %s", aval(VAR_coordMasterDirs)[idx]);
1154+
"pg_ctl start -Z coordinator -D %s -w", aval(VAR_coordMasterDirs)[idx]);
11551155
#if 0
11561156
/* pg_basebackup */
11571157
doImmediate(host, NULL, "pg_basebackup -p %s -h %s -D %s -x",
@@ -1239,7 +1239,7 @@ int add_coordinatorSlave(char *name, char *host, char *dir, char *archDir)
12391239
fclose(f);
12401240

12411241
/* Start the slave */
1242-
doImmediate(host, NULL, "pg_ctl start -Z coordinator -D %s", dir);
1242+
doImmediate(host, NULL, "pg_ctl start -Z coordinator -D %s -w", dir);
12431243
return 0;
12441244
}
12451245

@@ -1502,7 +1502,7 @@ cmd_t *prepare_startCoordinatorMaster(char *nodeName)
15021502
}
15031503
cmd = cmdPgCtl = initCmd(aval(VAR_coordMasterServers)[idx]);
15041504
snprintf(newCommand(cmdPgCtl), MAXLINE,
1505-
"pg_ctl start -Z coordinator -D %s -o -i",
1505+
"pg_ctl start -Z coordinator -D %s -o -i -w",
15061506
aval(VAR_coordMasterDirs)[idx]);
15071507
return(cmd);
15081508
}
@@ -1562,7 +1562,7 @@ cmd_t *prepare_startCoordinatorSlave(char *nodeName)
15621562
}
15631563
cmd = cmdPgCtlStart = initCmd(aval(VAR_coordSlaveServers)[idx]);
15641564
snprintf(newCommand(cmdPgCtlStart), MAXLINE,
1565-
"pg_ctl start -Z coordinator -D %s -o -i",
1565+
"pg_ctl start -Z coordinator -D %s -o -i -w",
15661566
aval(VAR_coordSlaveDirs)[idx]);
15671567

15681568
/* Postgresql.conf at the Master */

contrib/pgxc_ctl/datanode_cmd.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ cmd_t *prepare_startDatanodeMaster(char *nodeName)
370370
}
371371
cmdStartDatanodeMaster = initCmd(aval(VAR_datanodeMasterServers)[idx]);
372372
snprintf(newCommand(cmdStartDatanodeMaster), MAXLINE,
373-
"pg_ctl start -Z datanode -D %s -o -i", aval(VAR_datanodeMasterDirs)[idx]);
373+
"pg_ctl start -Z datanode -D %s -o -i -w", aval(VAR_datanodeMasterDirs)[idx]);
374374
return(cmdStartDatanodeMaster);
375375
}
376376

@@ -429,7 +429,7 @@ cmd_t *prepare_startDatanodeSlave(char *nodeName)
429429

430430
cmd = cmdStartDatanodeSlave = initCmd(aval(VAR_datanodeSlaveServers)[idx]);
431431
snprintf(newCommand(cmdStartDatanodeSlave), MAXLINE,
432-
"pg_ctl start -Z datanode -D %s",
432+
"pg_ctl start -Z datanode -D %s -w",
433433
aval(VAR_datanodeSlaveDirs)[idx]);
434434

435435
/* Change the master to synchronous mode */
@@ -986,7 +986,7 @@ int add_datanodeMaster(char *name, char *host, int port, char *dir)
986986
aval(VAR_datanodePorts)[0], aval(VAR_datanodeMasterServers)[0], pgdumpall_out);
987987

988988
/* Start the new datanode */
989-
doImmediate(host, NULL, "pg_ctl start -Z restoremode -D %s -o -i", dir);
989+
doImmediate(host, NULL, "pg_ctl start -Z restoremode -D %s -o -i -w", dir);
990990

991991
/* Restore the backup */
992992
doImmediateRaw("psql -h %s -p %d -d %s -f %s", host, port, sval(VAR_defaultDatabase), pgdumpall_out);
@@ -1156,7 +1156,7 @@ int add_datanodeSlave(char *name, char *host, char *dir, char *archDir)
11561156
doImmediate(aval(VAR_datanodeMasterServers)[idx], NULL,
11571157
"pg_ctl stop -Z datanode -D %s -m fast", aval(VAR_datanodeMasterDirs)[idx]);
11581158
doImmediate(aval(VAR_datanodeMasterServers)[idx], NULL,
1159-
"pg_ctl start -Z datanode -D %s", aval(VAR_datanodeMasterDirs)[idx]);
1159+
"pg_ctl start -Z datanode -D %s -w", aval(VAR_datanodeMasterDirs)[idx]);
11601160
/* pg_basebackup */
11611161
doImmediate(host, NULL, "pg_basebackup -p %s -h %s -D %s -x",
11621162
aval(VAR_datanodePorts)[idx], aval(VAR_datanodeMasterServers)[idx], dir);
@@ -1198,7 +1198,7 @@ int add_datanodeSlave(char *name, char *host, char *dir, char *archDir)
11981198
aval(VAR_datanodeArchLogDirs)[idx], aval(VAR_datanodeArchLogDirs)[idx]);
11991199
fclose(f);
12001200
/* Start the slave */
1201-
doImmediate(host, NULL, "pg_ctl start -Z datanode -D %s", dir);
1201+
doImmediate(host, NULL, "pg_ctl start -Z datanode -D %s -w", dir);
12021202
return 0;
12031203
}
12041204

contrib/pgxc_ctl/utils.c

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@
2828
#include "config.h"
2929
#include "variables.h"
3030
#include "varnames.h"
31+
#include "sys/time.h"
3132

3233
static int Malloc_ed = 0;
3334
static int Strdup_ed = 0;
3435
static int Freed = 0;
36+
static void myUsleep(long microsec);
3537

3638
void *Malloc(size_t size)
3739
{
@@ -308,6 +310,9 @@ int pingNode(char *host, char *port)
308310
PGPing status;
309311
char conninfo[MAXLINE+1];
310312
char editBuf[MAXPATH+1];
313+
#define RETRY 3
314+
#define sleepMicro 100*1000 /* 100 millisec */
315+
int retry;
311316

312317
conninfo[0] = 0;
313318
if (host)
@@ -322,14 +327,22 @@ int pingNode(char *host, char *port)
322327
}
323328
if (conninfo[0])
324329
{
325-
status = PQping(conninfo);
326-
if (status == PQPING_OK)
327-
return 0;
328-
else
329-
return 1;
330+
for (retry = RETRY; retry; retry--){
331+
status = PQping(conninfo);
332+
if (status == PQPING_OK)
333+
return 0;
334+
else
335+
{
336+
myUsleep(sleepMicro);
337+
continue;
338+
}
339+
}
340+
return 1;
330341
}
331342
else
332343
return -1;
344+
#undef RETRY
345+
#undef sleepMicro
333346
}
334347

335348
void trimNl(char *s)
@@ -378,4 +391,15 @@ char *getIpAddress(char *hostName)
378391
trimNl(ipAddr);
379392
return ipAddr;
380393
}
381-
394+
395+
/*
 * myUsleep
 *
 * Pause the caller for the requested number of microseconds.
 *
 * microsec: length of the pause in microseconds.  Zero or a negative
 *           value makes the function return immediately.
 *
 * The pause is implemented by calling select() with no descriptor sets and
 * only a timeout.  The result of select() is deliberately ignored.
 */
static void myUsleep(long microsec)
{
	struct timeval tv;

	if (microsec > 0)
	{
		/* Split the interval into whole seconds and leftover microseconds. */
		tv.tv_sec = microsec / 1000000L;
		tv.tv_usec = microsec % 1000000L;

		/* No descriptors are watched; only the timeout is supplied. */
		(void) select(0, NULL, NULL, NULL, &tv);
	}
}

0 commit comments

Comments
 (0)