@@ -230,7 +230,25 @@ int init_coordinator_slave_all(void)
230230
231231cmd_t * prepare_initCoordinatorSlave (char * nodeName )
232232{
233- cmd_t * cmd , * cmdBuildDir , * cmdStartMaster , * cmdBaseBkup , * cmdRecoveryConf , * cmdPgConf ;
233+ cmd_t * cmd ,
234+ * cmdBuildDir ,
235+ * cmdStartMaster ,
236+ #if 0
237+ * cmdBaseBkup ,
238+ #else
239+ /*
240+ * As of PostgreSQL 9.3 or later, pg_basebackup does not work with coordinator,
241+ * because each coordinator backend needs dbname to read pgxc_node info into
242+ * cache and pg_basebackup does not specify the database name.
243+ * The following uses more primitive means to use pg_start_backup() and pg_stop_backup().
244+ */
245+ * cmdStartBkup ,
246+ * cmdBuildAndSendTar ,
247+ * cmdUntar ,
248+ * cmdStopBkup ,
249+ #endif
250+ * cmdRecoveryConf ,
251+ * cmdPgConf ;
234252 int idx ;
235253 FILE * f ;
236254 char localStdin [MAXPATH + 1 ];
@@ -268,11 +286,67 @@ cmd_t *prepare_initCoordinatorSlave(char *nodeName)
268286 /*
269287 * Obtain base backup of the master
270288 */
289+ #if 0
271290 appendCmdEl (cmdBuildDir , (cmdBaseBkup = initCmd (aval (VAR_coordSlaveServers )[idx ])));
272291 snprintf (newCommand (cmdBaseBkup ), MAXLINE ,
273292 "pg_basebackup -p %s -h %s -D %s -x" ,
274293 aval (VAR_coordPorts )[idx ], aval (VAR_coordMasterServers )[idx ], aval (VAR_coordSlaveDirs )[idx ]);
275-
294+ #else
295+ /*
296+ * As of PostgreSQL-9.3 or later, pg_basebackup does not run with coordinators, because each coordinator
297+ * needs database name to read pgxc_node info into the cache and pg_basebackup does not specify this.
298+ * Current workaround is to use more primitive pg_start_backup() and pg_stop_backup().
299+ */
300+ /* Start backup */
301+ appendCmdEl (cmdBuildDir , (cmdStartBkup = initCmd (aval (VAR_coordMasterServers )[idx ])));
302+ /*
303+ * Here, we specify "quick and spike" CHECKPOINT because it is coordinator and we do not expect
304+ * much updating transactions against coordinators.
305+ */
306+ snprintf (newCommand (cmdStartBkup ), MAXLINE ,
307+ "psql -h localhost -p %s postgres" ,
308+ aval (VAR_coordPorts )[idx ]);
309+ if ((f = prepareLocalStdin ((cmdStartBkup -> localStdin = Malloc (MAXPATH + 1 )), MAXPATH , NULL )) == NULL )
310+ {
311+ cleanCmd (cmd );
312+ return (NULL );
313+ }
314+ fprintf (f ,
315+ "select pg_start_backup('%s', true);\n\\q\n" ,
316+ nodeName );
317+ fclose (f );
318+ /* Build tar and send it */
319+ appendCmdEl (cmdBuildDir , (cmdBuildAndSendTar = initCmd (aval (VAR_coordMasterServers )[idx ])));
320+ snprintf (newCommand (cmdBuildAndSendTar ), MAXLINE ,
321+ "rm -f %s/%s.tgz;" /* We remove this just in case the file does not have write privilege */
322+ "cd %s;"
323+ "tar czf %s/%s.tgz . ;"
324+ "scp %s/%s.tgz %s@%s:%s;"
325+ "rm -f %s/%s.tgz" ,
326+ sval (VAR_tmpDir ), nodeName ,
327+ aval (VAR_coordMasterDirs )[idx ],
328+ sval (VAR_tmpDir ), nodeName ,
329+ sval (VAR_tmpDir ), nodeName , sval (VAR_pgxcUser ), aval (VAR_coordSlaveServers )[idx ], sval (VAR_tmpDir ),
330+ sval (VAR_tmpDir ), nodeName );
331+ /* Stop backup */
332+ appendCmdEl (cmdBuildDir , (cmdStopBkup = initCmd (aval (VAR_coordMasterServers )[idx ])));
333+ snprintf (newCommand (cmdStopBkup ), MAXLINE ,
334+ "psql -h localhost -p %s postgres -c 'select pg_stop_backup()'" ,
335+ aval (VAR_coordPorts )[idx ]);
336+ /* Untar */
337+ appendCmdEl (cmdBuildDir , (cmdUntar = initCmd (aval (VAR_coordSlaveServers )[idx ])));
338+ snprintf (newCommand (cmdUntar ), MAXLINE ,
339+ "rm -rf %s;"
340+ "mkdir -p %s;"
341+ "cd %s;"
342+ "tar xzf %s/%s.tgz;"
343+ "rm -rf %s/%s.tgz" ,
344+ aval (VAR_coordSlaveDirs )[idx ],
345+ aval (VAR_coordSlaveDirs )[idx ],
346+ aval (VAR_coordSlaveDirs )[idx ],
347+ sval (VAR_tmpDir ), nodeName ,
348+ sval (VAR_tmpDir ), nodeName );
349+ #endif
276350 /* Configure recovery.conf file at the slave */
277351 appendCmdEl (cmdBuildDir , (cmdRecoveryConf = initCmd (aval (VAR_coordSlaveServers )[idx ])));
278352 if ((f = prepareLocalStdin (localStdin , MAXPATH , NULL )) == NULL )
@@ -940,6 +1014,7 @@ int add_coordinatorMaster(char *name, char *host, int port, int pooler, char *di
9401014 else
9411015 {
9421016 fprintf (f , "ALTER NODE %s WITH (host='%s', PORT=%d);\n" , name , host , port );
1017+ fprintf (f , "select pgxc_pool_reload();\n" );
9431018 fprintf (f , "\\q\n" );
9441019 fclose (f );
9451020 }
@@ -1077,9 +1152,54 @@ int add_coordinatorSlave(char *name, char *host, char *dir, char *archDir)
10771152 "pg_ctl stop -Z coordinator -D %s -m fast" , aval (VAR_coordMasterDirs )[idx ]);
10781153 doImmediate (aval (VAR_coordMasterServers )[idx ], NULL ,
10791154 "pg_ctl start -Z coordinator -D %s" , aval (VAR_coordMasterDirs )[idx ]);
1155+ #if 0
10801156 /* pg_basebackup */
10811157 doImmediate (host , NULL , "pg_basebackup -p %s -h %s -D %s -x" ,
10821158 aval (VAR_coordPorts )[idx ], aval (VAR_coordMasterServers )[idx ], dir );
1159+ #else
1160+ /*
1161+ * As of PostgreSQL-9.3 or later, pg_basebackup does not run with coordinators.
1162+ * Now pg_basebackup runs without specifying database name. In each coordinator,
1163+ * we need (at present) database name to load node information into cache.
1164+ * More primitive means (pg_start_backup and pg_stop_backup) work as a workaround.
1165+ */
1166+ /*
1167+ * Start backup
1168+ * we specify quick and spike checkpoint here because this is just after the restart
1169+ * and we expect coordinator is static so there should not be much updates
1170+ */
1171+ doImmediate (aval (VAR_coordMasterServers )[idx ], NULL ,
1172+ "psql -h localhost -p %s postgres \"select pg_start_backup\\('%s', true\\)\"" ,
1173+ aval (VAR_coordPorts )[idx ], name );
1174+ /* Build and send it */
1175+ doImmediate (aval (VAR_coordMasterServers )[idx ], NULL ,
1176+ "rm -f %s/%s.tgz;" /* We remove this just in case the file does not have write privilege */
1177+ "cd %s;"
1178+ "tar czf %s/%s.tgz . ;"
1179+ "scp %s/%s.tgz %s@%s:%s;"
1180+ "rm -f %s/%s.tgz" ,
1181+ sval (VAR_tmpDir ), name ,
1182+ aval (VAR_coordMasterDirs )[idx ],
1183+ sval (VAR_tmpDir ), name ,
1184+ sval (VAR_tmpDir ), name , sval (VAR_pgxcUser ), host , sval (VAR_tmpDir ),
1185+ sval (VAR_tmpDir ), name );
1186+ /* Stop Backup */
1187+ doImmediate (aval (VAR_coordMasterServers )[idx ], NULL ,
1188+ "psql -h localhost -p %s postgres -c 'select pg_stop_backup()'" ,
1189+ aval (VAR_coordPorts )[idx ]);
1190+ /* Untar */
1191+ doImmediate (aval (VAR_coordSlaveServers )[idx ], NULL ,
1192+ "rm -rf %s;"
1193+ "mkdir -p %s;"
1194+ "cd %s;"
1195+ "tar xzf %s/%s.tgz;"
1196+ "rm -rf %s/%s.tgz" ,
1197+ dir ,
1198+ dir ,
1199+ dir ,
1200+ sval (VAR_tmpDir ), name ,
1201+ sval (VAR_tmpDir ), name );
1202+ #endif
10831203 /* Update the slave configuration with hot standby and port */
10841204 if ((f = pgxc_popen_w (host , "cat >> %s/postgresql.conf" , dir )) == NULL )
10851205 {
@@ -1800,23 +1920,48 @@ static int failover_oneCoordinator(int coordIdx)
18001920 aval (VAR_coordNames )[jj ]);
18011921 continue ;
18021922 }
1803- if ((f = pgxc_popen_wRaw ("psql -p %s -h %s %s %s" ,
1804- aval (VAR_coordPorts )[jj ],
1805- aval (VAR_coordMasterServers )[jj ],
1806- sval (VAR_defaultDatabase ),
1807- sval (VAR_pgxcOwner )))
1808- == NULL )
1923+ if (jj != coordIdx )
18091924 {
1810- elog (ERROR , "ERROR: failed to start psql for coordinator %s, %s\n" , aval (VAR_coordNames )[jj ], strerror (errno ));
1811- continue ;
1925+ if ((f = pgxc_popen_wRaw ("psql -p %s -h %s %s %s" ,
1926+ aval (VAR_coordPorts )[jj ],
1927+ aval (VAR_coordMasterServers )[jj ],
1928+ sval (VAR_defaultDatabase ),
1929+ sval (VAR_pgxcOwner )))
1930+ == NULL )
1931+ {
1932+ elog (ERROR , "ERROR: failed to start psql for coordinator %s, %s\n" , aval (VAR_coordNames )[jj ], strerror (errno ));
1933+ continue ;
1934+ }
1935+ fprintf (f ,
1936+ #if 0 /* Now alter node does not work well in this context. */
1937+ "ALTER NODE %s WITH (HOST='%s', PORT=%s);\n"
1938+ #else
1939+ "DROP NODE %s;\n"
1940+ "CREATE NODE %s WITH (type = coordinator, HOST='%s', PORT=%s);\n"
1941+ #endif
1942+ "select pgxc_pool_reload();\n"
1943+ "\\q\n" ,
1944+ aval (VAR_coordNames )[coordIdx ],
1945+ aval (VAR_coordNames )[coordIdx ], aval (VAR_coordMasterServers )[coordIdx ], aval (VAR_coordPorts )[coordIdx ]);
1946+ fclose (f );
18121947 }
1813- fprintf (f ,
1814- "ALTER NODE %s WITH (HOST='%s', PORT=%s);\n"
1815- "select pgxc_pool_reload();\n"
1816- "\\q\n" ,
1817- aval (VAR_coordNames )[coordIdx ], aval (VAR_coordMasterServers )[coordIdx ], aval (VAR_coordPorts )[coordIdx ]);
1818- fclose (f );
18191948 }
1949+ /* Now update myself */
1950+ if ((f = pgxc_popen_wRaw ("psql -p %s -h %s %s %s" ,
1951+ aval (VAR_coordPorts )[coordIdx ],
1952+ aval (VAR_coordMasterServers )[coordIdx ],
1953+ sval (VAR_defaultDatabase ),
1954+ sval (VAR_pgxcOwner )))
1955+ == NULL )
1956+ {
1957+ elog (ERROR , "ERROR: failed to start psql for coordinator %s, %s\n" , aval (VAR_coordNames )[coordIdx ], strerror (errno ));
1958+ }
1959+ fprintf (f ,
1960+ "ALTER NODE %s WITH (HOST='%s', PORT=%s);\n"
1961+ "select pgxc_pool_reload();\n"
1962+ "\\q\n" ,
1963+ aval (VAR_coordNames )[coordIdx ], aval (VAR_coordMasterServers )[coordIdx ], aval (VAR_coordPorts )[coordIdx ]);
1964+ fclose (f );
18201965 return (rc );
18211966
18221967# undef checkRc
0 commit comments