Skip to content

Commit 6270ee4

Browse files
committed
Rethink the delay-checkpoint-end mechanism in the back-branches.
The back-patch of commit bbace56 had the unfortunate effect of changing the layout of PGPROC in the back-branches, which could break extensions. This happened because it changed the delayChkpt from type bool to type int. So, change it back, and add a new bool delayChkptEnd field instead. The new field should fall within what used to be padding space within the struct, and so hopefully won't cause any extensions to break. Per report from Markus Wanner and discussion with Tom Lane and others. Patch originally by me, somewhat revised by Markus Wanner per a suggestion from Michael Paquier. A very similar patch was developed by Kyotaro Horiguchi, but I failed to see the email in which that was posted before writing one of my own. Discussion: http://postgr.es/m/CA+Tgmoao-kUD9c5nG5sub3F7tbo39+cdr8jKaOVEs_1aBWcJ3Q@mail.gmail.com Discussion: http://postgr.es/m/20220406.164521.17171257901083417.horikyota.ntt@gmail.com
1 parent 79fed07 commit 6270ee4

File tree

10 files changed

+114
-80
lines changed

10 files changed

+114
-80
lines changed

src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,8 +3069,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30693069
* crash/basebackup, even though the state of the data directory would
30703070
* require it.
30713071
*/
3072-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
3073-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
3072+
Assert(!MyPgXact->delayChkpt);
3073+
MyPgXact->delayChkpt = true;
30743074

30753075
/* WAL log truncation */
30763076
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3096,7 +3096,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30963096
/* Then offsets */
30973097
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
30983098

3099-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
3099+
MyPgXact->delayChkpt = false;
31003100

31013101
END_CRIT_SECTION();
31023102
LWLockRelease(MultiXactTruncationLock);

src/backend/access/transam/twophase.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,9 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
476476
}
477477
pgxact->xid = xid;
478478
pgxact->xmin = InvalidTransactionId;
479-
pgxact->delayChkpt = 0;
479+
pgxact->delayChkpt = false;
480480
pgxact->vacuumFlags = 0;
481+
proc->delayChkptEnd = false;
481482
proc->pid = 0;
482483
proc->databaseId = databaseid;
483484
proc->roleId = owner;
@@ -1175,8 +1176,8 @@ EndPrepare(GlobalTransaction gxact)
11751176

11761177
START_CRIT_SECTION();
11771178

1178-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
1179-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
1179+
Assert(!MyPgXact->delayChkpt);
1180+
MyPgXact->delayChkpt = true;
11801181

11811182
XLogBeginInsert();
11821183
for (record = records.head; record != NULL; record = record->next)
@@ -1219,7 +1220,7 @@ EndPrepare(GlobalTransaction gxact)
12191220
* checkpoint starting after this will certainly see the gxact as a
12201221
* candidate for fsyncing.
12211222
*/
1222-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
1223+
MyPgXact->delayChkpt = false;
12231224

12241225
/*
12251226
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2353,8 +2354,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
23532354
START_CRIT_SECTION();
23542355

23552356
/* See notes in RecordTransactionCommit */
2356-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
2357-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
2357+
Assert(!MyPgXact->delayChkpt);
2358+
MyPgXact->delayChkpt = true;
23582359

23592360
/*
23602361
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2402,7 +2403,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
24022403
TransactionIdCommitTree(xid, nchildren, children);
24032404

24042405
/* Checkpoint can proceed now */
2405-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
2406+
MyPgXact->delayChkpt = false;
24062407

24072408
END_CRIT_SECTION();
24082409

src/backend/access/transam/xact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,9 +1239,9 @@ RecordTransactionCommit(void)
12391239
* This makes checkpoint's determination of which xacts are delayChkpt
12401240
* a bit fuzzy, but it doesn't matter.
12411241
*/
1242-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
1242+
Assert(!MyPgXact->delayChkpt);
12431243
START_CRIT_SECTION();
1244-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
1244+
MyPgXact->delayChkpt = true;
12451245

12461246
SetCurrentTransactionStopTimestamp();
12471247

@@ -1342,7 +1342,7 @@ RecordTransactionCommit(void)
13421342
*/
13431343
if (markXidCommitted)
13441344
{
1345-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
1345+
MyPgXact->delayChkpt = false;
13461346
END_CRIT_SECTION();
13471347
}
13481348

src/backend/access/transam/xlog.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9064,27 +9064,25 @@ CreateCheckPoint(int flags)
90649064
* and we will correctly flush the update below. So we cannot miss any
90659065
* xacts we need to wait for.
90669066
*/
9067-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
9067+
vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
90689068
if (nvxids > 0)
90699069
{
90709070
do
90719071
{
90729072
pg_usleep(10000L); /* wait for 10 msec */
9073-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
9074-
DELAY_CHKPT_START));
9073+
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
90759074
}
90769075
pfree(vxids);
90779076

90789077
CheckPointGuts(checkPoint.redo, flags);
90799078

9080-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
9079+
vxids = GetVirtualXIDsDelayingChkptEnd(&nvxids);
90819080
if (nvxids > 0)
90829081
{
90839082
do
90849083
{
90859084
pg_usleep(10000L); /* wait for 10 msec */
9086-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
9087-
DELAY_CHKPT_COMPLETE));
9085+
} while (HaveVirtualXIDsDelayingChkptEnd(vxids, nvxids));
90889086
}
90899087
pfree(vxids);
90909088

src/backend/access/transam/xloginsert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
899899
/*
900900
* Ensure no checkpoint can change our view of RedoRecPtr.
901901
*/
902-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) != 0);
902+
Assert(MyPgXact->delayChkpt);
903903

904904
/*
905905
* Update RedoRecPtr so that we can make the right decision

src/backend/catalog/storage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
262262
* the blocks to not exist on disk at all, but not for them to have the
263263
* wrong contents.
264264
*/
265-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_COMPLETE) == 0);
266-
MyPgXact->delayChkpt |= DELAY_CHKPT_COMPLETE;
265+
Assert(!MyProc->delayChkptEnd);
266+
MyProc->delayChkptEnd = true;
267267

268268
/*
269269
* We WAL-log the truncation before actually truncating, which means
@@ -311,7 +311,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
311311
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
312312

313313
/* We've done all the critical work, so checkpoints are OK now. */
314-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_COMPLETE;
314+
MyProc->delayChkptEnd = false;
315315
}
316316

317317
/*

src/backend/storage/buffer/bufmgr.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3471,8 +3471,8 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
34713471
* essential that CreateCheckpoint waits for virtual transactions
34723472
* rather than full transactionids.
34733473
*/
3474-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
3475-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
3474+
Assert(!MyPgXact->delayChkpt);
3475+
MyPgXact->delayChkpt = true;
34763476
delayChkpt = true;
34773477
lsn = XLogSaveBufferForHint(buffer, buffer_std);
34783478
}
@@ -3506,7 +3506,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
35063506
UnlockBufHdr(bufHdr, buf_state);
35073507

35083508
if (delayChkpt)
3509-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
3509+
MyPgXact->delayChkpt = false;
35103510

35113511
if (dirtied)
35123512
{

src/backend/storage/ipc/procarray.c

Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ static void DisplayXidCache(void);
151151
#define xc_slow_answer_inc() ((void) 0)
152152
#endif /* XIDCACHE_DEBUG */
153153

154+
static VirtualTransactionId *GetVirtualXIDsDelayingChkptGuts(int *nvxids,
155+
int type);
156+
static bool HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids,
157+
int nvxids, int type);
158+
154159
/* Primitives for KnownAssignedXids array handling for standby */
155160
static void KnownAssignedXidsCompress(bool force);
156161
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
@@ -434,8 +439,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
434439
/* must be cleared with xid/xmin: */
435440
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
436441

437-
/* be sure this is cleared in abort */
438-
pgxact->delayChkpt = 0;
442+
/* be sure these are cleared in abort */
443+
pgxact->delayChkpt = false;
444+
proc->delayChkptEnd = false;
439445

440446
proc->recoveryConflictPending = false;
441447

@@ -459,8 +465,9 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
459465
/* must be cleared with xid/xmin: */
460466
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
461467

462-
/* be sure this is cleared in abort */
463-
pgxact->delayChkpt = 0;
468+
/* be sure these are cleared in abort */
469+
pgxact->delayChkpt = false;
470+
proc->delayChkptEnd = false;
464471

465472
proc->recoveryConflictPending = false;
466473

@@ -2269,26 +2276,28 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
22692276
}
22702277

22712278
/*
2272-
* GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2273-
* delaying checkpoint because they have critical actions in progress.
2279+
* GetVirtualXIDsDelayingChkptGuts -- Get the VXIDs of transactions that are
2280+
* delaying the start or end of a checkpoint because they have critical
2281+
* actions in progress.
22742282
*
22752283
* Constructs an array of VXIDs of transactions that are currently in commit
2276-
* critical sections, as shown by having specified delayChkpt bits set in their
2277-
* PGXACT.
2284+
* critical sections, as shown by having specified delayChkpt or delayChkptEnd
2285+
* set.
22782286
*
22792287
* Returns a palloc'd array that should be freed by the caller.
22802288
* *nvxids is the number of valid entries.
22812289
*
2282-
* Note that because backends set or clear delayChkpt without holding any lock,
2283-
* the result is somewhat indeterminate, but we don't really care. Even in
2284-
* a multiprocessor with delayed writes to shared memory, it should be certain
2285-
* that setting of delayChkpt will propagate to shared memory when the backend
2286-
* takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2287-
* it's already inserted its commit record. Whether it takes a little while
2288-
* for clearing of delayChkpt to propagate is unimportant for correctness.
2290+
* Note that because backends set or clear delayChkpt and delayChkptEnd
2291+
* without holding any lock, the result is somewhat indeterminate, but we
2292+
* don't really care. Even in a multiprocessor with delayed writes to
2293+
* shared memory, it should be certain that setting of delayChkpt will
2294+
* propagate to shared memory when the backend takes a lock, so we cannot
2295+
* fail to see a virtual xact as delayChkpt if it's already inserted its
2296+
* commit record. Whether it takes a little while for clearing of
2297+
* delayChkpt to propagate is unimportant for correctness.
22892298
*/
2290-
VirtualTransactionId *
2291-
GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
2299+
static VirtualTransactionId *
2300+
GetVirtualXIDsDelayingChkptGuts(int *nvxids, int type)
22922301
{
22932302
VirtualTransactionId *vxids;
22942303
ProcArrayStruct *arrayP = procArray;
@@ -2309,7 +2318,8 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23092318
volatile PGPROC *proc = &allProcs[pgprocno];
23102319
volatile PGXACT *pgxact = &allPgXact[pgprocno];
23112320

2312-
if ((pgxact->delayChkpt & type) != 0)
2321+
if (((type & DELAY_CHKPT_START) && pgxact->delayChkpt) ||
2322+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd))
23132323
{
23142324
VirtualTransactionId vxid;
23152325

@@ -2325,6 +2335,26 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23252335
return vxids;
23262336
}
23272337

2338+
/*
2339+
* GetVirtualXIDsDelayingChkpt - Get the VXIDs of transactions that are
2340+
* delaying the start of a checkpoint.
2341+
*/
2342+
VirtualTransactionId *
2343+
GetVirtualXIDsDelayingChkpt(int *nvxids)
2344+
{
2345+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_START);
2346+
}
2347+
2348+
/*
2349+
* GetVirtualXIDsDelayingChkptEnd - Get the VXIDs of transactions that are
2350+
* delaying the end of a checkpoint.
2351+
*/
2352+
VirtualTransactionId *
2353+
GetVirtualXIDsDelayingChkptEnd(int *nvxids)
2354+
{
2355+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_COMPLETE);
2356+
}
2357+
23282358
/*
23292359
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
23302360
*
@@ -2334,8 +2364,9 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23342364
* Note: this is O(N^2) in the number of vxacts that are/were delaying, but
23352365
* those numbers should be small enough for it not to be a problem.
23362366
*/
2337-
bool
2338-
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
2367+
static bool
2368+
HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids, int nvxids,
2369+
int type)
23392370
{
23402371
bool result = false;
23412372
ProcArrayStruct *arrayP = procArray;
@@ -2354,7 +2385,8 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
23542385

23552386
GET_VXID_FROM_PGPROC(vxid, *proc);
23562387

2357-
if ((pgxact->delayChkpt & type) != 0 &&
2388+
if ((((type & DELAY_CHKPT_START) && pgxact->delayChkpt) ||
2389+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd)) &&
23582390
VirtualTransactionIdIsValid(vxid))
23592391
{
23602392
int i;
@@ -2377,6 +2409,28 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
23772409
return result;
23782410
}
23792411

2412+
/*
2413+
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying
2414+
* the start of a checkpoint?
2415+
*/
2416+
bool
2417+
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
2418+
{
2419+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
2420+
DELAY_CHKPT_START);
2421+
}
2422+
2423+
/*
2424+
* HaveVirtualXIDsDelayingChkptEnd -- Are any of the specified VXIDs delaying
2425+
* the end of a checkpoint?
2426+
*/
2427+
bool
2428+
HaveVirtualXIDsDelayingChkptEnd(VirtualTransactionId *vxids, int nvxids)
2429+
{
2430+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
2431+
DELAY_CHKPT_COMPLETE);
2432+
}
2433+
23802434
/*
23812435
* BackendPidGetProc -- get a backend's PGPROC given its PID
23822436
*

src/include/storage/proc.h

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -77,36 +77,8 @@ struct XidCache
7777
#define INVALID_PGPROCNO PG_INT32_MAX
7878

7979
/*
80-
* Flags for PGPROC.delayChkpt
81-
*
82-
* These flags can be used to delay the start or completion of a checkpoint
83-
* for short periods. A flag is in effect if the corresponding bit is set in
84-
* the PGPROC of any backend.
85-
*
86-
* For our purposes here, a checkpoint has three phases: (1) determine the
87-
* location to which the redo pointer will be moved, (2) write all the
88-
* data durably to disk, and (3) WAL-log the checkpoint.
89-
*
90-
* Setting DELAY_CHKPT_START prevents the system from moving from phase 1
91-
* to phase 2. This is useful when we are performing a WAL-logged modification
92-
* of data that will be flushed to disk in phase 2. By setting this flag
93-
* before writing WAL and clearing it after we've both written WAL and
94-
* performed the corresponding modification, we ensure that if the WAL record
95-
* is inserted prior to the new redo point, the corresponding data changes will
96-
* also be flushed to disk before the checkpoint can complete. (In the
97-
* extremely common case where the data being modified is in shared buffers
98-
* and we acquire an exclusive content lock on the relevant buffers before
99-
* writing WAL, this mechanism is not needed, because phase 2 will block
100-
* until we release the content lock and then flush the modified data to
101-
* disk.)
102-
*
103-
* Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
104-
* to phase 3. This is useful if we are performing a WAL-logged operation that
105-
* might invalidate buffers, such as relation truncation. In this case, we need
106-
* to ensure that any buffers which were invalidated and thus not flushed by
107-
* the checkpoint are actaully destroyed on disk. Replay can cope with a file
108-
* or block that doesn't exist, but not with a block that has the wrong
109-
* contents.
80+
* Flags used only for type of internal functions
81+
* GetVirtualXIDsDelayingChkptGuts and HaveVirtualXIDsDelayingChkptGuts.
11082
*/
11183
#define DELAY_CHKPT_START (1<<0)
11284
#define DELAY_CHKPT_COMPLETE (1<<1)
@@ -185,6 +157,12 @@ struct PGPROC
185157
*/
186158
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
187159
int syncRepState; /* wait state for sync rep */
160+
bool delayChkptEnd; /* true if this proc delays checkpoint end;
161+
* this doesn't have anything to do with
162+
* sync rep but we don't want to change
163+
* the size of PGPROC in released branches
164+
* and thus must fit this new field into
165+
* existing padding space */
188166
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
189167

190168
/*
@@ -267,7 +245,7 @@ typedef struct PGXACT
267245

268246
uint8 vacuumFlags; /* vacuum-related flags, see above */
269247
bool overflowed;
270-
int delayChkpt; /* for DELAY_CHKPT_* flags */
248+
bool delayChkpt; /* true if this proc delays checkpoint start */
271249

272250
uint8 nxids;
273251
} PGXACT;

0 commit comments

Comments
 (0)