Skip to content

Commit d4ffb0e

Browse files
authored
Merge pull request #154 from postgres/master
Sync Fork from Upstream Repo
2 parents 7080ff3 + 301ed88 commit d4ffb0e

File tree

33 files changed

+1338
-1654
lines changed

33 files changed

+1338
-1654
lines changed

contrib/amcheck/verify_nbtree.c

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
769769
P_FIRSTDATAKEY(opaque));
770770
itup = (IndexTuple) PageGetItem(state->target, itemid);
771771
nextleveldown.leftmost = BTreeTupleGetDownLink(itup);
772-
nextleveldown.level = opaque->btpo.level - 1;
772+
nextleveldown.level = opaque->btpo_level - 1;
773773
}
774774
else
775775
{
@@ -794,14 +794,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
794794
if (opaque->btpo_prev != leftcurrent)
795795
bt_recheck_sibling_links(state, opaque->btpo_prev, leftcurrent);
796796

797-
/* Check level, which must be valid for non-ignorable page */
798-
if (level.level != opaque->btpo.level)
797+
/* Check level */
798+
if (level.level != opaque->btpo_level)
799799
ereport(ERROR,
800800
(errcode(ERRCODE_INDEX_CORRUPTED),
801801
errmsg("leftmost down link for level points to block in index \"%s\" whose level is not one level down",
802802
RelationGetRelationName(state->rel)),
803803
errdetail_internal("Block pointed to=%u expected level=%u level in pointed to block=%u.",
804-
current, level.level, opaque->btpo.level)));
804+
current, level.level, opaque->btpo_level)));
805805

806806
/* Verify invariants for page */
807807
bt_target_page_check(state);
@@ -1164,7 +1164,7 @@ bt_target_page_check(BtreeCheckState *state)
11641164
bt_child_highkey_check(state,
11651165
offset,
11661166
NULL,
1167-
topaque->btpo.level);
1167+
topaque->btpo_level);
11681168
}
11691169
continue;
11701170
}
@@ -1520,7 +1520,7 @@ bt_target_page_check(BtreeCheckState *state)
15201520
if (!P_ISLEAF(topaque) && P_RIGHTMOST(topaque) && state->readonly)
15211521
{
15221522
bt_child_highkey_check(state, InvalidOffsetNumber,
1523-
NULL, topaque->btpo.level);
1523+
NULL, topaque->btpo_level);
15241524
}
15251525
}
15261526

@@ -1597,7 +1597,7 @@ bt_right_page_check_scankey(BtreeCheckState *state)
15971597
ereport(DEBUG1,
15981598
(errcode(ERRCODE_NO_DATA),
15991599
errmsg_internal("level %u leftmost page of index \"%s\" was found deleted or half dead",
1600-
opaque->btpo.level, RelationGetRelationName(state->rel)),
1600+
opaque->btpo_level, RelationGetRelationName(state->rel)),
16011601
errdetail_internal("Deleted page found when building scankey from right sibling.")));
16021602

16031603
/* Be slightly more pro-active in freeing this memory, just in case */
@@ -1900,14 +1900,15 @@ bt_child_highkey_check(BtreeCheckState *state,
19001900
state->targetblock, blkno,
19011901
LSN_FORMAT_ARGS(state->targetlsn))));
19021902

1903-
/* Check level for non-ignorable page */
1904-
if (!P_IGNORE(opaque) && opaque->btpo.level != target_level - 1)
1903+
/* Do level sanity check */
1904+
if ((!P_ISDELETED(opaque) || P_HAS_FULLXID(opaque)) &&
1905+
opaque->btpo_level != target_level - 1)
19051906
ereport(ERROR,
19061907
(errcode(ERRCODE_INDEX_CORRUPTED),
19071908
errmsg("block found while following rightlinks from child of index \"%s\" has invalid level",
19081909
RelationGetRelationName(state->rel)),
19091910
errdetail_internal("Block pointed to=%u expected level=%u level in pointed to block=%u.",
1910-
blkno, target_level - 1, opaque->btpo.level)));
1911+
blkno, target_level - 1, opaque->btpo_level)));
19111912

19121913
/* Try to detect circular links */
19131914
if ((!first && blkno == state->prevrightlink) || blkno == opaque->btpo_prev)
@@ -2132,7 +2133,7 @@ bt_child_check(BtreeCheckState *state, BTScanInsert targetkey,
21322133
* check for downlink connectivity.
21332134
*/
21342135
bt_child_highkey_check(state, downlinkoffnum,
2135-
child, topaque->btpo.level);
2136+
child, topaque->btpo_level);
21362137

21372138
/*
21382139
* Since there cannot be a concurrent VACUUM operation in readonly mode,
@@ -2275,7 +2276,7 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
22752276
errmsg_internal("harmless interrupted page split detected in index %s",
22762277
RelationGetRelationName(state->rel)),
22772278
errdetail_internal("Block=%u level=%u left sibling=%u page lsn=%X/%X.",
2278-
blkno, opaque->btpo.level,
2279+
blkno, opaque->btpo_level,
22792280
opaque->btpo_prev,
22802281
LSN_FORMAT_ARGS(pagelsn))));
22812282
return;
@@ -2304,7 +2305,7 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
23042305
elog(DEBUG1, "checking for interrupted multi-level deletion due to missing downlink in index \"%s\"",
23052306
RelationGetRelationName(state->rel));
23062307

2307-
level = opaque->btpo.level;
2308+
level = opaque->btpo_level;
23082309
itemid = PageGetItemIdCareful(state, blkno, page, P_FIRSTDATAKEY(opaque));
23092310
itup = (IndexTuple) PageGetItem(page, itemid);
23102311
childblk = BTreeTupleGetDownLink(itup);
@@ -2319,16 +2320,16 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
23192320
break;
23202321

23212322
/* Do an extra sanity check in passing on internal pages */
2322-
if (copaque->btpo.level != level - 1)
2323+
if (copaque->btpo_level != level - 1)
23232324
ereport(ERROR,
23242325
(errcode(ERRCODE_INDEX_CORRUPTED),
23252326
errmsg_internal("downlink points to block in index \"%s\" whose level is not one level down",
23262327
RelationGetRelationName(state->rel)),
23272328
errdetail_internal("Top parent/under check block=%u block pointed to=%u expected level=%u level in pointed to block=%u.",
23282329
blkno, childblk,
2329-
level - 1, copaque->btpo.level)));
2330+
level - 1, copaque->btpo_level)));
23302331

2331-
level = copaque->btpo.level;
2332+
level = copaque->btpo_level;
23322333
itemid = PageGetItemIdCareful(state, childblk, child,
23332334
P_FIRSTDATAKEY(copaque));
23342335
itup = (IndexTuple) PageGetItem(child, itemid);
@@ -2389,7 +2390,7 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
23892390
errmsg("internal index block lacks downlink in index \"%s\"",
23902391
RelationGetRelationName(state->rel)),
23912392
errdetail_internal("Block=%u level=%u page lsn=%X/%X.",
2392-
blkno, opaque->btpo.level,
2393+
blkno, opaque->btpo_level,
23932394
LSN_FORMAT_ARGS(pagelsn))));
23942395
}
23952396

@@ -2983,21 +2984,28 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum)
29832984
}
29842985

29852986
/*
2986-
* Deleted pages have no sane "level" field, so can only check non-deleted
2987-
* page level
2987+
* Deleted pages that still use the old 32-bit XID representation have no
2988+
* sane "level" field because they type pun the field, but all other pages
2989+
* (including pages deleted on Postgres 14+) have a valid value.
29882990
*/
2989-
if (P_ISLEAF(opaque) && !P_ISDELETED(opaque) && opaque->btpo.level != 0)
2990-
ereport(ERROR,
2991-
(errcode(ERRCODE_INDEX_CORRUPTED),
2992-
errmsg("invalid leaf page level %u for block %u in index \"%s\"",
2993-
opaque->btpo.level, blocknum, RelationGetRelationName(state->rel))));
2991+
if (!P_ISDELETED(opaque) || P_HAS_FULLXID(opaque))
2992+
{
2993+
/* Okay, no reason not to trust btpo_level field from page */
29942994

2995-
if (!P_ISLEAF(opaque) && !P_ISDELETED(opaque) &&
2996-
opaque->btpo.level == 0)
2997-
ereport(ERROR,
2998-
(errcode(ERRCODE_INDEX_CORRUPTED),
2999-
errmsg("invalid internal page level 0 for block %u in index \"%s\"",
3000-
blocknum, RelationGetRelationName(state->rel))));
2995+
if (P_ISLEAF(opaque) && opaque->btpo_level != 0)
2996+
ereport(ERROR,
2997+
(errcode(ERRCODE_INDEX_CORRUPTED),
2998+
errmsg_internal("invalid leaf page level %u for block %u in index \"%s\"",
2999+
opaque->btpo_level, blocknum,
3000+
RelationGetRelationName(state->rel))));
3001+
3002+
if (!P_ISLEAF(opaque) && opaque->btpo_level == 0)
3003+
ereport(ERROR,
3004+
(errcode(ERRCODE_INDEX_CORRUPTED),
3005+
errmsg_internal("invalid internal page level 0 for block %u in index \"%s\"",
3006+
blocknum,
3007+
RelationGetRelationName(state->rel))));
3008+
}
30013009

30023010
/*
30033011
* Sanity checks for number of items on page.
@@ -3044,8 +3052,6 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum)
30443052
* state. This state is nonetheless treated as corruption by VACUUM on
30453053
* from version 9.4 on, so do the same here. See _bt_pagedel() for full
30463054
* details.
3047-
*
3048-
* Internal pages should never have garbage items, either.
30493055
*/
30503056
if (!P_ISLEAF(opaque) && P_ISHALFDEAD(opaque))
30513057
ereport(ERROR,
@@ -3054,11 +3060,27 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum)
30543060
blocknum, RelationGetRelationName(state->rel)),
30553061
errhint("This can be caused by an interrupted VACUUM in version 9.3 or older, before upgrade. Please REINDEX it.")));
30563062

3063+
/*
3064+
* Check that internal pages have no garbage items, and that no page has
3065+
* an invalid combination of deletion-related page level flags
3066+
*/
30573067
if (!P_ISLEAF(opaque) && P_HAS_GARBAGE(opaque))
30583068
ereport(ERROR,
30593069
(errcode(ERRCODE_INDEX_CORRUPTED),
3060-
errmsg("internal page block %u in index \"%s\" has garbage items",
3061-
blocknum, RelationGetRelationName(state->rel))));
3070+
errmsg_internal("internal page block %u in index \"%s\" has garbage items",
3071+
blocknum, RelationGetRelationName(state->rel))));
3072+
3073+
if (P_HAS_FULLXID(opaque) && !P_ISDELETED(opaque))
3074+
ereport(ERROR,
3075+
(errcode(ERRCODE_INDEX_CORRUPTED),
3076+
errmsg_internal("full transaction id page flag appears in non-deleted block %u in index \"%s\"",
3077+
blocknum, RelationGetRelationName(state->rel))));
3078+
3079+
if (P_ISDELETED(opaque) && P_ISHALFDEAD(opaque))
3080+
ereport(ERROR,
3081+
(errcode(ERRCODE_INDEX_CORRUPTED),
3082+
errmsg_internal("deleted page block %u in index \"%s\" is half-dead",
3083+
blocknum, RelationGetRelationName(state->rel))));
30623084

30633085
return page;
30643086
}

contrib/pageinspect/btreefuncs.c

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,7 @@ typedef struct BTPageStat
7575
/* opaque data */
7676
BlockNumber btpo_prev;
7777
BlockNumber btpo_next;
78-
union
79-
{
80-
uint32 level;
81-
TransactionId xact;
82-
} btpo;
78+
uint32 btpo_level;
8379
uint16 btpo_flags;
8480
BTCycleId btpo_cycleid;
8581
} BTPageStat;
@@ -112,9 +108,33 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
112108
/* page type (flags) */
113109
if (P_ISDELETED(opaque))
114110
{
115-
stat->type = 'd';
116-
stat->btpo.xact = opaque->btpo.xact;
117-
return;
111+
/* We divide deleted pages into leaf ('d') or internal ('D') */
112+
if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque))
113+
stat->type = 'd';
114+
else
115+
stat->type = 'D';
116+
117+
/*
118+
* Report safexid in a deleted page.
119+
*
120+
* Handle pg_upgrade'd deleted pages that used the previous safexid
121+
* representation in btpo_level field (this used to be a union type
122+
* called "bpto").
123+
*/
124+
if (P_HAS_FULLXID(opaque))
125+
{
126+
FullTransactionId safexid = BTPageGetDeleteXid(page);
127+
128+
elog(NOTICE, "deleted page from block %u has safexid %u:%u",
129+
blkno, EpochFromFullTransactionId(safexid),
130+
XidFromFullTransactionId(safexid));
131+
}
132+
else
133+
elog(NOTICE, "deleted page from block %u has safexid %u",
134+
blkno, opaque->btpo_level);
135+
136+
/* Don't interpret BTDeletedPageData as index tuples */
137+
maxoff = InvalidOffsetNumber;
118138
}
119139
else if (P_IGNORE(opaque))
120140
stat->type = 'e';
@@ -128,7 +148,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
128148
/* btpage opaque data */
129149
stat->btpo_prev = opaque->btpo_prev;
130150
stat->btpo_next = opaque->btpo_next;
131-
stat->btpo.level = opaque->btpo.level;
151+
stat->btpo_level = opaque->btpo_level;
132152
stat->btpo_flags = opaque->btpo_flags;
133153
stat->btpo_cycleid = opaque->btpo_cycleid;
134154

@@ -237,7 +257,7 @@ bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
237257
values[j++] = psprintf("%u", stat.free_size);
238258
values[j++] = psprintf("%u", stat.btpo_prev);
239259
values[j++] = psprintf("%u", stat.btpo_next);
240-
values[j++] = psprintf("%u", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
260+
values[j++] = psprintf("%u", stat.btpo_level);
241261
values[j++] = psprintf("%d", stat.btpo_flags);
242262

243263
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
@@ -503,10 +523,14 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version)
503523

504524
opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
505525

506-
if (P_ISDELETED(opaque))
507-
elog(NOTICE, "page is deleted");
508-
509-
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
526+
if (!P_ISDELETED(opaque))
527+
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
528+
else
529+
{
530+
/* Don't interpret BTDeletedPageData as index tuples */
531+
elog(NOTICE, "page from block " INT64_FORMAT " is deleted", blkno);
532+
fctx->max_calls = 0;
533+
}
510534
uargs->leafpage = P_ISLEAF(opaque);
511535
uargs->rightmost = P_RIGHTMOST(opaque);
512536

@@ -603,7 +627,14 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
603627
if (P_ISDELETED(opaque))
604628
elog(NOTICE, "page is deleted");
605629

606-
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
630+
if (!P_ISDELETED(opaque))
631+
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
632+
else
633+
{
634+
/* Don't interpret BTDeletedPageData as index tuples */
635+
elog(NOTICE, "page from block is deleted");
636+
fctx->max_calls = 0;
637+
}
607638
uargs->leafpage = P_ISLEAF(opaque);
608639
uargs->rightmost = P_RIGHTMOST(opaque);
609640

@@ -692,10 +723,7 @@ bt_metap(PG_FUNCTION_ARGS)
692723

693724
/*
694725
* We need a kluge here to detect API versions prior to 1.8. Earlier
695-
* versions incorrectly used int4 for certain columns. This caused
696-
* various problems. For example, an int4 version of the "oldest_xact"
697-
* column would not work with TransactionId values that happened to exceed
698-
* PG_INT32_MAX.
726+
* versions incorrectly used int4 for certain columns.
699727
*
700728
* There is no way to reliably avoid the problems created by the old
701729
* function definition at this point, so insist that the user update the
@@ -723,7 +751,8 @@ bt_metap(PG_FUNCTION_ARGS)
723751
*/
724752
if (metad->btm_version >= BTREE_NOVAC_VERSION)
725753
{
726-
values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
754+
values[j++] = psprintf(INT64_FORMAT,
755+
(int64) metad->btm_last_cleanup_num_delpages);
727756
values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
728757
values[j++] = metad->btm_allequalimage ? "t" : "f";
729758
}

contrib/pageinspect/expected/btree.out

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,16 @@ INSERT INTO test1 VALUES (72057594037927937, 'text');
33
CREATE INDEX test1_a_idx ON test1 USING btree (a);
44
\x
55
SELECT * FROM bt_metap('test1_a_idx');
6-
-[ RECORD 1 ]-----------+-------
7-
magic | 340322
8-
version | 4
9-
root | 1
10-
level | 0
11-
fastroot | 1
12-
fastlevel | 0
13-
oldest_xact | 0
14-
last_cleanup_num_tuples | -1
15-
allequalimage | t
6+
-[ RECORD 1 ]-------------+-------
7+
magic | 340322
8+
version | 4
9+
root | 1
10+
level | 0
11+
fastroot | 1
12+
fastlevel | 0
13+
last_cleanup_num_delpages | 0
14+
last_cleanup_num_tuples | -1
15+
allequalimage | t
1616

1717
SELECT * FROM bt_page_stats('test1_a_idx', -1);
1818
ERROR: invalid block number
@@ -29,7 +29,7 @@ page_size | 8192
2929
free_size | 8128
3030
btpo_prev | 0
3131
btpo_next | 0
32-
btpo | 0
32+
btpo_level | 0
3333
btpo_flags | 3
3434

3535
SELECT * FROM bt_page_stats('test1_a_idx', 2);

0 commit comments

Comments
 (0)