Skip to content

Commit d15e4c2

Browse files
Bug #22963951 REDUCE THE TIME OF LOOKING FOR
MLOG_CHECKPOINT DURING CRASH RECOVERY Analysis: ========= In mysql-5.7, up to three scans of the redo log are possible. Simultaneous scan and apply is possible only in the third scan. First scan: Scan all the redo logs from the checkpoint LSN and process only MLOG_FILE_* records. It scans up to the last MLOG_CHECKPOINT. Second scan: Scan all redo logs from the checkpoint LSN and add log records to the hash table. It verifies that each space id has a corresponding MLOG_FILE_NAME record. If the hash table heap memory reaches the threshold, it stops adding records to the hash table but continues to scan until the end of the redo log file. Third scan: Scan all redo logs from the checkpoint LSN and add log records to the hash table only if the tablespace exists. If the heap memory reaches the threshold, simultaneous scan and apply will happen. Note: The third scan only happens if the hash table memory reaches the threshold during the second scan. Fix: ==== Merge the first scan and the second scan into a single scan. Only a single scan is needed if the hash table doesn't reach the threshold limit. In the worst-case scenario, two scans are done. Reviewed-by: Debarun Banerjee <debarun.banerjee@oracle.com> RB: 14725
1 parent 6af0cdd commit d15e4c2

File tree

1 file changed

+43
-59
lines changed

1 file changed

+43
-59
lines changed

storage/innobase/log/log0recv.cc

+43-59
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ fil_name_process(
387387
@param[in] first_page_no first page number in the file
388388
@param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE
389389
or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
390-
@param[in] apply whether to apply the record
391390
@return pointer to next redo log record
392391
@retval NULL if this log record was truncated */
393392
static
@@ -397,8 +396,7 @@ fil_name_parse(
397396
const byte* end,
398397
ulint space_id,
399398
ulint first_page_no,
400-
mlog_id_t type,
401-
bool apply)
399+
mlog_id_t type)
402400
{
403401
if (type == MLOG_FILE_CREATE2) {
404402
if (end < ptr + 4) {
@@ -488,9 +486,6 @@ fil_name_parse(
488486
reinterpret_cast<char*>(new_name), new_len,
489487
space_id, false);
490488

491-
if (!apply) {
492-
break;
493-
}
494489
if (!fil_op_replay_rename(
495490
space_id, first_page_no,
496491
reinterpret_cast<const char*>(ptr),
@@ -583,7 +578,6 @@ fil_name_process(
583578
@param[in] first_page_no first page number in the file
584579
@param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE
585580
or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
586-
@param[in] apply whether to apply the record
587581
@retval pointer to next redo log record
588582
@retval NULL if this log record was truncated */
589583
static
@@ -593,8 +587,7 @@ fil_name_parse(
593587
const byte* end,
594588
ulint space_id,
595589
ulint first_page_no,
596-
mlog_id_t type,
597-
bool apply)
590+
mlog_id_t type)
598591
{
599592

600593
ulint flags = mach_read_from_4(ptr);
@@ -674,7 +667,7 @@ fil_name_parse(
674667
fil_name_process(
675668
name, len, space_id, true);
676669

677-
if (apply && recv_replay_file_ops
670+
if (recv_replay_file_ops
678671
&& fil_space_get(space_id)) {
679672
dberr_t err = fil_delete_tablespace(
680673
space_id, BUF_REMOVE_FLUSH_NO_WRITE);
@@ -1668,7 +1661,6 @@ specified.
16681661
@param[in] end_ptr end of buffer
16691662
@param[in] space_id tablespace identifier
16701663
@param[in] page_no page number
1671-
@param[in] apply whether to apply the record
16721664
@param[in,out] block buffer block, or NULL if
16731665
a page log record should not be applied
16741666
or if it is a MLOG_FILE_ operation
@@ -1683,14 +1675,10 @@ recv_parse_or_apply_log_rec_body(
16831675
byte* end_ptr,
16841676
ulint space_id,
16851677
ulint page_no,
1686-
bool apply,
16871678
buf_block_t* block,
16881679
mtr_t* mtr)
16891680
{
16901681
ut_ad(!block == !mtr);
1691-
#ifndef UNIV_HOTBACKUP
1692-
ut_ad(!apply || recv_sys->mlog_checkpoint_lsn != 0);
1693-
#endif /* !UNIV_HOTBACKUP */
16941682

16951683
switch (type) {
16961684
case MLOG_FILE_NAME:
@@ -1700,8 +1688,7 @@ recv_parse_or_apply_log_rec_body(
17001688
ut_ad(block == NULL);
17011689
/* Collect the file names when parsing the log,
17021690
before applying any log records. */
1703-
return(fil_name_parse(ptr, end_ptr, space_id, page_no, type,
1704-
apply));
1691+
return(fil_name_parse(ptr, end_ptr, space_id, page_no, type));
17051692
case MLOG_INDEX_LOAD:
17061693
#ifdef UNIV_HOTBACKUP
17071694
/* While scanning redo logs during backup phase a
@@ -1774,8 +1761,7 @@ recv_parse_or_apply_log_rec_body(
17741761
encryption key information before the page 0 is recovered.
17751762
Otherwise, redo will not find the key to decrypt
17761763
the data pages. */
1777-
if (page_no == 0 && !is_system_tablespace(space_id)
1778-
&& !apply) {
1764+
if (page_no == 0 && !is_system_tablespace(space_id)) {
17791765
return(fil_write_encryption_parse(ptr,
17801766
end_ptr,
17811767
space_id));
@@ -1795,18 +1781,9 @@ recv_parse_or_apply_log_rec_body(
17951781

17961782
if (block) {
17971783
/* Applying a page log record. */
1798-
ut_ad(apply);
17991784
page = block->frame;
18001785
page_zip = buf_block_get_page_zip(block);
18011786
ut_d(page_type = fil_page_get_type(page));
1802-
} else if (apply
1803-
&& !is_predefined_tablespace(space_id)
1804-
&& recv_spaces.find(space_id) == recv_spaces.end()) {
1805-
ib::fatal() << "Missing MLOG_FILE_NAME or MLOG_FILE_DELETE"
1806-
" for redo log record " << type << " (page "
1807-
<< space_id << ":" << page_no << ") at "
1808-
<< recv_sys->recovered_lsn << ".";
1809-
return(NULL);
18101787
} else {
18111788
/* Parsing a page log record. */
18121789
page = NULL;
@@ -2520,7 +2497,7 @@ recv_recover_page_func(
25202497
recv_parse_or_apply_log_rec_body(
25212498
recv->type, buf, buf + recv->len,
25222499
recv_addr->space, recv_addr->page_no,
2523-
true, block, &mtr);
2500+
block, &mtr);
25242501

25252502
end_lsn = recv->start_lsn + recv->len;
25262503
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
@@ -3052,7 +3029,7 @@ recv_parse_log_rec(
30523029
}
30533030

30543031
new_ptr = recv_parse_or_apply_log_rec_body(
3055-
*type, new_ptr, end_ptr, *space, *page_no, apply, NULL, NULL);
3032+
*type, new_ptr, end_ptr, *space, *page_no, NULL, NULL);
30563033

30573034
if (UNIV_UNLIKELY(new_ptr == NULL)) {
30583035

@@ -3159,15 +3136,13 @@ enum store_t {
31593136
hash table to wait merging to file pages.
31603137
@param[in] checkpoint_lsn the LSN of the latest checkpoint
31613138
@param[in] store whether to store page operations
3162-
@param[in] apply whether to apply the records
31633139
@return whether MLOG_CHECKPOINT record was seen the first time,
31643140
or corruption was noticed */
31653141
static MY_ATTRIBUTE((warn_unused_result))
31663142
bool
31673143
recv_parse_log_recs(
31683144
lsn_t checkpoint_lsn,
3169-
store_t store,
3170-
bool apply)
3145+
store_t store)
31713146
{
31723147
byte* ptr;
31733148
byte* end_ptr;
@@ -3213,7 +3188,7 @@ recv_parse_log_recs(
32133188
page no, and a pointer to the body of the log record */
32143189

32153190
len = recv_parse_log_rec(&type, ptr, end_ptr, &space,
3216-
&page_no, apply, &body);
3191+
&page_no, true, &body);
32173192

32183193
if (len == 0) {
32193194
return(false);
@@ -3279,9 +3254,6 @@ recv_parse_log_recs(
32793254
}
32803255
recv_sys->mlog_checkpoint_lsn
32813256
= recv_sys->recovered_lsn;
3282-
#ifndef UNIV_HOTBACKUP
3283-
return(true);
3284-
#endif /* !UNIV_HOTBACKUP */
32853257
}
32863258
break;
32873259
case MLOG_FILE_NAME:
@@ -3425,7 +3397,7 @@ recv_parse_log_recs(
34253397
completely recovered (until MLOG_MULTI_REC_END). */
34263398
len = recv_parse_log_rec(
34273399
&type, ptr, end_ptr, &space, &page_no,
3428-
apply, &body);
3400+
true, &body);
34293401

34303402
if (recv_sys->found_corrupt_log
34313403
&& !recv_report_corrupt_log(
@@ -3614,7 +3586,6 @@ recv_scan_log_recs(
36143586
bool finished = false;
36153587
ulint data_len;
36163588
bool more_data = false;
3617-
bool apply = recv_sys->mlog_checkpoint_lsn != 0;
36183589
ulint recv_parsing_buf_size = RECV_PARSING_BUF_SIZE;
36193590

36203591
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
@@ -3781,7 +3752,7 @@ recv_scan_log_recs(
37813752
/* Try to parse more log records */
37823753

37833754
if (recv_parse_log_recs(checkpoint_lsn,
3784-
*store_to_hash, apply)) {
3755+
*store_to_hash)) {
37853756
ut_ad(recv_sys->found_corrupt_log
37863757
|| recv_sys->found_corrupt_fs
37873758
|| recv_sys->mlog_checkpoint_lsn
@@ -3843,8 +3814,7 @@ recv_group_scan_log_recs(
38433814
lsn_t checkpoint_lsn = *contiguous_lsn;
38443815
lsn_t start_lsn;
38453816
lsn_t end_lsn;
3846-
store_t store_to_hash = recv_sys->mlog_checkpoint_lsn == 0
3847-
? STORE_NO : (last_phase ? STORE_IF_EXISTS : STORE_YES);
3817+
store_t store_to_hash = last_phase ? STORE_IF_EXISTS : STORE_YES;
38483818
ulint available_mem = UNIV_PAGE_SIZE
38493819
* (buf_pool_get_n_pages()
38503820
- (recv_n_pool_free_frames * srv_buf_pool_instances));
@@ -3926,6 +3896,25 @@ recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i)
39263896
return(err);
39273897
}
39283898

3899+
/** Report a missing mlog_file_name or mlog_file_delete record for
3900+
the tablespace.
3901+
@param[in] recv_addr Hashed page file address. */
3902+
static
3903+
void
3904+
recv_init_missing_mlog(
3905+
recv_addr_t* recv_addr)
3906+
{
3907+
ulint space_id = recv_addr->space;
3908+
ulint page_no = recv_addr->page_no;
3909+
ulint type = UT_LIST_GET_FIRST(recv_addr->rec_list)->type;
3910+
ulint start_lsn = UT_LIST_GET_FIRST(recv_addr->rec_list)->start_lsn;
3911+
3912+
ib::fatal() << "Missing MLOG_FILE_NAME or MLOG_FILE_DELETE "
3913+
"for redo log record " << type << " (page "
3914+
<< space_id << ":" << page_no << ") at "
3915+
<< start_lsn;
3916+
}
3917+
39293918
/** Check if all tablespaces were found for crash recovery.
39303919
@return error code or DB_SUCCESS */
39313920
static MY_ATTRIBUTE((warn_unused_result))
@@ -3981,7 +3970,12 @@ recv_init_crash_recovery_spaces(void)
39813970

39823971
recv_spaces_t::iterator i
39833972
= recv_spaces.find(space);
3984-
ut_ad(i != recv_spaces.end());
3973+
3974+
if (i != recv_spaces.end()) {
3975+
recv_init_missing_mlog(recv_addr);
3976+
recv_addr->state = RECV_DISCARDED;
3977+
continue;
3978+
}
39853979

39863980
if (i->second.deleted) {
39873981
ut_ad(missing_spaces.find(space)
@@ -4153,13 +4147,13 @@ recv_recovery_from_checkpoint_start(
41534147
return(DB_ERROR);
41544148
}
41554149

4156-
/* Look for MLOG_CHECKPOINT. */
4157-
recv_group_scan_log_recs(group, &contiguous_lsn, false);
4158-
/* The first scan should not have stored or applied any records. */
4159-
ut_ad(recv_sys->n_addrs == 0);
4160-
ut_ad(!recv_sys->found_corrupt_fs);
4150+
/** Scan the redo log from the checkpoint lsn and add the redo log
4151+
records to the hash table. */
4152+
rescan = recv_group_scan_log_recs(group, &contiguous_lsn, false);
41614153

4162-
if (recv_sys->found_corrupt_log && !srv_force_recovery) {
4154+
4155+
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
4156+
|| recv_sys->found_corrupt_fs) {
41634157
log_mutex_exit();
41644158
return(DB_ERROR);
41654159
}
@@ -4179,16 +4173,6 @@ recv_recovery_from_checkpoint_start(
41794173

41804174
group->scanned_lsn = checkpoint_lsn;
41814175
rescan = false;
4182-
} else {
4183-
contiguous_lsn = checkpoint_lsn;
4184-
rescan = recv_group_scan_log_recs(
4185-
group, &contiguous_lsn, false);
4186-
4187-
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
4188-
|| recv_sys->found_corrupt_fs) {
4189-
log_mutex_exit();
4190-
return(DB_ERROR);
4191-
}
41924176
}
41934177

41944178
/* NOTE: we always do a 'recovery' at startup, but only if

0 commit comments

Comments
 (0)