Skip to content

Commit 2cfe752

Browse files
author
Tor Didriksen
committed
Bug#34969838 heap-buffer-overflow in StrXfrmTest.ChineseUTF8MB4 gunit test
The loading/unloading of UCA character sets was never really written to tolerate a sequence of init/uninit/init/uninit. Rewrite handling of MY_UCA_INFO structs, so that it is simple to see whether it owns allocated memory, or points to static data structures. Re-introduce mem_malloc/mem_free in MY_CHARSET_LOADER, so that we can free allocated memory. The previous use of once_alloc was really only hiding memory leaks. Change-Id: I02c792e612fe1bb3488ebe4161ed791fbe4aebc2
1 parent bd0d51c commit 2cfe752

File tree

13 files changed

+160
-98
lines changed

13 files changed

+160
-98
lines changed

include/mysql/strings/m_ctype.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <cstddef>
3434
#include <cstdint>
35+
#include <cstdlib>
3536
#include <deque>
3637

3738
#include "mysql/attribute.h"
@@ -228,6 +229,9 @@ class MY_CHARSET_LOADER {
228229
*/
229230
virtual void *once_alloc(size_t);
230231

232+
virtual void *mem_malloc(size_t size) { return malloc(size); }
233+
virtual void mem_free(void *ptr) { free(ptr); }
234+
231235
private:
232236
std::deque<void *> m_delete_list;
233237
};
@@ -239,7 +243,7 @@ enum Pad_attribute { PAD_SPACE, NO_PAD };
239243
/* See strings/CHARSET_INFO.txt for information about this structure */
240244
struct MY_COLLATION_HANDLER {
241245
bool (*init)(CHARSET_INFO *, MY_CHARSET_LOADER *, MY_CHARSET_ERRMSG *);
242-
void (*uninit)(CHARSET_INFO *);
246+
void (*uninit)(CHARSET_INFO *, MY_CHARSET_LOADER *);
243247
/* Collation routines */
244248
int (*strnncoll)(const CHARSET_INFO *, const uint8_t *, size_t,
245249
const uint8_t *, size_t, bool);

mysys/charset.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ class Mysys_charset_loader : public MY_CHARSET_LOADER {
104104
return my_once_alloc(sz, MYF(MY_WME));
105105
}
106106

107+
void *mem_malloc(size_t sz) override {
108+
return my_malloc(key_memory_charset_loader, sz, MYF(MY_WME));
109+
}
110+
111+
void mem_free(void *ptr) override { my_free(ptr); }
112+
107113
void *read_file(const char *path, size_t *size) override;
108114
};
109115

mysys/my_init.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,8 @@ static PSI_memory_info all_mysys_memory[] = {
539539

540540
{&key_memory_max_alloca, "max_alloca", PSI_FLAG_ONLY_GLOBAL_STAT, 0,
541541
PSI_DOCUMENT_ME},
542+
{&key_memory_charset_loader, "charset_loader", PSI_FLAG_ONLY_GLOBAL_STAT, 0,
543+
PSI_DOCUMENT_ME},
542544
{&key_memory_lf_node, "lf_node", PSI_FLAG_ONLY_GLOBAL_STAT, 0,
543545
PSI_DOCUMENT_ME},
544546
{&key_memory_lf_dynarray, "lf_dynarray", PSI_FLAG_ONLY_GLOBAL_STAT, 0,

mysys/my_static.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
/* get memory in hunks */
5151
constexpr uint ONCE_ALLOC_INIT = 4096 - MALLOC_OVERHEAD;
5252

53+
PSI_memory_key key_memory_charset_loader;
5354
PSI_memory_key key_memory_lf_node;
5455
PSI_memory_key key_memory_lf_dynarray;
5556
PSI_memory_key key_memory_lf_slist;

mysys/mysys_priv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ extern PSI_file_key key_file_charset;
6565

6666
/* These keys are always defined. */
6767

68+
extern PSI_memory_key key_memory_charset_loader;
6869
extern PSI_memory_key key_memory_lf_node;
6970
extern PSI_memory_key key_memory_lf_dynarray;
7071
extern PSI_memory_key key_memory_lf_slist;

strings/collations_internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ Collations::~Collations() {
654654
for (auto p : m_all_by_id) {
655655
CHARSET_INFO *cs = p.second;
656656
if (cs->coll && cs->coll->uninit) {
657-
cs->coll->uninit(cs);
657+
cs->coll->uninit(cs, m_loader);
658658
}
659659
}
660660
if (m_owns_loader) {

strings/ctype-mb.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ bool my_like_range_mb(const CHARSET_INFO *cs, const char *ptr,
729729
'ab\min\min\min\min' and 'ab\max\max\max\max'.
730730
731731
*/
732-
const char *contraction_flags = nullptr;
732+
const MY_UCA_INFO::flags_type *contraction_flags = nullptr;
733733
if (cs->uca) contraction_flags = cs->uca->contraction_flags;
734734
if (contraction_flags && ptr + 1 < end &&
735735
my_uca_can_be_contraction_head(contraction_flags, (uint8_t)*ptr)) {
@@ -875,7 +875,7 @@ bool my_like_range_generic(const CHARSET_INFO *cs, const char *ptr,
875875
goto pad_min_max;
876876
}
877877

878-
const char *contraction_flags = nullptr;
878+
const MY_UCA_INFO::flags_type *contraction_flags = nullptr;
879879
if (cs->uca) contraction_flags = cs->uca->contraction_flags;
880880
if (contraction_flags &&
881881
my_uca_can_be_contraction_head(contraction_flags, wc) &&

strings/ctype-uca.cc

Lines changed: 86 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,16 @@
6363
#include "template_utils.h"
6464

6565
MY_UCA_INFO my_uca_v400 = {
66-
UCA_V400,
66+
UCA_V400, /* version */
67+
nullptr, /* in case we clone an instance */
6768

68-
0xFFFF, /* maxchar */
69-
uca_length, uca_weight, false, nullptr, /* contractions */
70-
nullptr,
69+
0xFFFF, /* maxchar */
70+
uca_length, /* lengths */
71+
nullptr, /* m_allocated_weights */
72+
uca_weight, /* weights */
73+
false, /* have_contractions */
74+
nullptr, /* contraction_nodes */
75+
nullptr, /* contraction_flags */
7176

7277
/* Logical positions */
7378
0x0009, /* first_non_ignorable p != ignore */
@@ -95,10 +100,12 @@ MY_UCA_INFO my_uca_v400 = {
95100
/******************************************************/
96101

97102
MY_UCA_INFO my_uca_v520 = {
98-
UCA_V520,
103+
UCA_V520, /* version */
104+
nullptr, /* in case we clone an instance */
99105

100106
0x10FFFF, /* maxchar */
101107
uca520_length,
108+
nullptr, /* m_allocated_weights */
102109
uca520_weight,
103110
false,
104111
nullptr, /* contractions */
@@ -771,7 +778,7 @@ class my_uca_scanner {
771778
unsigned wbeg_stride{0}; /* Number of bytes between weights in string */
772779
const uint8_t *sbeg; /* Beginning of the input string */
773780
const uint8_t *send; /* End of the input string */
774-
const MY_UCA_INFO *uca;
781+
const MY_UCA_INFO *uca{nullptr};
775782
uint16_t implicit[10]{};
776783
my_wc_t prev_char{0}; // Previous code point we scanned, if any.
777784
const CHARSET_INFO *cs;
@@ -890,9 +897,9 @@ class uca_scanner_900 : public my_uca_scanner {
890897
@param flag flag: "is contraction head", "is contraction tail"
891898
*/
892899

893-
static inline void my_uca_add_contraction_flag(char *flags, my_wc_t wc,
894-
int flag) {
895-
flags[wc & MY_UCA_CNT_FLAG_MASK] |= flag;
900+
static inline void my_uca_add_contraction_flag(MY_UCA_INFO::flags_type *flags,
901+
my_wc_t wc, int flag) {
902+
(*flags)[wc & MY_UCA_CNT_FLAG_MASK] |= flag;
896903
}
897904

898905
/**
@@ -969,9 +976,9 @@ const uint16_t *my_uca_contraction2_weight(
969976
@retval true - can be previous context head
970977
*/
971978

972-
static inline bool my_uca_can_be_previous_context_head(const char *flags,
973-
my_wc_t wc) {
974-
return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
979+
static inline bool my_uca_can_be_previous_context_head(
980+
const MY_UCA_INFO::flags_type *flags, my_wc_t wc) {
981+
return (*flags)[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
975982
}
976983

977984
/**
@@ -984,9 +991,9 @@ static inline bool my_uca_can_be_previous_context_head(const char *flags,
984991
@retval true - can be contraction tail
985992
*/
986993

987-
static inline bool my_uca_can_be_previous_context_tail(const char *flags,
988-
my_wc_t wc) {
989-
return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
994+
static inline bool my_uca_can_be_previous_context_tail(
995+
const MY_UCA_INFO::flags_type *flags, my_wc_t wc) {
996+
return (*flags)[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
990997
}
991998

992999
/**
@@ -2804,13 +2811,13 @@ typedef enum {
28042811
} my_coll_shift_method;
28052812

28062813
struct MY_COLL_RULES {
2807-
MY_UCA_INFO *uca; /* Unicode weight data */
2808-
size_t nrules; /* Number of rules in the rule array */
2809-
size_t mrules; /* Number of allocated rules */
2810-
MY_COLL_RULE *rule; /* Rule array */
2811-
MY_CHARSET_LOADER *loader;
2812-
MY_CHARSET_ERRMSG *errmsg;
2813-
my_coll_shift_method shift_after_method;
2814+
MY_UCA_INFO *uca{nullptr}; /* Unicode weight data */
2815+
size_t nrules{0}; /* Number of rules in the rule array */
2816+
size_t mrules{0}; /* Number of allocated rules */
2817+
MY_COLL_RULE *rule{nullptr}; /* Rule array */
2818+
MY_CHARSET_LOADER *loader{nullptr};
2819+
MY_CHARSET_ERRMSG *errmsg{nullptr};
2820+
my_coll_shift_method shift_after_method{my_shift_method_simple};
28142821
};
28152822

28162823
/**
@@ -3727,9 +3734,9 @@ static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
37273734
const MY_UCA_INFO *src, MY_UCA_INFO *dst,
37283735
size_t page) {
37293736
const unsigned dst_size = 256 * dst->lengths[page] * sizeof(uint16_t);
3730-
if (!(dst->weights[page] =
3731-
static_cast<uint16_t *>((loader->once_alloc)(dst_size))))
3732-
return true;
3737+
dst->weights[page] = static_cast<uint16_t *>(loader->mem_malloc(dst_size));
3738+
if (dst->weights[page] == nullptr) return true;
3739+
dst->m_allocated_weights->at(page) = 1;
37333740

37343741
assert(src->lengths[page] <= dst->lengths[page]);
37353742
memset(dst->weights[page], 0, dst_size);
@@ -4093,6 +4100,7 @@ static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
40934100
// may already be set.
40944101
assert(dst->weights[page] == nullptr ||
40954102
dst->weights[page] == ja_han_pages[page - MIN_JA_HAN_PAGE]);
4103+
assert(dst->m_allocated_weights->at(page) == 0);
40964104
dst->weights[page] = ja_han_pages[page - MIN_JA_HAN_PAGE];
40974105
}
40984106
}
@@ -4102,9 +4110,13 @@ static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
41024110
characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the
41034111
DUCET pages with these pages.
41044112
*/
4105-
static void copy_zh_han_pages(MY_UCA_INFO *dst) {
4113+
static void copy_zh_han_pages(MY_UCA_INFO *dst, MY_CHARSET_LOADER *loader) {
41064114
for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) {
41074115
if (zh_han_pages[page - MIN_ZH_HAN_PAGE]) {
4116+
if (dst->m_allocated_weights->at(page)) {
4117+
loader->mem_free(dst->weights[page]);
4118+
dst->m_allocated_weights->at(page) = 0;
4119+
}
41084120
dst->weights[page] = zh_han_pages[page - MIN_ZH_HAN_PAGE];
41094121
}
41104122
}
@@ -4208,8 +4220,7 @@ static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst,
42084220
}
42094221

42104222
static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
4211-
MY_UCA_INFO *dst, const MY_UCA_INFO *src,
4212-
bool lengths_are_temporary) {
4223+
MY_UCA_INFO *dst, const MY_UCA_INFO *src) {
42134224
MY_COLL_RULE *r, *rlast;
42144225
size_t i, npages = (src->maxchar + 1) / 256;
42154226
bool has_contractions = false;
@@ -4220,19 +4231,14 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
42204231
if (check_rules(rules, dst, src)) return true;
42214232

42224233
/* Allocate memory for pages and their lengths */
4223-
if (lengths_are_temporary) {
4224-
if (!(dst->lengths = static_cast<uint8_t *>(malloc(npages)))) return true;
4225-
if (!(dst->weights = static_cast<uint16_t **>(
4226-
(loader->once_alloc)(npages * sizeof(uint16_t *))))) {
4227-
free(dst->lengths);
4228-
return true;
4229-
}
4230-
} else {
4231-
if (!(dst->lengths =
4232-
static_cast<uint8_t *>((loader->once_alloc)(npages))) ||
4233-
!(dst->weights = static_cast<uint16_t **>(
4234-
(loader->once_alloc)(npages * sizeof(uint16_t *)))))
4235-
return true;
4234+
dst->lengths = static_cast<uint8_t *>(loader->mem_malloc(npages));
4235+
dst->weights =
4236+
static_cast<uint16_t **>(loader->mem_malloc(npages * sizeof(uint16_t *)));
4237+
if (dst->lengths == nullptr || dst->weights == nullptr) return true;
4238+
4239+
if (dst->m_allocated_weights == nullptr) {
4240+
dst->m_allocated_weights = new std::vector<uint8_t>();
4241+
dst->m_allocated_weights->resize(npages, 0);
42364242
}
42374243

42384244
/*
@@ -4273,10 +4279,7 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
42734279
if (has_contractions) {
42744280
dst->have_contractions = true;
42754281
dst->contraction_nodes = new std::vector<MY_CONTRACTION>(0);
4276-
if (!(dst->contraction_flags =
4277-
(char *)loader->once_alloc(MY_UCA_CNT_FLAG_SIZE)))
4278-
return true;
4279-
memset(dst->contraction_flags, 0, MY_UCA_CNT_FLAG_SIZE);
4282+
dst->contraction_flags = new std::array<char, MY_UCA_CNT_FLAG_SIZE>{};
42804283
}
42814284
if (cs->coll_param == &zh_coll_param) {
42824285
/*
@@ -4292,7 +4295,7 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
42924295
return rc;
42934296
}
42944297
modify_all_zh_pages(cs->coll_param->reorder_param, dst, npages);
4295-
copy_zh_han_pages(dst);
4298+
copy_zh_han_pages(dst, loader);
42964299
} else {
42974300
/* Allocate pages that we'll overwrite and copy default weights */
42984301
for (i = 0; i < npages; i++) {
@@ -4742,7 +4745,8 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
47424745
return false; /* Ok to add a collation without tailoring */
47434746

47444747
MY_COLL_RULES rules;
4745-
MY_UCA_INFO new_uca, *src_uca = nullptr;
4748+
MY_UCA_INFO new_uca;
4749+
MY_UCA_INFO *src_uca = nullptr;
47464750
int rc = 0;
47474751
MY_UCA_INFO *src, *dst;
47484752
size_t npages;
@@ -4751,11 +4755,9 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
47514755
errmsg->errcode = 0;
47524756
*errmsg->errarg = '\0';
47534757

4754-
memset(&rules, 0, sizeof(rules));
47554758
rules.loader = loader;
47564759
rules.errmsg = errmsg;
47574760
rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
4758-
memset(&new_uca, 0, sizeof(new_uca));
47594761

47604762
/* Parse ICU Collation Customization expression */
47614763
if ((rc = my_coll_rule_parse(&rules, cs->tailoring,
@@ -4801,45 +4803,64 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
48014803
}
48024804

48034805
npages = (src->maxchar + 1) / 256;
4804-
if (rules.uca->version == UCA_V900) {
4805-
if (!(src->lengths = static_cast<uint8_t *>(malloc(npages)))) {
4806-
goto ex;
4807-
}
4806+
lengths_are_temporary = (rules.uca->version == UCA_V900);
4807+
if (lengths_are_temporary) {
4808+
src->lengths = static_cast<uint8_t *>(loader->mem_malloc(npages));
4809+
if (src->lengths == nullptr) goto ex;
48084810
synthesize_lengths_900(src->lengths, src->weights, npages);
48094811
}
48104812

4811-
lengths_are_temporary = (rules.uca->version == UCA_V900);
4812-
if ((rc = init_weight_level(cs, &rules, 0, dst, src, lengths_are_temporary)))
4813-
goto ex;
4813+
if ((rc = init_weight_level(cs, &rules, 0, dst, src))) goto ex;
48144814

48154815
if (lengths_are_temporary) {
4816-
free(src->lengths);
4817-
free(dst->lengths);
4816+
loader->mem_free(src->lengths);
4817+
loader->mem_free(dst->lengths);
48184818
src->lengths = nullptr;
48194819
dst->lengths = nullptr;
48204820
}
48214821

48224822
new_uca.version = src_uca->version;
4823-
if (!(cs->uca = (MY_UCA_INFO *)loader->once_alloc(sizeof(MY_UCA_INFO)))) {
4824-
rc = 1;
4825-
goto ex;
4826-
}
4827-
memset(cs->uca, 0, sizeof(MY_UCA_INFO));
4828-
cs->uca[0] = new_uca;
4823+
new_uca.m_based_on = src_uca;
4824+
cs->uca = new MY_UCA_INFO(new_uca);
48294825

48304826
ex:
48314827
free(rules.rule);
48324828
if (rc != 0 && errmsg->errcode) {
48334829
delete new_uca.contraction_nodes;
4830+
delete new_uca.contraction_flags;
48344831
loader->reporter(ERROR_LEVEL, errmsg->errcode, errmsg->errarg);
48354832
}
48364833
return rc;
48374834
}
48384835

4839-
static void my_coll_uninit_uca(CHARSET_INFO *cs) {
4836+
static void my_coll_uninit_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) {
48404837
if (cs->uca && cs->uca->contraction_nodes) {
48414838
delete cs->uca->contraction_nodes;
4839+
delete cs->uca->contraction_flags;
48424840
cs->uca->contraction_nodes = nullptr;
4841+
cs->uca->contraction_flags = nullptr;
4842+
}
4843+
if (cs->uca != nullptr && cs->uca != &my_uca_v400 &&
4844+
cs->uca != &my_uca_v520 && cs->uca != &my_uca_v900) {
4845+
if (cs->uca->m_allocated_weights != nullptr) {
4846+
for (size_t i = 0; i < cs->uca->m_allocated_weights->size(); ++i) {
4847+
if (cs->uca->m_allocated_weights->at(i) != 0) {
4848+
loader->mem_free(cs->uca->weights[i]);
4849+
cs->uca->weights[i] = nullptr;
4850+
}
4851+
}
4852+
}
4853+
loader->mem_free(cs->uca->lengths);
4854+
cs->uca->lengths = nullptr;
4855+
loader->mem_free(cs->uca->weights);
4856+
cs->uca->weights = nullptr;
4857+
4858+
delete cs->uca->m_allocated_weights;
4859+
cs->uca->m_allocated_weights = nullptr;
4860+
4861+
MY_UCA_INFO *to_be_deleted = cs->uca;
4862+
cs->uca = cs->uca->m_based_on;
4863+
delete to_be_deleted;
48434864
cs->state &= ~MY_CS_READY;
48444865
}
48454866
}

0 commit comments

Comments
 (0)