63
63
#include " template_utils.h"
64
64
65
65
MY_UCA_INFO my_uca_v400 = {
66
- UCA_V400,
66
+ UCA_V400, /* version */
67
+ nullptr , /* in case we clone an instance */
67
68
68
- 0xFFFF , /* maxchar */
69
- uca_length, uca_weight, false , nullptr , /* contractions */
70
- nullptr ,
69
+ 0xFFFF , /* maxchar */
70
+ uca_length, /* lengths */
71
+ nullptr , /* m_allocated_weights */
72
+ uca_weight, /* weights */
73
+ false , /* have_contractions */
74
+ nullptr , /* contraction_nodes */
75
+ nullptr , /* contraction_flags */
71
76
72
77
/* Logical positions */
73
78
0x0009 , /* first_non_ignorable p != ignore */
@@ -95,10 +100,12 @@ MY_UCA_INFO my_uca_v400 = {
95
100
/* *****************************************************/
96
101
97
102
MY_UCA_INFO my_uca_v520 = {
98
- UCA_V520,
103
+ UCA_V520, /* version */
104
+ nullptr , /* in case we clone an instance */
99
105
100
106
0x10FFFF , /* maxchar */
101
107
uca520_length,
108
+ nullptr , /* m_allocated_weights */
102
109
uca520_weight,
103
110
false ,
104
111
nullptr , /* contractions */
@@ -771,7 +778,7 @@ class my_uca_scanner {
771
778
unsigned wbeg_stride{0 }; /* Number of bytes between weights in string */
772
779
const uint8_t *sbeg; /* Beginning of the input string */
773
780
const uint8_t *send; /* End of the input string */
774
- const MY_UCA_INFO *uca;
781
+ const MY_UCA_INFO *uca{ nullptr } ;
775
782
uint16_t implicit[10 ]{};
776
783
my_wc_t prev_char{0 }; // Previous code point we scanned, if any.
777
784
const CHARSET_INFO *cs;
@@ -890,9 +897,9 @@ class uca_scanner_900 : public my_uca_scanner {
890
897
@param flag flag: "is contraction head", "is contraction tail"
891
898
*/
892
899
893
- static inline void my_uca_add_contraction_flag (char *flags, my_wc_t wc ,
894
- int flag) {
895
- flags[wc & MY_UCA_CNT_FLAG_MASK] |= flag;
900
+ static inline void my_uca_add_contraction_flag (MY_UCA_INFO::flags_type *flags,
901
+ my_wc_t wc, int flag) {
902
+ (* flags) [wc & MY_UCA_CNT_FLAG_MASK] |= flag;
896
903
}
897
904
898
905
/* *
@@ -969,9 +976,9 @@ const uint16_t *my_uca_contraction2_weight(
969
976
@retval true - can be previous context head
970
977
*/
971
978
972
- static inline bool my_uca_can_be_previous_context_head (const char *flags,
973
- my_wc_t wc) {
974
- return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
979
+ static inline bool my_uca_can_be_previous_context_head (
980
+ const MY_UCA_INFO::flags_type *flags, my_wc_t wc) {
981
+ return (* flags) [wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
975
982
}
976
983
977
984
/* *
@@ -984,9 +991,9 @@ static inline bool my_uca_can_be_previous_context_head(const char *flags,
984
991
@retval true - can be contraction tail
985
992
*/
986
993
987
- static inline bool my_uca_can_be_previous_context_tail (const char *flags,
988
- my_wc_t wc) {
989
- return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
994
+ static inline bool my_uca_can_be_previous_context_tail (
995
+ const MY_UCA_INFO::flags_type *flags, my_wc_t wc) {
996
+ return (* flags) [wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
990
997
}
991
998
992
999
/* *
@@ -2804,13 +2811,13 @@ typedef enum {
2804
2811
} my_coll_shift_method;
2805
2812
2806
2813
struct MY_COLL_RULES {
2807
- MY_UCA_INFO *uca; /* Unicode weight data */
2808
- size_t nrules; /* Number of rules in the rule array */
2809
- size_t mrules; /* Number of allocated rules */
2810
- MY_COLL_RULE *rule; /* Rule array */
2811
- MY_CHARSET_LOADER *loader;
2812
- MY_CHARSET_ERRMSG *errmsg;
2813
- my_coll_shift_method shift_after_method;
2814
+ MY_UCA_INFO *uca{ nullptr } ; /* Unicode weight data */
2815
+ size_t nrules{ 0 }; /* Number of rules in the rule array */
2816
+ size_t mrules{ 0 }; /* Number of allocated rules */
2817
+ MY_COLL_RULE *rule{ nullptr } ; /* Rule array */
2818
+ MY_CHARSET_LOADER *loader{ nullptr } ;
2819
+ MY_CHARSET_ERRMSG *errmsg{ nullptr } ;
2820
+ my_coll_shift_method shift_after_method{my_shift_method_simple} ;
2814
2821
};
2815
2822
2816
2823
/* *
@@ -3727,9 +3734,9 @@ static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
3727
3734
const MY_UCA_INFO *src, MY_UCA_INFO *dst,
3728
3735
size_t page) {
3729
3736
const unsigned dst_size = 256 * dst->lengths [page] * sizeof (uint16_t );
3730
- if (!( dst->weights [page] =
3731
- static_cast < uint16_t *>((loader-> once_alloc )(dst_size))))
3732
- return true ;
3737
+ dst->weights [page] = static_cast < uint16_t *>(loader-> mem_malloc (dst_size));
3738
+ if (dst-> weights [page] == nullptr ) return true ;
3739
+ dst-> m_allocated_weights -> at (page) = 1 ;
3733
3740
3734
3741
assert (src->lengths [page] <= dst->lengths [page]);
3735
3742
memset (dst->weights [page], 0 , dst_size);
@@ -4093,6 +4100,7 @@ static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
4093
4100
// may already be set.
4094
4101
assert (dst->weights [page] == nullptr ||
4095
4102
dst->weights [page] == ja_han_pages[page - MIN_JA_HAN_PAGE]);
4103
+ assert (dst->m_allocated_weights ->at (page) == 0 );
4096
4104
dst->weights [page] = ja_han_pages[page - MIN_JA_HAN_PAGE];
4097
4105
}
4098
4106
}
@@ -4102,9 +4110,13 @@ static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) {
4102
4110
characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the
4103
4111
DUCET pages with these pages.
4104
4112
*/
4105
- static void copy_zh_han_pages (MY_UCA_INFO *dst) {
4113
+ static void copy_zh_han_pages (MY_UCA_INFO *dst, MY_CHARSET_LOADER *loader ) {
4106
4114
for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) {
4107
4115
if (zh_han_pages[page - MIN_ZH_HAN_PAGE]) {
4116
+ if (dst->m_allocated_weights ->at (page)) {
4117
+ loader->mem_free (dst->weights [page]);
4118
+ dst->m_allocated_weights ->at (page) = 0 ;
4119
+ }
4108
4120
dst->weights [page] = zh_han_pages[page - MIN_ZH_HAN_PAGE];
4109
4121
}
4110
4122
}
@@ -4208,8 +4220,7 @@ static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst,
4208
4220
}
4209
4221
4210
4222
static bool init_weight_level (CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
4211
- MY_UCA_INFO *dst, const MY_UCA_INFO *src,
4212
- bool lengths_are_temporary) {
4223
+ MY_UCA_INFO *dst, const MY_UCA_INFO *src) {
4213
4224
MY_COLL_RULE *r, *rlast;
4214
4225
size_t i, npages = (src->maxchar + 1 ) / 256 ;
4215
4226
bool has_contractions = false ;
@@ -4220,19 +4231,14 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
4220
4231
if (check_rules (rules, dst, src)) return true ;
4221
4232
4222
4233
/* Allocate memory for pages and their lengths */
4223
- if (lengths_are_temporary) {
4224
- if (!(dst->lengths = static_cast <uint8_t *>(malloc (npages)))) return true ;
4225
- if (!(dst->weights = static_cast <uint16_t **>(
4226
- (loader->once_alloc )(npages * sizeof (uint16_t *))))) {
4227
- free (dst->lengths );
4228
- return true ;
4229
- }
4230
- } else {
4231
- if (!(dst->lengths =
4232
- static_cast <uint8_t *>((loader->once_alloc )(npages))) ||
4233
- !(dst->weights = static_cast <uint16_t **>(
4234
- (loader->once_alloc )(npages * sizeof (uint16_t *)))))
4235
- return true ;
4234
+ dst->lengths = static_cast <uint8_t *>(loader->mem_malloc (npages));
4235
+ dst->weights =
4236
+ static_cast <uint16_t **>(loader->mem_malloc (npages * sizeof (uint16_t *)));
4237
+ if (dst->lengths == nullptr || dst->weights == nullptr ) return true ;
4238
+
4239
+ if (dst->m_allocated_weights == nullptr ) {
4240
+ dst->m_allocated_weights = new std::vector<uint8_t >();
4241
+ dst->m_allocated_weights ->resize (npages, 0 );
4236
4242
}
4237
4243
4238
4244
/*
@@ -4273,10 +4279,7 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
4273
4279
if (has_contractions) {
4274
4280
dst->have_contractions = true ;
4275
4281
dst->contraction_nodes = new std::vector<MY_CONTRACTION>(0 );
4276
- if (!(dst->contraction_flags =
4277
- (char *)loader->once_alloc (MY_UCA_CNT_FLAG_SIZE)))
4278
- return true ;
4279
- memset (dst->contraction_flags , 0 , MY_UCA_CNT_FLAG_SIZE);
4282
+ dst->contraction_flags = new std::array<char , MY_UCA_CNT_FLAG_SIZE>{};
4280
4283
}
4281
4284
if (cs->coll_param == &zh_coll_param) {
4282
4285
/*
@@ -4292,7 +4295,7 @@ static bool init_weight_level(CHARSET_INFO *cs, MY_COLL_RULES *rules, int level,
4292
4295
return rc;
4293
4296
}
4294
4297
modify_all_zh_pages (cs->coll_param ->reorder_param , dst, npages);
4295
- copy_zh_han_pages (dst);
4298
+ copy_zh_han_pages (dst, loader );
4296
4299
} else {
4297
4300
/* Allocate pages that we'll overwrite and copy default weights */
4298
4301
for (i = 0 ; i < npages; i++) {
@@ -4742,7 +4745,8 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4742
4745
return false ; /* Ok to add a collation without tailoring */
4743
4746
4744
4747
MY_COLL_RULES rules;
4745
- MY_UCA_INFO new_uca, *src_uca = nullptr ;
4748
+ MY_UCA_INFO new_uca;
4749
+ MY_UCA_INFO *src_uca = nullptr ;
4746
4750
int rc = 0 ;
4747
4751
MY_UCA_INFO *src, *dst;
4748
4752
size_t npages;
@@ -4751,11 +4755,9 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4751
4755
errmsg->errcode = 0 ;
4752
4756
*errmsg->errarg = ' \0 ' ;
4753
4757
4754
- memset (&rules, 0 , sizeof (rules));
4755
4758
rules.loader = loader;
4756
4759
rules.errmsg = errmsg;
4757
4760
rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
4758
- memset (&new_uca, 0 , sizeof (new_uca));
4759
4761
4760
4762
/* Parse ICU Collation Customization expression */
4761
4763
if ((rc = my_coll_rule_parse (&rules, cs->tailoring ,
@@ -4801,45 +4803,64 @@ static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4801
4803
}
4802
4804
4803
4805
npages = (src->maxchar + 1 ) / 256 ;
4804
- if (rules.uca ->version == UCA_V900) {
4805
- if (!(src-> lengths = static_cast < uint8_t *>( malloc (npages))) ) {
4806
- goto ex ;
4807
- }
4806
+ lengths_are_temporary = (rules.uca ->version == UCA_V900);
4807
+ if (lengths_are_temporary ) {
4808
+ src-> lengths = static_cast < uint8_t *>(loader-> mem_malloc (npages)) ;
4809
+ if (src-> lengths == nullptr ) goto ex;
4808
4810
synthesize_lengths_900 (src->lengths , src->weights , npages);
4809
4811
}
4810
4812
4811
- lengths_are_temporary = (rules.uca ->version == UCA_V900);
4812
- if ((rc = init_weight_level (cs, &rules, 0 , dst, src, lengths_are_temporary)))
4813
- goto ex;
4813
+ if ((rc = init_weight_level (cs, &rules, 0 , dst, src))) goto ex;
4814
4814
4815
4815
if (lengths_are_temporary) {
4816
- free (src->lengths );
4817
- free (dst->lengths );
4816
+ loader-> mem_free (src->lengths );
4817
+ loader-> mem_free (dst->lengths );
4818
4818
src->lengths = nullptr ;
4819
4819
dst->lengths = nullptr ;
4820
4820
}
4821
4821
4822
4822
new_uca.version = src_uca->version ;
4823
- if (!(cs->uca = (MY_UCA_INFO *)loader->once_alloc (sizeof (MY_UCA_INFO)))) {
4824
- rc = 1 ;
4825
- goto ex;
4826
- }
4827
- memset (cs->uca , 0 , sizeof (MY_UCA_INFO));
4828
- cs->uca [0 ] = new_uca;
4823
+ new_uca.m_based_on = src_uca;
4824
+ cs->uca = new MY_UCA_INFO (new_uca);
4829
4825
4830
4826
ex:
4831
4827
free (rules.rule );
4832
4828
if (rc != 0 && errmsg->errcode ) {
4833
4829
delete new_uca.contraction_nodes ;
4830
+ delete new_uca.contraction_flags ;
4834
4831
loader->reporter (ERROR_LEVEL, errmsg->errcode , errmsg->errarg );
4835
4832
}
4836
4833
return rc;
4837
4834
}
4838
4835
4839
- static void my_coll_uninit_uca (CHARSET_INFO *cs) {
4836
+ static void my_coll_uninit_uca (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader ) {
4840
4837
if (cs->uca && cs->uca ->contraction_nodes ) {
4841
4838
delete cs->uca ->contraction_nodes ;
4839
+ delete cs->uca ->contraction_flags ;
4842
4840
cs->uca ->contraction_nodes = nullptr ;
4841
+ cs->uca ->contraction_flags = nullptr ;
4842
+ }
4843
+ if (cs->uca != nullptr && cs->uca != &my_uca_v400 &&
4844
+ cs->uca != &my_uca_v520 && cs->uca != &my_uca_v900) {
4845
+ if (cs->uca ->m_allocated_weights != nullptr ) {
4846
+ for (size_t i = 0 ; i < cs->uca ->m_allocated_weights ->size (); ++i) {
4847
+ if (cs->uca ->m_allocated_weights ->at (i) != 0 ) {
4848
+ loader->mem_free (cs->uca ->weights [i]);
4849
+ cs->uca ->weights [i] = nullptr ;
4850
+ }
4851
+ }
4852
+ }
4853
+ loader->mem_free (cs->uca ->lengths );
4854
+ cs->uca ->lengths = nullptr ;
4855
+ loader->mem_free (cs->uca ->weights );
4856
+ cs->uca ->weights = nullptr ;
4857
+
4858
+ delete cs->uca ->m_allocated_weights ;
4859
+ cs->uca ->m_allocated_weights = nullptr ;
4860
+
4861
+ MY_UCA_INFO *to_be_deleted = cs->uca ;
4862
+ cs->uca = cs->uca ->m_based_on ;
4863
+ delete to_be_deleted;
4843
4864
cs->state &= ~MY_CS_READY;
4844
4865
}
4845
4866
}
0 commit comments