Skip to content

Commit 0208744

Browse files
author
Xing Zhang
committed
WL#9108: Add language specific case insensitive collations of utf8mb4
Add case-insensitive collations for the following languages: Icelandic, Latvian, Romanian, Slovenian, Polish, Estonian, Spanish, Traditional Spanish, Swedish, Turkish, Czech, Danish, Lithuanian, Slovak, Roman, Esperanto, Hungarian, German phonebook, Croatian, Vietnamese. Add test cases for the added collations. Add utf8mb4_no_800_ci_ai as a synonym of utf8mb4_da_800_ci_ai. Implement one of the parametric tailorings: reorder. Move the UCA data tables into separate files.
1 parent 9ac7450 commit 0208744

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+219831
-67544
lines changed

include/m_ctype.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#define _m_ctype_h
2323

2424
#include "my_global.h" /* uint16, uchar */
25+
#include "str_uca_type.h"
2526

2627
#ifdef __cplusplus
2728
extern "C" {
@@ -78,7 +79,7 @@ extern MY_UNICASE_INFO my_unicase_unicode520;
7879
extern MY_UNICASE_INFO my_unicase_unicode800;
7980

8081
#define MY_UCA_MAX_CONTRACTION 6
81-
#define MY_UCA_MAX_WEIGHT_SIZE 24
82+
#define MY_UCA_MAX_WEIGHT_SIZE 25
8283
#define MY_UCA_WEIGHT_LEVELS 1
8384

8485
typedef struct my_contraction_t
@@ -377,7 +378,6 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
377378
extern MY_CHARSET_HANDLER my_charset_ascii_handler;
378379
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
379380

380-
381381
/* See strings/CHARSET_INFO.txt for information on this structure */
382382
typedef struct charset_info_st
383383
{
@@ -389,6 +389,7 @@ typedef struct charset_info_st
389389
const char *name;
390390
const char *comment;
391391
const char *tailoring;
392+
struct Coll_param *coll_param;
392393
const uchar *ctype;
393394
const uchar *to_lower;
394395
const uchar *to_upper;

include/str_uca_type.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
2+
3+
This program is free software; you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License as published by
5+
the Free Software Foundation; version 2 of the License.
6+
7+
This program is distributed in the hope that it will be useful,
8+
but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10+
GNU General Public License for more details.
11+
12+
You should have received a copy of the GNU General Public License
13+
along with this program; if not, write to the Free Software
14+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15+
16+
/* This header file contains type declarations used by UCA code. */
17+
18+
#ifndef STR_UCA_TYPE_H
19+
#define STR_UCA_TYPE_H
20+
/*
21+
So far, only the Croatian collation needs to reorder the Latin and
22+
Cyrillic groups of characters. More may be added in the future.
23+
*/
24+
#define UCA_MAX_CHAR_GRP 4
25+
enum enum_char_grp
26+
{
27+
CHARGRP_NONE,
28+
CHARGRP_CORE,
29+
CHARGRP_LATIN,
30+
CHARGRP_CYRILLIC,
31+
CHARGRP_ARAB,
32+
CHARGRP_OTHERS
33+
};
34+
35+
struct Weight_boundary
36+
{
37+
uint16 begin;
38+
uint16 end;
39+
};
40+
41+
struct Reorder_wt_rec
42+
{
43+
struct Weight_boundary old_wt_bdy;
44+
struct Weight_boundary new_wt_bdy;
45+
};
46+
47+
struct Reorder_param
48+
{
49+
enum enum_char_grp reorder_grp[UCA_MAX_CHAR_GRP];
50+
struct Reorder_wt_rec wt_rec[2 * UCA_MAX_CHAR_GRP];
51+
uint16 max_weight;
52+
};
53+
54+
struct Coll_param
55+
{
56+
struct Reorder_param *reorder_param;
57+
my_bool norm_enabled; // false = normalization off, default;
58+
// true = on
59+
};
60+
61+
#endif

mysql-test/collections/default.daily

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,5 @@ perl mysql-test-run.pl --force --timer --testcase-timeout=60 --parallel=auto --
5858
# Run some tests that use no_parallel.inc
5959
perl mysql-test-run.pl --force --timer --testcase-timeout=60 --comment=serial_tests --vardir=var-serial-tests main.blackhole_plugin main.archive_plugin
6060

61+
# Run collation regression tests
62+
perl mysql-test-run.pl --force --timer --comment=collations --vardir=var-collations --suite=collations
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
insert into t1 values (_ucs2 0x0218), (_ucs2 0x0219), (_ucs2 0x021a), (_ucs2 0x021b);
2+
3+
insert into t1 values (_ucs2 0x0d96), (_ucs2 0x0da4), (_ucs2 0x0da5);
4+
5+
insert into t1 values (_ucs2 0x0064017e), (_ucs2 0x0044017e), (_ucs2 0x0044017d);
6+
insert into t1 values ('CS'),('Cs'),('cs'),('cS');
7+
insert into t1 values ('DZS'),('DZs'),('Dzs'),('DzS');
8+
insert into t1 values ('dZS'),('dZs'),('dzs'),('dzS');
9+
insert into t1 values ('GY'),('Gy'),('gy'),('gY');
10+
insert into t1 values ('LY'),('Ly'),('ly'),('lY');
11+
insert into t1 values ('NY'),('Ny'),('ny'),('nY');
12+
insert into t1 values ('SZ'),('Sz'),('sz'),('sZ');
13+
insert into t1 values ('TY'),('Ty'),('ty'),('tY');
14+
insert into t1 values ('ZS'),('Zs'),('zs'),('zS');
15+
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
16+
insert into t1 values ('ccs'),('Ccs'),('CCS'),('cCS');
17+
insert into t1 values ('ddz'),('Ddz'),('DDZ'),('dDZ');
18+
insert into t1 values ('ddzs'),('Ddzs'),('DDZS'),('dDZS');
19+
insert into t1 values ('ggy'),('Ggy'),('GGY'),('gGY');
20+
insert into t1 values ('lly'),('Lly'),('LLY'),('lLY');
21+
insert into t1 values ('nny'),('Nny'),('NNY'),('nNY');
22+
insert into t1 values ('ssz'),('Ssz'),('SSZ'),('sSZ');
23+
insert into t1 values ('tty'),('Tty'),('TTY'),('tTY');
24+
insert into t1 values ('zzs'),('Zzs'),('ZZS'),('zZS');
25+
insert into t1 values ('UE'),('Ue'),('ue'),('uE');

mysql-test/r/ctype_ldml.result

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,25 @@ select "foo" = "foo " collate latin1_test;
456456
The following tests check that two-byte collation IDs work
457457
select * from information_schema.collations where id>256 order by id;
458458
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
459+
utf8mb4_is_800_ci_ai utf8mb4 257 Yes 8
460+
utf8mb4_lv_800_ci_ai utf8mb4 258 Yes 8
461+
utf8mb4_ro_800_ci_ai utf8mb4 259 Yes 8
462+
utf8mb4_sl_800_ci_ai utf8mb4 260 Yes 8
463+
utf8mb4_pl_800_ci_ai utf8mb4 261 Yes 8
464+
utf8mb4_et_800_ci_ai utf8mb4 262 Yes 8
465+
utf8mb4_es_800_ci_ai utf8mb4 263 Yes 8
466+
utf8mb4_sv_800_ci_ai utf8mb4 264 Yes 8
467+
utf8mb4_tr_800_ci_ai utf8mb4 265 Yes 8
468+
utf8mb4_cs_800_ci_ai utf8mb4 266 Yes 8
469+
utf8mb4_da_800_ci_ai utf8mb4 267 Yes 8
470+
utf8mb4_lt_800_ci_ai utf8mb4 268 Yes 8
471+
utf8mb4_sk_800_ci_ai utf8mb4 269 Yes 8
472+
utf8mb4_es_traditional_800_ci_ai utf8mb4 270 Yes 8
473+
utf8mb4_la_800_ci_ai utf8mb4 271 Yes 8
474+
utf8mb4_eo_800_ci_ai utf8mb4 273 Yes 8
475+
utf8mb4_hu_800_ci_ai utf8mb4 274 Yes 8
476+
utf8mb4_hr_800_ci_ai utf8mb4 275 Yes 8
477+
utf8mb4_vi_800_ci_ai utf8mb4 277 Yes 8
459478
utf8mb4_test_ci utf8mb4 326 8
460479
utf16_test_ci utf16 327 8
461480
utf8mb4_test_400_ci utf8mb4 328 8

0 commit comments

Comments
 (0)