Skip to content

Commit 6a3b1d4

Browse files
author
bar@mysql.com
committed
Many files:
Allow mixing of different character sets for more SQL functions. item_func.h: Allow mixing of different character sets for more SQL functions.
1 parent 1e2b370 commit 6a3b1d4

File tree

8 files changed

+274
-127
lines changed

8 files changed

+274
-127
lines changed

mysql-test/r/ctype_recoding.result

+54
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,57 @@ select * from t1 where a=_latin1'
186186
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
187187
drop table t1;
188188
set names latin1;
189+
set names koi8r;
190+
create table t1 (c1 char(10) character set cp1251);
191+
insert into t1 values ('�');
192+
select c1 from t1 where c1 between '�' and '�';
193+
c1
194+
195+
select ifnull(c1,'�'), ifnull(null,c1) from t1;
196+
ifnull(c1,'�') ifnull(null,c1)
197+
� �
198+
select if(1,c1,'�'), if(0,c1,'�') from t1;
199+
if(1,c1,'�') if(0,c1,'�')
200+
� �
201+
select coalesce('�',c1), coalesce(null,c1) from t1;
202+
coalesce('�',c1) coalesce(null,c1)
203+
� �
204+
select least(c1,'�'), greatest(c1,'�') from t1;
205+
least(c1,'�') greatest(c1,'�')
206+
� �
207+
select locate(c1,'�'), locate('�',c1) from t1;
208+
locate(c1,'�') locate('�',c1)
209+
1 1
210+
select field(c1,'�'),field('�',c1) from t1;
211+
field(c1,'�') field('�',c1)
212+
1 1
213+
select concat(c1,'�'), concat('�',c1) from t1;
214+
concat(c1,'�') concat('�',c1)
215+
�� ��
216+
select concat_ws(c1,'�','�'), concat_ws('�',c1,'�') from t1;
217+
concat_ws(c1,'�','�') concat_ws('�',c1,'�')
218+
��� ���
219+
select replace(c1,'�','�'), replace('�',c1,'�') from t1;
220+
replace(c1,'�','�') replace('�',c1,'�')
221+
� �
222+
select substring_index(c1,'����',2) from t1;
223+
substring_index(c1,'����',2)
224+
225+
select elt(1,c1,'�'),elt(1,'�',c1) from t1;
226+
elt(1,c1,'�') elt(1,'�',c1)
227+
� �
228+
select make_set(3,c1,'�'), make_set(3,'�',c1) from t1;
229+
make_set(3,c1,'�') make_set(3,'�',c1)
230+
�,� �,�
231+
select insert(c1,1,2,'�'),insert('�',1,2,c1) from t1;
232+
insert(c1,1,2,'�') insert('�',1,2,c1)
233+
� �
234+
select trim(c1 from '�'),trim('�' from c1) from t1;
235+
trim(c1 from '�') trim('�' from c1)
236+
237+
select lpad(c1,3,'�'), lpad('�',3,c1) from t1;
238+
lpad(c1,3,'�') lpad('�',3,c1)
239+
��� ���
240+
select rpad(c1,3,'�'), rpad('�',3,c1) from t1;
241+
rpad(c1,3,'�') rpad('�',3,c1)
242+
��� ���

mysql-test/t/ctype_recoding.test

+26
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,29 @@ select * from t1 where a=_latin1'
153153
drop table t1;
154154
set names latin1;
155155

156+
#
157+
# Check more automatic conversion
158+
#
159+
set names koi8r;
160+
create table t1 (c1 char(10) character set cp1251);
161+
insert into t1 values ('�');
162+
select c1 from t1 where c1 between '�' and '�';
163+
select ifnull(c1,'�'), ifnull(null,c1) from t1;
164+
select if(1,c1,'�'), if(0,c1,'�') from t1;
165+
select coalesce('�',c1), coalesce(null,c1) from t1;
166+
select least(c1,'�'), greatest(c1,'�') from t1;
167+
select locate(c1,'�'), locate('�',c1) from t1;
168+
select field(c1,'�'),field('�',c1) from t1;
169+
select concat(c1,'�'), concat('�',c1) from t1;
170+
select concat_ws(c1,'�','�'), concat_ws('�',c1,'�') from t1;
171+
select replace(c1,'�','�'), replace('�',c1,'�') from t1;
172+
select substring_index(c1,'����',2) from t1;
173+
select elt(1,c1,'�'),elt(1,'�',c1) from t1;
174+
select make_set(3,c1,'�'), make_set(3,'�',c1) from t1;
175+
select insert(c1,1,2,'�'),insert('�',1,2,c1) from t1;
176+
select trim(c1 from '�'),trim('�' from c1) from t1;
177+
select lpad(c1,3,'�'), lpad('�',3,c1) from t1;
178+
select rpad(c1,3,'�'), rpad('�',3,c1) from t1;
179+
# TODO
180+
#select case c1 when '�' then '�' when '�' then '�' else 'c' end from t1;
181+
#select export_set(5,c1,'�'), export_set(5,'�',c1) from t1;

sql/item.cc

+41
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,41 @@ bool Item::eq(const Item *item, bool binary_cmp) const
205205
}
206206

207207

208+
Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
209+
{
210+
/*
211+
Don't allow automatic conversion to non-Unicode charsets,
212+
as it potentially loses data.
213+
*/
214+
if (!(tocs->state & MY_CS_UNICODE))
215+
return NULL; // safe conversion is not possible
216+
return new Item_func_conv_charset(this, tocs);
217+
}
218+
219+
220+
Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
221+
{
222+
Item_string *conv;
223+
uint conv_errors;
224+
String tmp, cstr, *ostr= val_str(&tmp);
225+
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
226+
if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
227+
cstr.charset(),
228+
collation.derivation)))
229+
{
230+
/*
231+
Safe conversion is not possible (or EOM).
232+
We could not convert a string into the requested character set
233+
without data loss. The target charset does not cover all the
234+
characters from the string. Operation cannot be done correctly.
235+
*/
236+
return NULL;
237+
}
238+
conv->str_value.copy();
239+
return conv;
240+
}
241+
242+
208243
bool Item_string::eq(const Item *item, bool binary_cmp) const
209244
{
210245
if (type() == item->type())
@@ -723,6 +758,12 @@ String *Item_null::val_str(String *str)
723758
}
724759

725760

761+
Item *Item_null::safe_charset_converter(CHARSET_INFO *tocs)
762+
{
763+
collation.set(tocs);
764+
return this;
765+
}
766+
726767
/*********************** Item_param related ******************************/
727768

728769
/*

sql/item.h

+15-3
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,22 @@ enum Derivation
3939

4040
/*
4141
Flags for collation aggregation modes:
42-
allow conversion to a superset
43-
allow conversion of a coercible value (i.e. constant).
42+
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
43+
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
44+
(i.e. constant).
45+
MY_COLL_ALLOW_CONV - allow any kind of conversion
46+
(combination of the above two)
47+
MY_COLL_DISALLOW_NONE - don't allow returning DERIVATION_NONE
48+
(e.g. when aggregating for comparison)
49+
MY_COLL_CMP_CONV - combination of MY_COLL_ALLOW_CONV
50+
and MY_COLL_DISALLOW_NONE
4451
*/
4552

4653
#define MY_COLL_ALLOW_SUPERSET_CONV 1
4754
#define MY_COLL_ALLOW_COERCIBLE_CONV 2
48-
55+
#define MY_COLL_ALLOW_CONV 3
56+
#define MY_COLL_DISALLOW_NONE 4
57+
#define MY_COLL_CMP_CONV 7
4958

5059
class DTCollation {
5160
public:
@@ -302,6 +311,7 @@ class Item {
302311
Field *tmp_table_field_from_field_type(TABLE *table);
303312

304313
virtual Item *neg_transformer(THD *thd) { return NULL; }
314+
virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
305315
void delete_self()
306316
{
307317
cleanup();
@@ -447,6 +457,7 @@ class Item_null :public Item
447457
Item *new_item() { return new Item_null(name); }
448458
bool is_null() { return 1; }
449459
void print(String *str) { str->append("NULL", 4); }
460+
Item *safe_charset_converter(CHARSET_INFO *tocs);
450461
};
451462

452463

@@ -717,6 +728,7 @@ class Item_string :public Item
717728
return new Item_string(name, str_value.ptr(),
718729
str_value.length(), &my_charset_bin);
719730
}
731+
Item *safe_charset_converter(CHARSET_INFO *tocs);
720732
String *const_string() { return &str_value; }
721733
inline void append(char *str, uint length) { str_value.append(str, length); }
722734
void print(String *str);

sql/item_cmpfunc.cc

+9-87
Original file line numberDiff line numberDiff line change
@@ -173,89 +173,11 @@ void Item_bool_func2::fix_length_and_dec()
173173
if (!args[0] || !args[1])
174174
return;
175175

176-
/*
177-
We allow to apply automatic character set conversion in some cases.
178-
The conditions when conversion is possible are:
179-
- arguments A and B have different charsets
180-
- A wins according to coercibility rules
181-
(i.e. a column is stronger than a string constant,
182-
an explicit COLLATE clause is stronger than a column)
183-
- character set of A is either superset for character set of B,
184-
or B is a string constant which can be converted into the
185-
character set of A without data loss.
186-
187-
If all of the above is true, then it's possible to convert
188-
B into the character set of A, and then compare according
189-
to the collation of A.
190-
*/
191-
192-
uint32 dummy_offset;
193176
DTCollation coll;
194-
195177
if (args[0]->result_type() == STRING_RESULT &&
196178
args[1]->result_type() == STRING_RESULT &&
197-
String::needs_conversion(0, args[0]->collation.collation,
198-
args[1]->collation.collation,
199-
&dummy_offset) &&
200-
!coll.set(args[0]->collation, args[1]->collation,
201-
MY_COLL_ALLOW_SUPERSET_CONV |
202-
MY_COLL_ALLOW_COERCIBLE_CONV))
203-
{
204-
Item* conv= 0;
205-
Item_arena *arena= thd->current_arena, backup;
206-
uint strong= coll.strong;
207-
uint weak= strong ? 0 : 1;
208-
/*
209-
In case we're in statement prepare, create conversion item
210-
in its memory: it will be reused on each execute.
211-
*/
212-
if (arena->is_stmt_prepare())
213-
thd->set_n_backup_item_arena(arena, &backup);
214-
if (args[weak]->type() == STRING_ITEM)
215-
{
216-
uint conv_errors;
217-
String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
218-
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
219-
args[strong]->collation.collation, &conv_errors);
220-
if (conv_errors)
221-
{
222-
/*
223-
We could not convert a string into the character set
224-
of the stronger side of the operation without data loss.
225-
It can happen if we tried to combine a column with a string
226-
constant, and the column charset does not cover all the
227-
characters from the string. Operation cannot be done
228-
correctly. Return an error.
229-
*/
230-
my_coll_agg_error(args[0]->collation, args[1]->collation,
231-
func_name());
232-
return;
233-
}
234-
conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
235-
args[weak]->collation.derivation);
236-
((Item_string*)conv)->str_value.copy();
237-
}
238-
else
239-
{
240-
if (!(coll.collation->state & MY_CS_UNICODE))
241-
{
242-
/*
243-
Don't allow automatic conversion to non-Unicode charsets,
244-
as it potentially loses data.
245-
*/
246-
my_coll_agg_error(args[0]->collation, args[1]->collation,
247-
func_name());
248-
return;
249-
}
250-
conv= new Item_func_conv_charset(args[weak],
251-
args[strong]->collation.collation);
252-
conv->collation.set(args[weak]->collation.derivation);
253-
conv->fix_fields(thd, 0, &conv);
254-
}
255-
if (arena->is_stmt_prepare())
256-
thd->restore_backup_item_arena(arena, &backup);
257-
args[weak]= conv ? conv : args[weak];
258-
}
179+
agg_arg_charsets(coll, args, 2, MY_COLL_CMP_CONV))
180+
return;
259181

260182
// Make a special case of compare with fields to get nicer DATE comparisons
261183

@@ -871,7 +793,7 @@ void Item_func_between::fix_length_and_dec()
871793
return;
872794
agg_cmp_type(&cmp_type, args, 3);
873795
if (cmp_type == STRING_RESULT &&
874-
agg_arg_collations_for_comparison(cmp_collation, args, 3))
796+
agg_arg_charsets(cmp_collation, args, 3, MY_COLL_CMP_CONV))
875797
return;
876798

877799
/*
@@ -987,7 +909,7 @@ Item_func_ifnull::fix_length_and_dec()
987909
decimals=max(args[0]->decimals,args[1]->decimals);
988910
agg_result_type(&cached_result_type, args, 2);
989911
if (cached_result_type == STRING_RESULT)
990-
agg_arg_collations(collation, args, arg_count);
912+
agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
991913
else if (cached_result_type != REAL_RESULT)
992914
decimals= 0;
993915

@@ -1083,7 +1005,7 @@ Item_func_if::fix_length_and_dec()
10831005
agg_result_type(&cached_result_type, args+1, 2);
10841006
if (cached_result_type == STRING_RESULT)
10851007
{
1086-
if (agg_arg_collations(collation, args+1, 2))
1008+
if (agg_arg_charsets(collation, args+1, 2, MY_COLL_ALLOW_CONV))
10871009
return;
10881010
}
10891011
else
@@ -1354,7 +1276,7 @@ void Item_func_case::fix_length_and_dec()
13541276

13551277
agg_result_type(&cached_result_type, agg, nagg);
13561278
if ((cached_result_type == STRING_RESULT) &&
1357-
agg_arg_collations(collation, agg, nagg))
1279+
agg_arg_charsets(collation, agg, nagg, MY_COLL_ALLOW_CONV))
13581280
return;
13591281

13601282

@@ -1370,7 +1292,7 @@ void Item_func_case::fix_length_and_dec()
13701292
nagg++;
13711293
agg_cmp_type(&cmp_type, agg, nagg);
13721294
if ((cmp_type == STRING_RESULT) &&
1373-
agg_arg_collations_for_comparison(cmp_collation, agg, nagg))
1295+
agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV))
13741296
return;
13751297
}
13761298

@@ -1477,7 +1399,7 @@ void Item_func_coalesce::fix_length_and_dec()
14771399
set_if_bigger(decimals,args[i]->decimals);
14781400
}
14791401
if (cached_result_type == STRING_RESULT)
1480-
agg_arg_collations(collation, args, arg_count);
1402+
agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV);
14811403
else if (cached_result_type != REAL_RESULT)
14821404
decimals= 0;
14831405
}
@@ -2423,7 +2345,7 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
24232345
max_length= 1;
24242346
decimals= 0;
24252347

2426-
if (agg_arg_collations(cmp_collation, args, 2))
2348+
if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV))
24272349
return 1;
24282350

24292351
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();

0 commit comments

Comments
 (0)