Skip to content

Commit ac64a64

Browse files
author
Kailasnath Nagarkar
committed
Bug #25865525: BACKPORT 25147988 FIX TO 5.7
Description: LOAD DATA fails to accept a multibyte character that follows an escape mark, but INSERT can. Fix: Change READ_INFO::read_field to ignore the escape mark if it is followed by a multibyte character.
1 parent 9487f8e commit ac64a64

File tree

6 files changed

+78
-1
lines changed

6 files changed

+78
-1
lines changed

mysql-test/r/loaddata.result

+21
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,24 @@ LOAD DATA INFILE 'test.dat' INTO TABLE v(@a, @d) SET a= @a, d= @d;
597597
ERROR HY000: Column 'd' is not updatable
598598
DROP VIEW v;
599599
DROP TABLE t;
600+
#
601+
# Bug #25147988: LOAD DATA INFILE FAILS WITH AN ESCAPE CHARACTER
602+
# FOLLOWED BY A MULTI-BYTE ONE
603+
#
604+
CREATE TABLE t1(a VARCHAR(20)) CHARSET utf8mb4;
605+
LOAD DATA INFILE '../../std_data/loaddata_utf8.dat' INTO TABLE t1 CHARACTER SET utf8mb4;
606+
SELECT HEX(a) FROM t1;
607+
HEX(a)
608+
E4B880E4BA8CE4B889
609+
E59B9BE4BA94E585AD
610+
E4B883E585ABE4B99D
611+
E4B880E4BA8CE4B889
612+
E59B9BE4BA94E585AD
613+
E4B883E585ABE4B99D0A
614+
DROP TABLE t1;
615+
CREATE TABLE t1(a VARCHAR(20)) CHARSET gb18030;
616+
LOAD DATA INFILE '../../std_data/loaddata7.dat' INTO TABLE t1 CHARACTER SET gb18030;
617+
SELECT HEX(a) FROM t1;
618+
HEX(a)
619+
815C825C
620+
DROP TABLE t1;

mysql-test/std_data/loaddata7.dat

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
\�\\�\

mysql-test/std_data/loaddata_utf8.dat

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
一二三
22
四五六
33
七八九
4+
\一二三
5+
四\五六
6+
七八九\

mysql-test/suite/rpl/r/rpl_loaddata_charset.result

+12
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,19 @@ hex(cl)
5757
E4B880E4BA8CE4B889
5858
E59B9BE4BA94E585AD
5959
E4B883E585ABE4B99D
60+
E4B880E4BA8CE4B889
61+
E59B9BE4BA94E585AD
62+
E4B883E585ABE4B99D0A
6063
----------content on slave----------
6164
USE mysqltest;
6265
SELECT hex(cl) FROM t;
6366
hex(cl)
6467
E4B880E4BA8CE4B889
6568
E59B9BE4BA94E585AD
6669
E4B883E585ABE4B99D
70+
E4B880E4BA8CE4B889
71+
E59B9BE4BA94E585AD
72+
E4B883E585ABE4B99D0A
6773
DROP DATABASE mysqltest;
6874
DROP DATABASE IF EXISTS mysqltest;
6975
CREATE DATABASE mysqltest CHARSET UTF8;
@@ -77,12 +83,18 @@ hex(cl)
7783
E4B880E4BA8CE4B889
7884
E59B9BE4BA94E585AD
7985
E4B883E585ABE4B99D
86+
E4B880E4BA8CE4B889
87+
E59B9BE4BA94E585AD
88+
E4B883E585ABE4B99D0A
8089
----------content on slave----------
8190
USE mysqltest;
8291
SELECT hex(cl) FROM t;
8392
hex(cl)
8493
E4B880E4BA8CE4B889
8594
E59B9BE4BA94E585AD
8695
E4B883E585ABE4B99D
96+
E4B880E4BA8CE4B889
97+
E59B9BE4BA94E585AD
98+
E4B883E585ABE4B99D0A
8799
DROP DATABASE mysqltest;
88100
include/rpl_end.inc

mysql-test/t/loaddata.test

+15
Original file line numberDiff line numberDiff line change
@@ -709,3 +709,18 @@ LOAD DATA INFILE 'test.dat' INTO TABLE v(@a, @d) SET a= @a, d= @d;
709709

710710
DROP VIEW v;
711711
DROP TABLE t;
712+
713+
--echo #
714+
--echo # Bug #25147988: LOAD DATA INFILE FAILS WITH AN ESCAPE CHARACTER
715+
--echo # FOLLOWED BY A MULTI-BYTE ONE
716+
--echo #
717+
# Test escape mark followed by multibyte character
718+
CREATE TABLE t1(a VARCHAR(20)) CHARSET utf8mb4;
719+
LOAD DATA INFILE '../../std_data/loaddata_utf8.dat' INTO TABLE t1 CHARACTER SET utf8mb4;
720+
SELECT HEX(a) FROM t1;
721+
DROP TABLE t1;
722+
# Test multibyte character whose second byte is 0x5C
723+
CREATE TABLE t1(a VARCHAR(20)) CHARSET gb18030;
724+
LOAD DATA INFILE '../../std_data/loaddata7.dat' INTO TABLE t1 CHARACTER SET gb18030;
725+
SELECT HEX(a) FROM t1;
726+
DROP TABLE t1;

sql/sql_load.cc

+26-1
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,7 @@ int READ_INFO::read_field()
16441644

16451645
for (;;)
16461646
{
1647+
bool escaped_mb= false;
16471648
while ( to < end_of_buff)
16481649
{
16491650
chr = GET;
@@ -1665,7 +1666,23 @@ int READ_INFO::read_field()
16651666
*/
16661667
if (escape_char != enclosed_char || chr == escape_char)
16671668
{
1668-
*to++ = (uchar) unescape((char) chr);
1669+
uint ml;
1670+
GET_MBCHARLEN(read_charset, chr, ml);
1671+
/*
1672+
For escaped multibyte character, push back the first byte,
1673+
and will handle it below.
1674+
Because multibyte character's second byte is possible to be
1675+
0x5C, per Query_result_export::send_data, both head byte and
1676+
tail byte are escaped for such characters. So mark it if the
1677+
head byte is escaped and will handle it below.
1678+
*/
1679+
if (ml == 1)
1680+
*to++= (uchar) unescape((char) chr);
1681+
else
1682+
{
1683+
escaped_mb= true;
1684+
PUSH(chr);
1685+
}
16691686
continue;
16701687
}
16711688
PUSH(chr);
@@ -1757,8 +1774,16 @@ int READ_INFO::read_field()
17571774
to-= i;
17581775
goto found_eof;
17591776
}
1777+
else if (chr == escape_char && escaped_mb)
1778+
{
1779+
// Unescape the second byte if it is escaped.
1780+
chr= GET;
1781+
chr= (uchar) unescape((char) chr);
1782+
}
17601783
*to++ = chr;
17611784
}
1785+
if (escaped_mb)
1786+
escaped_mb= false;
17621787
if (my_ismbchar(read_charset,
17631788
(const char *)p,
17641789
(const char *)to))

0 commit comments

Comments
 (0)