forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunicode_chars.rs
251 lines (246 loc) · 10.5 KB
/
unicode_chars.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Characters and their corresponding confusables were collected from
// http://www.unicode.org/Public/security/revision-06/confusables.txt
use syntax_pos::mk_sp as make_span;
use errors::DiagnosticBuilder;
use super::StringReader;
/// Unicode characters that are easily mistaken for ASCII punctuation or
/// whitespace, stored as `(confusable, Unicode name, ASCII look-alike)`.
///
/// Every confusable is written as a `\u{...}` escape instead of a literal
/// glyph. Many of these characters are invisible or indistinguishable from
/// their ASCII counterpart in an editor, and tools that normalize text
/// (fullwidth folding, NFC, ...) would otherwise silently turn an entry into
/// its own replacement or into an invalid literal such as `'''`.
/// The code point of each entry is the one that carries the Unicode name given
/// in the second field.
///
/// Rows are grouped by their ASCII look-alike. Lookups take the first row
/// whose confusable matches, so the confusables must stay distinct.
const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
    // Look-alikes of ' '
    ('\u{00A0}', "No-Break Space", ' '),
    ('\u{1680}', "Ogham Space Mark", ' '),
    ('\u{2000}', "En Quad", ' '),
    ('\u{2001}', "Em Quad", ' '),
    ('\u{2002}', "En Space", ' '),
    ('\u{2003}', "Em Space", ' '),
    ('\u{2004}', "Three-Per-Em Space", ' '),
    ('\u{2005}', "Four-Per-Em Space", ' '),
    ('\u{2006}', "Six-Per-Em Space", ' '),
    ('\u{2007}', "Figure Space", ' '),
    ('\u{2008}', "Punctuation Space", ' '),
    ('\u{2009}', "Thin Space", ' '),
    ('\u{200A}', "Hair Space", ' '),
    ('\u{202F}', "Narrow No-Break Space", ' '),
    ('\u{205F}', "Medium Mathematical Space", ' '),
    ('\u{3000}', "Ideographic Space", ' '),
    // Look-alikes of '_'
    ('\u{07FA}', "Nko Lajanyalan", '_'),
    ('\u{FE4D}', "Dashed Low Line", '_'),
    ('\u{FE4E}', "Centreline Low Line", '_'),
    ('\u{FE4F}', "Wavy Low Line", '_'),
    // Look-alikes of '-'
    ('\u{2010}', "Hyphen", '-'),
    ('\u{2011}', "Non-Breaking Hyphen", '-'),
    ('\u{2012}', "Figure Dash", '-'),
    ('\u{2013}', "En Dash", '-'),
    ('\u{2014}', "Em Dash", '-'),
    ('\u{FE58}', "Small Em Dash", '-'),
    ('\u{2043}', "Hyphen Bullet", '-'),
    ('\u{02D7}', "Modifier Letter Minus Sign", '-'),
    ('\u{2212}', "Minus Sign", '-'),
    ('\u{30FC}', "Katakana-Hiragana Prolonged Sound Mark", '-'),
    // Look-alikes of ','
    ('\u{066B}', "Arabic Decimal Separator", ','),
    ('\u{201A}', "Single Low-9 Quotation Mark", ','),
    ('\u{A4F9}', "Lisu Letter Tone Na Po", ','),
    ('\u{FF0C}', "Fullwidth Comma", ','),
    // Look-alikes of ';'
    ('\u{037E}', "Greek Question Mark", ';'),
    ('\u{FF1B}', "Fullwidth Semicolon", ';'),
    // Look-alikes of ':'
    ('\u{0903}', "Devanagari Sign Visarga", ':'),
    ('\u{0A83}', "Gujarati Sign Visarga", ':'),
    ('\u{FF1A}', "Fullwidth Colon", ':'),
    ('\u{0589}', "Armenian Full Stop", ':'),
    ('\u{0703}', "Syriac Supralinear Colon", ':'),
    ('\u{0704}', "Syriac Sublinear Colon", ':'),
    ('\u{FE30}', "Presentation Form For Vertical Two Dot Leader", ':'),
    ('\u{1803}', "Mongolian Full Stop", ':'),
    ('\u{1809}', "Mongolian Manchu Full Stop", ':'),
    ('\u{205A}', "Two Dot Punctuation", ':'),
    ('\u{05C3}', "Hebrew Punctuation Sof Pasuq", ':'),
    ('\u{02F8}', "Modifier Letter Raised Colon", ':'),
    ('\u{A789}', "Modifier Letter Colon", ':'),
    ('\u{2236}', "Ratio", ':'),
    ('\u{02D0}', "Modifier Letter Triangular Colon", ':'),
    ('\u{A4FD}', "Lisu Letter Tone Mya Jeu", ':'),
    // Look-alikes of '!'
    ('\u{FF01}', "Fullwidth Exclamation Mark", '!'),
    ('\u{01C3}', "Latin Letter Retroflex Click", '!'),
    // Look-alikes of '?'
    ('\u{0294}', "Latin Letter Glottal Stop", '?'),
    ('\u{097D}', "Devanagari Letter Glottal Stop", '?'),
    ('\u{13AE}', "Cherokee Letter He", '?'),
    ('\u{FF1F}', "Fullwidth Question Mark", '?'),
    // Look-alikes of '.'
    ('\u{1D16D}', "Musical Symbol Combining Augmentation Dot", '.'),
    ('\u{2024}', "One Dot Leader", '.'),
    ('\u{06D4}', "Arabic Full Stop", '.'),
    ('\u{0701}', "Syriac Supralinear Full Stop", '.'),
    ('\u{0702}', "Syriac Sublinear Full Stop", '.'),
    ('\u{A60E}', "Vai Full Stop", '.'),
    ('\u{10A50}', "Kharoshthi Punctuation Dot", '.'),
    ('\u{00B7}', "Middle Dot", '.'),
    ('\u{0660}', "Arabic-Indic Digit Zero", '.'),
    ('\u{06F0}', "Extended Arabic-Indic Digit Zero", '.'),
    ('\u{A4F8}', "Lisu Letter Tone Mya Ti", '.'),
    ('\u{3002}', "Ideographic Full Stop", '.'),
    ('\u{30FB}', "Katakana Middle Dot", '.'),
    // Look-alikes of '\''
    ('\u{055D}', "Armenian Comma", '\''),
    ('\u{FF07}', "Fullwidth Apostrophe", '\''),
    ('\u{2018}', "Left Single Quotation Mark", '\''),
    ('\u{2019}', "Right Single Quotation Mark", '\''),
    ('\u{201B}', "Single High-Reversed-9 Quotation Mark", '\''),
    ('\u{2032}', "Prime", '\''),
    ('\u{2035}', "Reversed Prime", '\''),
    ('\u{055A}', "Armenian Apostrophe", '\''),
    ('\u{05F3}', "Hebrew Punctuation Geresh", '\''),
    ('\u{1FEF}', "Greek Varia", '\''),
    ('\u{FF40}', "Fullwidth Grave Accent", '\''),
    ('\u{0384}', "Greek Tonos", '\''),
    ('\u{1FFD}', "Greek Oxia", '\''),
    ('\u{1FBD}', "Greek Koronis", '\''),
    ('\u{1FBF}', "Greek Psili", '\''),
    ('\u{1FFE}', "Greek Dasia", '\''),
    ('\u{02B9}', "Modifier Letter Prime", '\''),
    ('\u{0374}', "Greek Numeral Sign", '\''),
    ('\u{02CA}', "Modifier Letter Acute Accent", '\''),
    ('\u{02CB}', "Modifier Letter Grave Accent", '\''),
    ('\u{02F4}', "Modifier Letter Middle Grave Accent", '\''),
    ('\u{02BB}', "Modifier Letter Turned Comma", '\''),
    ('\u{02BD}', "Modifier Letter Reversed Comma", '\''),
    ('\u{02BC}', "Modifier Letter Apostrophe", '\''),
    ('\u{02BE}', "Modifier Letter Right Half Ring", '\''),
    ('\u{A78C}', "Latin Small Letter Saltillo", '\''),
    ('\u{05D9}', "Hebrew Letter Yod", '\''),
    ('\u{07F4}', "Nko High Tone Apostrophe", '\''),
    ('\u{07F5}', "Nko Low Tone Apostrophe", '\''),
    // Look-alikes of '"'
    ('\u{FF02}', "Fullwidth Quotation Mark", '"'),
    ('\u{201C}', "Left Double Quotation Mark", '"'),
    ('\u{201D}', "Right Double Quotation Mark", '"'),
    ('\u{201F}', "Double High-Reversed-9 Quotation Mark", '"'),
    ('\u{2033}', "Double Prime", '"'),
    ('\u{2036}', "Reversed Double Prime", '"'),
    ('\u{3003}', "Ditto Mark", '"'),
    ('\u{05F4}', "Hebrew Punctuation Gershayim", '"'),
    ('\u{02DD}', "Double Acute Accent", '"'),
    ('\u{02BA}', "Modifier Letter Double Prime", '"'),
    ('\u{02F6}', "Modifier Letter Middle Double Acute Accent", '"'),
    ('\u{02F5}', "Modifier Letter Middle Double Grave Accent", '"'),
    ('\u{02EE}', "Modifier Letter Double Apostrophe", '"'),
    ('\u{05F2}', "Hebrew Ligature Yiddish Double Yod", '"'),
    ('\u{275E}', "Heavy Double Comma Quotation Mark Ornament", '"'),
    ('\u{275D}', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
    // Look-alikes of '(' and ')'
    ('\u{2768}', "Medium Left Parenthesis Ornament", '('),
    ('\u{FD3E}', "Ornate Left Parenthesis", '('),
    ('\u{FF08}', "Fullwidth Left Parenthesis", '('),
    ('\u{2769}', "Medium Right Parenthesis Ornament", ')'),
    ('\u{FD3F}', "Ornate Right Parenthesis", ')'),
    ('\u{FF09}', "Fullwidth Right Parenthesis", ')'),
    // Look-alikes of '['
    ('\u{FF3B}', "Fullwidth Left Square Bracket", '['),
    ('\u{2772}', "Light Left Tortoise Shell Bracket Ornament", '['),
    ('\u{300C}', "Left Corner Bracket", '['),
    ('\u{300E}', "Left White Corner Bracket", '['),
    ('\u{3010}', "Left Black Lenticular Bracket", '['),
    ('\u{3014}', "Left Tortoise Shell Bracket", '['),
    ('\u{3016}', "Left White Lenticular Bracket", '['),
    ('\u{3018}', "Left White Tortoise Shell Bracket", '['),
    ('\u{301A}', "Left White Square Bracket", '['),
    // Look-alikes of ']'
    ('\u{FF3D}', "Fullwidth Right Square Bracket", ']'),
    ('\u{2773}', "Light Right Tortoise Shell Bracket Ornament", ']'),
    ('\u{300D}', "Right Corner Bracket", ']'),
    ('\u{300F}', "Right White Corner Bracket", ']'),
    ('\u{3011}', "Right Black Lenticular Bracket", ']'),
    ('\u{3015}', "Right Tortoise Shell Bracket", ']'),
    ('\u{3017}', "Right White Lenticular Bracket", ']'),
    ('\u{3019}', "Right White Tortoise Shell Bracket", ']'),
    ('\u{301B}', "Right White Square Bracket", ']'),
    // Look-alikes of '{' and '}'
    ('\u{2774}', "Medium Left Curly Bracket Ornament", '{'),
    ('\u{2775}', "Medium Right Curly Bracket Ornament", '}'),
    // Look-alikes of '*'
    ('\u{204E}', "Low Asterisk", '*'),
    ('\u{066D}', "Arabic Five Pointed Star", '*'),
    ('\u{2217}', "Asterisk Operator", '*'),
    // Look-alikes of '/'
    ('\u{1735}', "Philippine Single Punctuation", '/'),
    ('\u{2041}', "Caret Insertion Point", '/'),
    ('\u{2215}', "Division Slash", '/'),
    ('\u{2044}', "Fraction Slash", '/'),
    ('\u{2571}', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
    ('\u{27CB}', "Mathematical Rising Diagonal", '/'),
    ('\u{29F8}', "Big Solidus", '/'),
    ('\u{31D3}', "Cjk Stroke Sp", '/'),
    ('\u{3033}', "Vertical Kana Repeat Mark Upper Half", '/'),
    ('\u{4E3F}', "Cjk Unified Ideograph-4E3F", '/'),
    ('\u{2F03}', "Kangxi Radical Slash", '/'),
    // Look-alikes of '\\'
    ('\u{FF3C}', "Fullwidth Reverse Solidus", '\\'),
    ('\u{FE68}', "Small Reverse Solidus", '\\'),
    ('\u{2216}', "Set Minus", '\\'),
    ('\u{27CD}', "Mathematical Falling Diagonal", '\\'),
    ('\u{29F5}', "Reverse Solidus Operator", '\\'),
    ('\u{29F9}', "Big Reverse Solidus", '\\'),
    ('\u{3001}', "Ideographic Comma", '\\'),
    ('\u{30FD}', "Katakana Iteration Mark", '\\'),
    ('\u{31D4}', "Cjk Stroke D", '\\'),
    ('\u{4E36}', "Cjk Unified Ideograph-4E36", '\\'),
    ('\u{2F02}', "Kangxi Radical Dot", '\\'),
    // Look-alikes of '&' and '+'
    ('\u{A778}', "Latin Small Letter Um", '&'),
    ('\u{FB29}', "Hebrew Letter Alternative Plus Sign", '+'),
    // Look-alikes of '<', '=' and '>'
    ('\u{2039}', "Single Left-Pointing Angle Quotation Mark", '<'),
    ('\u{276E}', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
    ('\u{02C2}', "Modifier Letter Left Arrowhead", '<'),
    ('\u{3008}', "Left Angle Bracket", '<'),
    ('\u{300A}', "Left Double Angle Bracket", '<'),
    ('\u{A4FF}', "Lisu Punctuation Full Stop", '='),
    ('\u{203A}', "Single Right-Pointing Angle Quotation Mark", '>'),
    ('\u{276F}', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
    ('\u{02C3}', "Modifier Letter Right Arrowhead", '>'),
    ('\u{3009}', "Right Angle Bracket", '>'),
    ('\u{300B}', "Right Double Angle Bracket", '>'),
    // Letters that resemble ASCII punctuation
    ('\u{2CBA}', "Coptic Capital Letter Dialect-P Ni", '-'),
    ('\u{0241}', "Latin Capital Letter Glottal Stop", '?'),
    ('\u{2CC6}', "Coptic Capital Letter Old Coptic Esh", '/'),
];
/// Human-readable names for the ASCII characters that appear as replacement
/// suggestions, stored as `(ASCII character, name)`.
const ASCII_ARRAY: &'static [(char, &'static str)] = &[
    // Whitespace, joiners and sentence punctuation
    (' ', "Space"),
    ('_', "Underscore"),
    ('-', "Minus/Hyphen"),
    (',', "Comma"),
    (';', "Semicolon"),
    (':', "Colon"),
    ('!', "Exclamation Mark"),
    ('?', "Question Mark"),
    ('.', "Period"),
    // Quotes
    ('\'', "Single Quote"),
    ('"', "Quotation Mark"),
    // Brackets
    ('(', "Left Parenthesis"),
    (')', "Right Parenthesis"),
    ('[', "Left Square Bracket"),
    (']', "Right Square Bracket"),
    ('{', "Left Curly Brace"),
    ('}', "Right Curly Brace"),
    // Operators
    ('*', "Asterisk"),
    ('/', "Slash"),
    ('\\', "Backslash"),
    ('&', "Ampersand"),
    ('+', "Plus Sign"),
    ('<', "Less-Than Sign"),
    ('=', "Equals Sign"),
    ('>', "Greater-Than Sign"),
];
/// Adds a help note to `err` when `ch` is a Unicode character that looks like
/// an ASCII one.
///
/// `ch` is looked up in `UNICODE_ARRAY`; if it is not listed, nothing is done.
/// Otherwise the ASCII look-alike is looked up in `ASCII_ARRAY` to obtain its
/// name, and a help message is attached to `err` on the span built from
/// `reader.pos` and `reader.next_pos`.
///
/// A look-alike with no entry in `ASCII_ARRAY` means the two tables have
/// drifted apart. That case is reported via `span_bug_no_panic` on the
/// session's diagnostic handler instead of being added to `err`.
pub fn check_for_substitution<'a>(reader: &StringReader<'a>,
                                  ch: char,
                                  err: &mut DiagnosticBuilder<'a>) {
    let confusable = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch);

    // `if let` rather than `Option::map`: the body only has side effects.
    if let Some(&(_, u_name, ascii_char)) = confusable {
        let span = make_span(reader.pos, reader.next_pos);
        match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
            Some(&(_, ascii_name)) => {
                let msg =
                    format!("unicode character '{}' ({}) looks much like '{}' ({}), but it's not",
                            ch, u_name, ascii_char, ascii_name);
                err.span_help(span, &msg);
            }
            None => {
                let msg = format!("substitution character not found for '{}'", ch);
                reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
            }
        }
    }
}