Skip to content

Commit eda2ffa

Browse files
committed
Add new Unicode 16 Scripts
1 parent f27be25 commit eda2ffa

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

+9
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ extension Parser {
237237
case "elba", "elbasan": return .elbasan
238238
case "elym", "elymaic": return .elymaic
239239
case "ethi", "ethiopic": return .ethiopic
240+
case "gara", "garay": return .garay
240241
case "geor", "georgian": return .georgian
241242
case "glag", "glagolitic": return .glagolitic
242243
case "gong", "gunjalagondi": return .gunjalaGondi
@@ -245,6 +246,7 @@ extension Parser {
245246
case "gran", "grantha": return .grantha
246247
case "grek", "greek": return .greek
247248
case "gujr", "gujarati": return .gujarati
249+
case "gukh", "gurungkhema": return .gurungKhema
248250
case "guru", "gurmukhi": return .gurmukhi
249251
case "hang", "hangul": return .hangul
250252
case "hani", "han": return .han
@@ -261,11 +263,13 @@ extension Parser {
261263
case "java", "javanese": return .javanese
262264
case "kali", "kayahli": return .kayahLi
263265
case "kana", "katakana": return .katakana
266+
case "kawi": return .kawi
264267
case "khar", "kharoshthi": return .kharoshthi
265268
case "khmr", "khmer": return .khmer
266269
case "khoj", "khojki": return .khojki
267270
case "kits", "khitansmallscript": return .khitanSmallScript
268271
case "knda", "kannada": return .kannada
272+
case "krai", "kiratrai": return .kiratRai
269273
case "kthi", "kaithi": return .kaithi
270274
case "lana", "taitham": return .taiTham
271275
case "laoo", "lao": return .lao
@@ -293,6 +297,7 @@ extension Parser {
293297
case "mtei", "meeteimayek": return .meeteiMayek
294298
case "mult", "multani": return .multani
295299
case "mymr", "myanmar": return .myanmar
300+
case "nagm", "nagmundari": return .nagMundari
296301
case "nand", "nandinagari": return .nandinagari
297302
case "narb", "oldnortharabian": return .oldNorthArabian
298303
case "nbat", "nabataean": return .nabataean
@@ -301,6 +306,7 @@ extension Parser {
301306
case "nshu", "nushu": return .nushu
302307
case "ogam", "ogham": return .ogham
303308
case "olck", "olchiki": return .olChiki
309+
case "onao", "olonal": return .olOnal
304310
case "orkh", "oldturkic": return .oldTurkic
305311
case "orya", "oriya": return .oriya
306312
case "osge", "osage": return .osage
@@ -332,6 +338,7 @@ extension Parser {
332338
case "sora", "sorasompeng": return .soraSompeng
333339
case "soyo", "soyombo": return .soyombo
334340
case "sund", "sundanese": return .sundanese
341+
case "sunu", "sunuwar": return .sunuwar
335342
case "sylo", "sylotinagri": return .sylotiNagri
336343
case "syrc", "syriac": return .syriac
337344
case "tagb", "tagbanwa": return .tagbanwa
@@ -349,7 +356,9 @@ extension Parser {
349356
case "tibt", "tibetan": return .tibetan
350357
case "tirh", "tirhuta": return .tirhuta
351358
case "tnsa", "tangsa": return .tangsa
359+
case "todr", "todhri": return .todhri
352360
case "toto": return .toto
361+
case "tutg", "tulutigalari": return .tuluTigalari
353362
case "ugar", "ugaritic": return .ugaritic
354363
case "vaii", "vai": return .vai
355364
case "vith", "vithkuqi": return .vithkuqi

Sources/_RegexParser/Utility/MissingUnicode.swift

+15-6
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212
// MARK: - Missing stdlib API
1313

1414
extension Unicode {
15-
// Note: The `Script` enum includes the "meta" script type "Katakana_Or_Hiragana", which
16-
// isn't defined by https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt,
17-
// but is defined by https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
18-
// We may want to split it out, as it's the only case that is a union of
19-
// other script types.
20-
2115
/// Character script types.
16+
///
17+
/// Note that this includes the "meta" script type "Katakana_Or_Hiragana", which
18+
/// isn't defined by https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt,
19+
/// but is defined by https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
20+
/// We may want to split it out, as it's the only case that is a union of
21+
/// other script types.
2222
public enum Script: String, Hashable, CaseIterable {
2323
case adlam = "Adlam"
2424
case ahom = "Ahom"
@@ -59,6 +59,7 @@ extension Unicode {
5959
case elbasan = "Elbasan"
6060
case elymaic = "Elymaic"
6161
case ethiopic = "Ethiopic"
62+
case garay = "Garay"
6263
case georgian = "Georgian"
6364
case glagolitic = "Glagolitic"
6465
case gothic = "Gothic"
@@ -67,6 +68,7 @@ extension Unicode {
6768
case gujarati = "Gujarati"
6869
case gunjalaGondi = "Gunjala_Gondi"
6970
case gurmukhi = "Gurmukhi"
71+
case gurungKhema = "Gurung_Khema"
7072
case han = "Han"
7173
case hangul = "Hangul"
7274
case hanifiRohingya = "Hanifi_Rohingya"
@@ -83,6 +85,7 @@ extension Unicode {
8385
case kannada = "Kannada"
8486
case katakana = "Katakana"
8587
case katakanaOrHiragana = "Katakana_Or_Hiragana"
88+
case kawi = "Kawi"
8689
case kayahLi = "Kayah_Li"
8790
case kharoshthi = "Kharoshthi"
8891
case khitanSmallScript = "Khitan_Small_Script"
@@ -98,6 +101,7 @@ extension Unicode {
98101
case lisu = "Lisu"
99102
case lycian = "Lycian"
100103
case lydian = "Lydian"
104+
case kiratRai = "Kirat_Rai"
101105
case mahajani = "Mahajani"
102106
case makasar = "Makasar"
103107
case malayalam = "Malayalam"
@@ -117,6 +121,7 @@ extension Unicode {
117121
case multani = "Multani"
118122
case myanmar = "Myanmar"
119123
case nabataean = "Nabataean"
124+
case nagMundari = "Nag_Mundari"
120125
case nandinagari = "Nandinagari"
121126
case newa = "Newa"
122127
case newTaiLue = "New_Tai_Lue"
@@ -134,6 +139,7 @@ extension Unicode {
134139
case oldSouthArabian = "Old_South_Arabian"
135140
case oldTurkic = "Old_Turkic"
136141
case oldUyghur = "Old_Uyghur"
142+
case olOnal = "Ol_Onal"
137143
case oriya = "Oriya"
138144
case osage = "Osage"
139145
case osmanya = "Osmanya"
@@ -156,6 +162,7 @@ extension Unicode {
156162
case soraSompeng = "Sora_Sompeng"
157163
case soyombo = "Soyombo"
158164
case sundanese = "Sundanese"
165+
case sunuwar = "Sunuwar"
159166
case sylotiNagri = "Syloti_Nagri"
160167
case syriac = "Syriac"
161168
case tagalog = "Tagalog"
@@ -173,7 +180,9 @@ extension Unicode {
173180
case tibetan = "Tibetan"
174181
case tifinagh = "Tifinagh"
175182
case tirhuta = "Tirhuta"
183+
case todhri = "Todhri"
176184
case toto = "Toto"
185+
case tuluTigalari = "Tulu_Tigalari"
177186
case ugaritic = "Ugaritic"
178187
case unknown = "Unknown"
179188
case vai = "Vai"

0 commit comments

Comments
 (0)