Skip to content

Commit 11fbd94

Browse files
authored
Merge pull request #78664 from Azoy/unicode-16
[stdlib] Unicode 16
2 parents 82e111a + 69f6df0 commit 11fbd94

File tree

69 files changed

+76458
-52142
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+76458
-52142
lines changed

stdlib/private/StdlibUnicodeUnittest/StdlibUnicodeUnittest.swift

+1-1
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ func readInputNormalizationTest(index: Int) -> [NormalizationTest] {
118118
public let normalizationTests = readInputNormalizationTest(index: 2)
119119

120120
// Native normalization in stdlib, tested against the newest supported Unicode version
121-
public let normalizationTests14 = readInputNormalizationTest(index: 3)
121+
public let normalizationTestsNew = readInputNormalizationTest(index: 3)
122122
#endif
123123

124124
public struct UTFTest {

stdlib/private/StdlibUnicodeUnittest/UnicodeScalarProperties.swift

+21-7
Original file line number | Diff line number | Diff line change
@@ -691,13 +691,13 @@ public let caseFolding: [Unicode.Scalar: String] = {
691691
//===----------------------------------------------------------------------===//
692692

693693
extension Unicode {
694-
// Note: The `Script` enum includes the "meta" script type "Katakana_Or_Hiragana", which
695-
// isn't defined by https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt,
696-
// but is defined by https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
697-
// We may want to split it out, as it's the only case that is a union of
698-
// other script types.
699-
700694
/// Character script types.
695+
///
696+
/// Note: this includes the "meta" script type "Katakana_Or_Hiragana", which
697+
/// isn't defined by https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt,
698+
/// but is defined by https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt.
699+
/// We may want to split it out, as it's the only case that is a union of
700+
/// other script types.
701701
public enum Script: String, Hashable {
702702
case adlam = "Adlam"
703703
case ahom = "Ahom"
@@ -738,6 +738,7 @@ extension Unicode {
738738
case elbasan = "Elbasan"
739739
case elymaic = "Elymaic"
740740
case ethiopic = "Ethiopic"
741+
case garay = "Garay"
741742
case georgian = "Georgian"
742743
case glagolitic = "Glagolitic"
743744
case gothic = "Gothic"
@@ -746,6 +747,7 @@ extension Unicode {
746747
case gujarati = "Gujarati"
747748
case gunjalaGondi = "Gunjala_Gondi"
748749
case gurmukhi = "Gurmukhi"
750+
case gurungKhema = "Gurung_Khema"
749751
case han = "Han"
750752
case hangul = "Hangul"
751753
case hanifiRohingya = "Hanifi_Rohingya"
@@ -778,6 +780,7 @@ extension Unicode {
778780
case lisu = "Lisu"
779781
case lycian = "Lycian"
780782
case lydian = "Lydian"
783+
case kiratRai = "Kirat_Rai"
781784
case mahajani = "Mahajani"
782785
case makasar = "Makasar"
783786
case malayalam = "Malayalam"
@@ -815,6 +818,7 @@ extension Unicode {
815818
case oldSouthArabian = "Old_South_Arabian"
816819
case oldTurkic = "Old_Turkic"
817820
case oldUyghur = "Old_Uyghur"
821+
case olOnal = "Ol_Onal"
818822
case oriya = "Oriya"
819823
case osage = "Osage"
820824
case osmanya = "Osmanya"
@@ -837,6 +841,7 @@ extension Unicode {
837841
case soraSompeng = "Sora_Sompeng"
838842
case soyombo = "Soyombo"
839843
case sundanese = "Sundanese"
844+
case sunuwar = "Sunuwar"
840845
case sylotiNagri = "Syloti_Nagri"
841846
case syriac = "Syriac"
842847
case tagalog = "Tagalog"
@@ -854,7 +859,9 @@ extension Unicode {
854859
case tibetan = "Tibetan"
855860
case tifinagh = "Tifinagh"
856861
case tirhuta = "Tirhuta"
862+
case todhri = "Todhri"
857863
case toto = "Toto"
864+
case tuluTigalari = "Tulu_Tigalari"
858865
case ugaritic = "Ugaritic"
859866
case unknown = "Unknown"
860867
case vai = "Vai"
@@ -940,6 +947,7 @@ func classifyScriptProperty(
940947
case "elba", "elbasan": return .elbasan
941948
case "elym", "elymaic": return .elymaic
942949
case "ethi", "ethiopic": return .ethiopic
950+
case "gara", "garay": return .garay
943951
case "geor", "georgian": return .georgian
944952
case "glag", "glagolitic": return .glagolitic
945953
case "gong", "gunjalagondi": return .gunjalaGondi
@@ -948,6 +956,7 @@ func classifyScriptProperty(
948956
case "gran", "grantha": return .grantha
949957
case "grek", "greek": return .greek
950958
case "gujr", "gujarati": return .gujarati
959+
case "gukh", "gurungkhema": return .gurungKhema
951960
case "guru", "gurmukhi": return .gurmukhi
952961
case "hang", "hangul": return .hangul
953962
case "hani", "han": return .han
@@ -970,6 +979,7 @@ func classifyScriptProperty(
970979
case "khoj", "khojki": return .khojki
971980
case "kits", "khitansmallscript": return .khitanSmallScript
972981
case "knda", "kannada": return .kannada
982+
case "krai", "kiratrai": return .kiratRai
973983
case "kthi", "kaithi": return .kaithi
974984
case "lana", "taitham": return .taiTham
975985
case "laoo", "lao": return .lao
@@ -997,7 +1007,7 @@ func classifyScriptProperty(
9971007
case "mtei", "meeteimayek": return .meeteiMayek
9981008
case "mult", "multani": return .multani
9991009
case "mymr", "myanmar": return .myanmar
1000-
case "nagm", "nagmundari": return .nagMundari
1010+
case "nagm", "nagmundari": return .nagMundari
10011011
case "nand", "nandinagari": return .nandinagari
10021012
case "narb", "oldnortharabian": return .oldNorthArabian
10031013
case "nbat", "nabataean": return .nabataean
@@ -1006,6 +1016,7 @@ func classifyScriptProperty(
10061016
case "nshu", "nushu": return .nushu
10071017
case "ogam", "ogham": return .ogham
10081018
case "olck", "olchiki": return .olChiki
1019+
case "onao", "olonal": return .olOnal
10091020
case "orkh", "oldturkic": return .oldTurkic
10101021
case "orya", "oriya": return .oriya
10111022
case "osge", "osage": return .osage
@@ -1037,6 +1048,7 @@ func classifyScriptProperty(
10371048
case "sora", "sorasompeng": return .soraSompeng
10381049
case "soyo", "soyombo": return .soyombo
10391050
case "sund", "sundanese": return .sundanese
1051+
case "sunu", "sunuwar": return .sunuwar
10401052
case "sylo", "sylotinagri": return .sylotiNagri
10411053
case "syrc", "syriac": return .syriac
10421054
case "tagb", "tagbanwa": return .tagbanwa
@@ -1054,7 +1066,9 @@ func classifyScriptProperty(
10541066
case "tibt", "tibetan": return .tibetan
10551067
case "tirh", "tirhuta": return .tirhuta
10561068
case "tnsa", "tangsa": return .tangsa
1069+
case "todr", "todhri": return .todhri
10571070
case "toto": return .toto
1071+
case "tutg", "tulutigalari": return .tuluTigalari
10581072
case "ugar", "ugaritic": return .ugaritic
10591073
case "vaii", "vai": return .vai
10601074
case "vith", "vithkuqi": return .vithkuqi

stdlib/public/SwiftShims/swift/shims/UnicodeData.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ SWIFT_RUNTIME_STDLIB_INTERNAL
6262
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar);
6363

6464
SWIFT_RUNTIME_STDLIB_INTERNAL
65-
__swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar);
65+
__swift_bool _swift_stdlib_isInCB_Consonant(__swift_uint32_t scalar);
6666

6767
//===----------------------------------------------------------------------===//
6868
// Word Breaking

0 commit comments

Comments
 (0)