Skip to content

Commit 9df5dc6

Browse files
authored
(112770879) “Washington, D.c.” capitalized incorrectly (swiftlang#212)
* (112770879) “Washington, D.c.” capitalized incorrectly. Theoretically, "." is a case-ignorable character, so "D.C." is conceptually the same as "DC", whose capitalized mapping would be "Dc". This behavior does not align well with real-world use cases, though. Work around this by splitting the string on "." and titlecasing each substring individually. * (112770879) “Washington, D.c.” capitalized incorrectly, cont. For FoundationPreview: move the String comparison utilities from FoundationEssentials to _FoundationInternals so they can be accessed from FoundationInternationalization too.
1 parent 0eeb99b commit 9df5dc6

File tree

10 files changed

+96
-60
lines changed

10 files changed

+96
-60
lines changed

Sources/FoundationEssentials/String/String+Essentials.swift

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,46 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// MARK: - Exported Types
14+
@available(macOS 10.0, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
15+
extension String {
16+
#if FOUNDATION_FRAMEWORK
17+
public typealias CompareOptions = NSString.CompareOptions
18+
#else
19+
/// These options apply to the various search/find and comparison methods (except where noted).
20+
public struct CompareOptions : OptionSet, Sendable {
21+
public let rawValue: UInt
22+
23+
public init(rawValue: UInt) {
24+
self.rawValue = rawValue
25+
}
26+
27+
public static let caseInsensitive = CompareOptions(rawValue: 1)
28+
/// Exact character-by-character equivalence
29+
public static let literal = CompareOptions(rawValue: 2)
30+
/// Search from end of source string
31+
public static let backwards = CompareOptions(rawValue: 4)
32+
/// Search is limited to start (or end, if `.backwards`) of source string
33+
public static let anchored = CompareOptions(rawValue: 8)
34+
/// Numbers within strings are compared using numeric value, that is,
35+
/// Foo2.txt < Foo7.txt < Foo25.txt;
36+
/// only applies to compare methods, not find
37+
public static let numeric = CompareOptions(rawValue: 64)
38+
/// If specified, ignores diacritics (o-umlaut == o)
39+
public static let diacriticInsensitive = CompareOptions(rawValue: 128)
40+
/// If specified, ignores width differences ('a' == UFF41)
41+
public static let widthInsensitive = CompareOptions(rawValue: 256)
42+
/// If specified, comparisons are forced to return either `.orderedAscending`
43+
/// or `.orderedDescending` if the strings are equivalent but not strictly equal,
44+
/// for stability when sorting (e.g. "aaa" > "AAA" with `.caseInsensitive` specified)
45+
public static let forcedOrdering = CompareOptions(rawValue: 512)
46+
/// The search string is treated as an ICU-compatible regular expression;
47+
/// if set, no other options can apply except `.caseInsensitive` and `.anchored`
48+
public static let regularExpression = CompareOptions(rawValue: 1024)
49+
}
50+
#endif // FOUNDATION_FRAMEWORK
51+
}
52+
1353
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
1454
extension String {
1555
func _capitalized() -> String {

Sources/FoundationInternationalization/Calendar/Calendar_Enumerate.swift

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,16 +2131,3 @@ extension Calendar.Component {
21312131
}
21322132
}
21332133

2134-
#if !FOUNDATION_FRAMEWORK
2135-
extension ComparisonResult {
2136-
init<T: Comparable>(_ t1: T, _ t2: T) {
2137-
if t1 < t2 {
2138-
self = .orderedAscending
2139-
} else if t1 > t2 {
2140-
self = .orderedDescending
2141-
} else {
2142-
self = .orderedSame
2143-
}
2144-
}
2145-
}
2146-
#endif

Sources/FoundationInternationalization/ICU/ICU+CaseMap.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,19 @@ extension ICU {
7171
}
7272
}
7373
}
74+
75+
func titlecase(_ s: Substring) -> String? {
76+
lock.withLock {
77+
var s = s
78+
return s.withUTF8 { srcBuf in
79+
srcBuf.withMemoryRebound(to: CChar.self) { buffer in
80+
_withResizingCharBuffer { destBuf, destSize, status in
81+
ucasemap_utf8ToTitle(casemap, destBuf, destSize, buffer.baseAddress!, Int32(buffer.count), &status)
82+
}
83+
}
84+
}
85+
}
86+
}
7487

7588
func titlecase(_ s: String) -> String? {
7689
// `ucasemap_utf8ToTitle` isn't thread-safe

Sources/FoundationInternationalization/String/String+Locale.swift

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,21 @@ extension String {
2121
return lowered
2222
}
2323

24-
func _capitalized(with locale: Locale?) -> String {
25-
guard let casemap = ICU.CaseMap.caseMappingForLocale(locale?.identifier), let titled = casemap.titlecase(self) else {
24+
func _capitalized(with locale: Locale?) -> String {
25+
guard let casemap = ICU.CaseMap.caseMappingForLocale(locale?.identifier) else {
2626
return capitalized
2727
}
28-
return titled
28+
29+
// Theoretically "." is a case-ignorable character, so the character after "." is not uppercased. This results in "D.c." for "D.C.".
30+
// Handle this special case by splitting the string with "." and titlecasing each substring individually.
31+
var result = ""
32+
try! self[...]._enumerateComponents(separatedBy: ".", options: []) { substr, isLastComponent in
33+
result += casemap.titlecase(substr) ?? substr.capitalized
34+
if !isLastComponent {
35+
result += "."
36+
}
37+
}
38+
return result
2939
}
3040

3141
func _uppercased(with locale: Locale?) -> String {

Sources/FoundationEssentials/String/String+Comparison.swift renamed to Sources/_FoundationInternals/String/String+Comparison.swift

Lines changed: 10 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,46 +10,6 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
// MARK: - Exported Types
14-
@available(macOS 10.0, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
15-
extension String {
16-
#if FOUNDATION_FRAMEWORK
17-
public typealias CompareOptions = NSString.CompareOptions
18-
#else
19-
/// These options apply to the various search/find and comparison methods (except where noted).
20-
public struct CompareOptions : OptionSet, Sendable {
21-
public let rawValue: UInt
22-
23-
public init(rawValue: UInt) {
24-
self.rawValue = rawValue
25-
}
26-
27-
static let caseInsensitive = CompareOptions(rawValue: 1)
28-
/// Exact character-by-character equivalence
29-
static let literal = CompareOptions(rawValue: 2)
30-
/// Search from end of source string
31-
static let backwards = CompareOptions(rawValue: 4)
32-
/// Search is limited to start (or end, if `.backwards`) of source string
33-
static let anchored = CompareOptions(rawValue: 8)
34-
/// Numbers within strings are compared using numeric value, that is,
35-
/// Foo2.txt < Foo7.txt < Foo25.txt;
36-
/// only applies to compare methods, not find
37-
static let numeric = CompareOptions(rawValue: 64)
38-
/// If specified, ignores diacritics (o-umlaut == o)
39-
static let diacriticInsensitive = CompareOptions(rawValue: 128)
40-
/// If specified, ignores width differences ('a' == UFF41)
41-
static let widthInsensitive = CompareOptions(rawValue: 256)
42-
/// If specified, comparisons are forced to return either `.orderedAscending`
43-
/// or `.orderedDescending` if the strings are equivalent but not strictly equal,
44-
/// for stability when sorting (e.g. "aaa" > "AAA" with `.caseInsensitive` specified)
45-
static let forcedOrdering = CompareOptions(rawValue: 512)
46-
/// The search string is treated as an ICU-compatible regular expression;
47-
/// if set, no other options can apply except `.caseInsensitive` and `.anchored`
48-
static let regularExpression = CompareOptions(rawValue: 1024)
49-
}
50-
#endif // FOUNDATION_FRAMEWORK
51-
}
52-
5313
extension UTF8.CodeUnit {
5414
static let newline: Self = 0x0A
5515
static let carriageReturn: Self = 0x0D
@@ -676,20 +636,26 @@ extension Substring {
676636

677637
func _components(separatedBy separator: Substring, options: String.CompareOptions = []) throws -> [String] {
678638
var result = [String]()
639+
try _enumerateComponents(separatedBy: separator, options: options) { substr, _ in
640+
result.append(String(substr))
641+
}
642+
return result
643+
}
644+
645+
// Only throws when using `.regularExpression` option
646+
func _enumerateComponents(separatedBy separator: Substring, options: String.CompareOptions = [], withBlock block: (_ component: Substring, _ isLastComponent: Bool) -> ()) throws {
679647
var searchStart = startIndex
680648
while searchStart < endIndex {
681649
let r = try self[searchStart...]._range(of: separator, options: options)
682650
guard let r, !r.isEmpty else {
683651
break
684652
}
685653

686-
result.append(String(self[searchStart ..< r.lowerBound]))
654+
block(self[searchStart ..< r.lowerBound], false)
687655
searchStart = r.upperBound
688656
}
689657

690-
result.append(String(self[searchStart..<endIndex]))
691-
692-
return result
658+
block(self[searchStart..<endIndex], true)
693659
}
694660
}
695661

Tests/FoundationInternationalizationTests/StringTests+Locale.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ final class StringLocaleTests: XCTestCase {
4545
test("İİ", "İi")
4646
test("II", "")
4747
test("«ijs»", "«İjs»")
48+
test("ijs.ıi", "İjs.Ii")
4849
}
4950

5051
do {
@@ -55,12 +56,31 @@ final class StringLocaleTests: XCTestCase {
5556
test("II", "Ii")
5657
test("«ijs»", "«IJs»")
5758
test("ijssEl iglOo IJSSEL", "IJssel Igloo IJssel")
59+
test("ijssEl.ijSSEL", "IJssel.IJssel")
5860
}
5961

6062
do {
6163
locale = Locale(identifier: "el")
6264
test("άυλος", "Άυλος")
6365
}
66+
67+
do {
68+
locale = Locale(identifier: "en_US")
69+
test("washington d.c.", "Washington D.C.")
70+
test("washington D.c.", "Washington D.C.")
71+
test("washington d.C.", "Washington D.C.")
72+
73+
test("washington d. c.", "Washington D. C.")
74+
test("washington d. C.", "Washington D. C.")
75+
76+
test("washington.D.C.", "Washington.D.C.")
77+
78+
test("u.s.a.", "U.S.A.")
79+
test("U.S.A.", "U.S.A.")
80+
81+
test("example county. happy. SOC'y, INC. V. COMM'R, 123 F.24d 314 (2d Cir. 1990).", "Example County. Happy. Soc'y, Inc. V. Comm'r, 123 F.24d 314 (2d Cir. 1990).")
82+
test("3.dollars", "3.Dollars")
83+
}
6484
}
6585

6686
func testUppercase_localized() {

0 commit comments

Comments
 (0)