Skip to content

Commit c01e9da

Browse files
iCharlesHu and itingliu authored
rdar://107955097 (FoundationPreview: Batch move string API (continued)) (#34)
* rdar://107955097 (FoundationPreview: Batch move string API (continued))
  - Move localized uppercase and lowercase to FoundationLocalization
  - `#if` out `CharacterSet` from FoundationPreview. It's not implemented at all there, and having a no-op stub is misleading
* rdar://107955097 (FoundationPreview: Batch move string API (continued))
  - Move components separated by string and range of string functions
* rdar://107955097 (FoundationPreview: Batch move string API (continued))
  Enable snake case options for JSON encoder and decoder. We haven't been able to enable these options because they needed `CharacterSet`, which hasn't been properly implemented for FoundationPreview. Now that we have `BuiltInUnicodeScalarSet`, which mirrors `CharacterSet`, we can switch to that and enable the options.
* rdar://107955097 (FoundationPreview: Batch move string API (continued))
  - Move `StringProtocol.lineRange(for:)` and `paragraphRange(for:)` to FoundationEssentials
  - Rename String+Regex.swift to RegexPatternCache.swift
  - Consolidate extensions on various String family members and remove one redundant swift file

---------

Co-authored-by: I-Ting Tina Liu <iting_liu@apple.com>
1 parent ecd52ce commit c01e9da

17 files changed

+696
-377
lines changed

Sources/FoundationEssentials/BuiltInUnicodeScalarSet.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ internal struct BuiltInUnicodeScalarSet {
8383
return new
8484
}
8585

86-
static let uppercaseLetter = Self.init(type: .uppercaseLetter)
87-
static let lowercaseLetter = Self.init(type: .lowercaseLetter)
88-
static let caseIgnorable = Self.init(type: .caseIgnorable)
89-
static let graphemeExtend = Self.init(type: .graphemeExtend)
90-
static let canonicalDecomposable = Self.init(type: .canonicalDecomposable)
86+
static let uppercaseLetters = Self.init(type: .uppercaseLetter)
87+
static let lowercaseLetters = Self.init(type: .lowercaseLetter)
88+
static let caseIgnorables = Self.init(type: .caseIgnorable)
89+
static let graphemeExtends = Self.init(type: .graphemeExtend)
90+
static let canonicalDecomposables = Self.init(type: .canonicalDecomposable)
9191
}
9292

Sources/FoundationEssentials/CharacterSet+Stub.swift

Lines changed: 0 additions & 38 deletions
This file was deleted.

Sources/FoundationEssentials/JSON/JSONEncoder.swift

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,9 @@ open class JSONEncoder {
117117
/// Use the keys specified by each type. This is the default strategy.
118118
case useDefaultKeys
119119

120-
#if FOUNDATION_FRAMEWORK
121-
// TODO: Reenable this option once String.rangeOfCharacter(from:) is moved
122-
123120
/// Convert from "camelCaseKeys" to "snake_case_keys" before writing a key to JSON payload.
124121
///
125-
/// Capital characters are determined by testing membership in `CharacterSet.uppercaseLetters` and `CharacterSet.lowercaseLetters` (Unicode General Categories Lu and Lt).
122+
/// Capital characters are determined by testing membership in Unicode General Categories Lu and Lt.
126123
/// The conversion to lower case uses `Locale.system`, also known as the ICU "root" locale. This means the result is consistent regardless of the current user's locale and language preferences.
127124
///
128125
/// Converting from camel case to snake case:
@@ -135,7 +132,6 @@ open class JSONEncoder {
135132
///
136133
/// - Note: Using a key encoding strategy has a nominal performance cost, as each string key has to be converted.
137134
case convertToSnakeCase
138-
#endif
139135

140136
/// Provide a custom conversion to the key in the encoded JSON from the keys specified by the encoded types.
141137
/// The full path to the current encoding position is provided for context (in case you need to locate this key within the payload). The returned key is used in place of the last component in the coding path before encoding.
@@ -157,13 +153,13 @@ open class JSONEncoder {
157153
var searchRange = stringKey.index(after: wordStart)..<stringKey.endIndex
158154

159155
// Find next uppercase character
160-
while let upperCaseRange = stringKey.rangeOfCharacter(from: CharacterSet.uppercaseLetters, options: [], range: searchRange) {
156+
while let upperCaseRange = stringKey[searchRange]._rangeOfCharacter(from: BuiltInUnicodeScalarSet.uppercaseLetters, options: []) {
161157
let untilUpperCase = wordStart..<upperCaseRange.lowerBound
162158
words.append(untilUpperCase)
163159

164160
// Find next lowercase character
165161
searchRange = upperCaseRange.lowerBound..<searchRange.upperBound
166-
guard let lowerCaseRange = stringKey.rangeOfCharacter(from: CharacterSet.lowercaseLetters, options: [], range: searchRange) else {
162+
guard let lowerCaseRange = stringKey[searchRange]._rangeOfCharacter(from: BuiltInUnicodeScalarSet.lowercaseLetters, options: []) else {
167163
// There are no more lower case letters. Just end here.
168164
wordStart = searchRange.lowerBound
169165
break
@@ -741,11 +737,9 @@ private struct _JSONKeyedEncodingContainer<K : CodingKey> : KeyedEncodingContain
741737
switch encoder.options.keyEncodingStrategy {
742738
case .useDefaultKeys:
743739
return key.stringValue
744-
#if FOUNDATION_FRAMEWORK
745740
case .convertToSnakeCase:
746741
let newKeyString = JSONEncoder.KeyEncodingStrategy._convertToSnakeCase(key.stringValue)
747742
return newKeyString
748-
#endif // FOUNDATION_FRAMEWORK
749743
case .custom(let converter):
750744
return converter(codingPathNode.path(with: key)).stringValue
751745
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
extension BidirectionalCollection where Index == String.Index {
14+
internal func _alignIndex(roundingDown i: Index) -> Index {
15+
return i < endIndex ? index(before: index(after: i)) : i
16+
}
17+
18+
internal func _alignIndex(roundingUp i: Index) -> Index {
19+
let truncated = _alignIndex(roundingDown: i)
20+
if i > truncated && i < endIndex {
21+
return index(after: i)
22+
} else {
23+
return i
24+
}
25+
}
26+
27+
internal func _boundaryAlignedRange<R: RangeExpression>(_ r: R) -> Range<Index> where R.Bound == String.Index {
28+
let range = r.relative(to: self)
29+
return _alignIndex(roundingDown: range.lowerBound)..<_alignIndex(roundingUp: range.upperBound)
30+
}
31+
32+
internal func _checkRange(_ r: Range<Index>) -> Range<Index>? {
33+
guard r.lowerBound >= startIndex, r.upperBound <= endIndex else {
34+
return nil
35+
}
36+
return r
37+
}
38+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
struct RegexPatternCache: @unchecked Sendable {
14+
private struct Key : Sendable, Hashable {
15+
var pattern: String
16+
var caseInsensitive: Bool
17+
}
18+
19+
private let _lock: LockedState<[Key: Regex<AnyRegexOutput>]>
20+
21+
static let cache = RegexPatternCache()
22+
23+
fileprivate init() {
24+
_lock = LockedState(initialState: .init())
25+
}
26+
27+
func regex(for pattern: String, caseInsensitive: Bool) throws -> Regex<AnyRegexOutput>? {
28+
29+
let key = Key(pattern: pattern, caseInsensitive: caseInsensitive)
30+
31+
return try _lock.withLock { cache in
32+
33+
if let cached = cache[key] {
34+
return cached
35+
}
36+
37+
var r = try Regex(pattern)
38+
if caseInsensitive {
39+
r = r.ignoresCase()
40+
}
41+
cache[key] = r
42+
return r
43+
}
44+
}
45+
}

Sources/FoundationEssentials/String/String+Comparison.swift

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,11 @@ extension Character : _StringCompareOptionsConvertible {
566566
}
567567

568568
var isExtendCharacter: Bool {
569-
return _isExtendCharacter
569+
guard !self.isASCII else {
570+
return false
571+
}
572+
573+
return unicodeScalars.allSatisfy { $0._isGraphemeExtend }
570574
}
571575

572576
}
@@ -674,10 +678,103 @@ extension Substring {
674678
return result
675679
}
676680

681+
#if FOUNDATION_FRAMEWORK
677682
func _rangeOfCharacter(from set: CharacterSet, options: String.CompareOptions) -> Range<Index>? {
678683
guard !isEmpty else { return nil }
679684

680-
return unicodeScalars._rangeOfCharacter(from: set, anchored: options.contains(.anchored), backwards: options.contains(.backwards))
685+
return unicodeScalars._rangeOfCharacter(anchored: options.contains(.anchored), backwards: options.contains(.backwards), matchingPredicate: set.contains)
686+
}
687+
#endif
688+
689+
func _rangeOfCharacter(from set: BuiltInUnicodeScalarSet, options: String.CompareOptions) -> Range<Index>? {
690+
guard !isEmpty else { return nil }
691+
692+
return unicodeScalars._rangeOfCharacter(anchored: options.contains(.anchored), backwards: options.contains(.backwards), matchingPredicate: set.contains)
693+
}
694+
695+
func _range(of strToFind: Substring, options: String.CompareOptions) throws -> Range<Index>? {
696+
if options.contains(.regularExpression) {
697+
guard let regex = try RegexPatternCache.cache.regex(for: String(strToFind), caseInsensitive: options.contains(.caseInsensitive)) else {
698+
return nil
699+
}
700+
701+
if options.contains(.anchored) {
702+
guard let match = prefixMatch(of: regex) else { return nil }
703+
return match.range
704+
} else {
705+
guard let match = firstMatch(of: regex) else { return nil }
706+
return match.range
707+
}
708+
}
709+
710+
guard !isEmpty, !strToFind.isEmpty else {
711+
return nil
712+
}
713+
714+
let toHalfWidth = options.contains(.widthInsensitive)
715+
let diacriticsInsensitive = options.contains(.diacriticInsensitive)
716+
let caseFold = options.contains(.caseInsensitive)
717+
let anchored = options.contains(.anchored)
718+
let backwards = options.contains(.backwards)
719+
720+
let result: Range<Index>?
721+
if options.contains(.literal) {
722+
result = unicodeScalars._range(of: strToFind.unicodeScalars, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticsInsensitive, caseFold: caseFold, anchored: anchored, backwards: backwards)
723+
} else if !toHalfWidth && !diacriticsInsensitive && !caseFold {
724+
// Fast path: iterate through UTF8 view when we don't need to transform string content
725+
guard let utf8Result = utf8._range(of: strToFind.utf8, anchored: anchored, backwards: backwards) else {
726+
return nil
727+
}
728+
729+
// Adjust the index to that of the original slice since we called `makeContiguousUTF8` before
730+
guard let lower = String.Index(utf8Result.lowerBound, within: self), let upper = String.Index(utf8Result.upperBound, within: self) else {
731+
return nil
732+
}
733+
result = lower..<upper
734+
735+
} else if _isASCII && strToFind._isASCII {
736+
// Fast path: Iterate utf8 without having to decode as unicode scalars. In this case only case folding matters.
737+
738+
guard let utf8Result = utf8._range(of: strToFind.utf8, toHalfWidth: false, diacriticsInsensitive: false, caseFold: caseFold, anchored: anchored, backwards: backwards) else {
739+
return nil
740+
}
741+
742+
// Adjust the index to that of the original slice since we called `makeContiguousUTF8` before
743+
guard let lower = String.Index(utf8Result.lowerBound, within: self), let upper = String.Index(utf8Result.upperBound, within: self) else {
744+
return nil
745+
}
746+
result = lower..<upper
747+
748+
} else {
749+
result = _range(of: strToFind, toHalfWidth: toHalfWidth, diacriticsInsensitive: diacriticsInsensitive, caseFold: caseFold, anchored: anchored, backwards: backwards)
750+
}
751+
752+
return result
753+
}
754+
755+
var _isASCII: Bool {
756+
var mutated = self
757+
return mutated.withUTF8 {
758+
_allASCII($0)
759+
}
760+
}
761+
762+
func _components(separatedBy separator: Substring, options: String.CompareOptions = []) throws -> [String] {
763+
var result = [String]()
764+
var searchStart = startIndex
765+
while searchStart < endIndex {
766+
let r = try self[searchStart...]._range(of: separator, options: options)
767+
guard let r, !r.isEmpty else {
768+
break
769+
}
770+
771+
result.append(String(self[searchStart ..< r.lowerBound]))
772+
searchStart = r.upperBound
773+
}
774+
775+
result.append(String(self[searchStart..<endIndex]))
776+
777+
return result
681778
}
682779
}
683780

@@ -703,6 +800,42 @@ extension Substring.UnicodeScalarView {
703800

704801
return ComparisonResult(stringIndex: idx1, idx2: idx2, endIndex1: endIndex, endIndex2: other.endIndex)
705802
}
803+
804+
func _rangeOfCharacter(anchored: Bool, backwards: Bool, matchingPredicate predicate: (Unicode.Scalar) -> Bool) -> Range<Index>? {
805+
guard !isEmpty else { return nil }
806+
807+
let fromLoc: String.Index
808+
let toLoc: String.Index
809+
let step: Int
810+
if backwards {
811+
fromLoc = index(before: endIndex)
812+
toLoc = anchored ? fromLoc : startIndex
813+
step = -1
814+
} else {
815+
fromLoc = startIndex
816+
toLoc = anchored ? fromLoc : index(before: endIndex)
817+
step = 1
818+
}
819+
820+
var done = false
821+
var found = false
822+
823+
var idx = fromLoc
824+
while !done {
825+
let ch = self[idx]
826+
if predicate(ch) {
827+
done = true
828+
found = true
829+
} else if idx == toLoc {
830+
done = true
831+
} else {
832+
formIndex(&idx, offsetBy: step)
833+
}
834+
}
835+
836+
guard found else { return nil }
837+
return idx..<index(after: idx)
838+
}
706839
}
707840

708841
// MARK: - ComparisonResult Extension
@@ -744,3 +877,34 @@ extension BidirectionalCollection {
744877
return idx
745878
}
746879
}
880+
881+
// Borrowed from stdlib
882+
internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
883+
if input.isEmpty { return true }
884+
let ptr = input.baseAddress.unsafelyUnwrapped
885+
var i = 0
886+
887+
let count = input.count
888+
let stride = MemoryLayout<UInt>.stride
889+
let address = Int(bitPattern: ptr)
890+
891+
let wordASCIIMask = UInt(truncatingIfNeeded: 0x8080_8080_8080_8080 as UInt64)
892+
let byteASCIIMask = UInt8(truncatingIfNeeded: wordASCIIMask)
893+
894+
while (address &+ i) % stride != 0 && i < count {
895+
guard ptr[i] & byteASCIIMask == 0 else { return false }
896+
i &+= 1
897+
}
898+
899+
while (i &+ stride) <= count {
900+
let word: UInt = UnsafePointer(bitPattern: address &+ i).unsafelyUnwrapped.pointee
901+
guard word & wordASCIIMask == 0 else { return false }
902+
i &+= stride
903+
}
904+
905+
while i < count {
906+
guard ptr[i] & byteASCIIMask == 0 else { return false }
907+
i &+= 1
908+
}
909+
return true
910+
}

Sources/FoundationEssentials/String/String+Essentials.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ extension String {
1616
var new = ""
1717
new.reserveCapacity(utf8.count)
1818

19-
let uppercaseSet = BuiltInUnicodeScalarSet.uppercaseLetter
20-
let lowercaseSet = BuiltInUnicodeScalarSet.lowercaseLetter
21-
let cfcaseIgnorableSet = BuiltInUnicodeScalarSet.caseIgnorable
19+
let uppercaseSet = BuiltInUnicodeScalarSet.uppercaseLetters
20+
let lowercaseSet = BuiltInUnicodeScalarSet.lowercaseLetters
21+
let cfcaseIgnorableSet = BuiltInUnicodeScalarSet.caseIgnorables
2222

2323
var isLastCased = false
2424
for scalar in unicodeScalars {

0 commit comments

Comments
 (0)