|
14 | 14 | // MARK: - Private extensions for parsing encoding names |
15 | 15 |
|
16 | 16 | private extension Unicode.Scalar { |
17 | | - var _isASCIINumeric: Bool { |
18 | | - return ("0"..."9").contains(self) |
19 | | - } |
20 | | - |
21 | | - var _asciiNumericValue: Int { |
22 | | - assert(_isASCIINumeric) |
23 | | - return Int(self.value - 0x30) |
24 | | - } |
25 | | - |
26 | 17 | /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace". |
27 | 18 | /// |
28 | 19 | /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace |
@@ -99,68 +90,6 @@ extension StringEncodingNameTokenizer where Self: ~Copyable { |
99 | 90 | } |
100 | 91 | } |
101 | 92 |
|
102 | | -/// ICU-independent parser that follows [Charset Alias Matching](https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching). |
103 | | -private struct UTS22Tokenizer: StringEncodingNameTokenizer, ~Copyable { |
104 | | - enum Token: Equatable { |
105 | | - case numeric(Int) |
106 | | - case alphabet(ASCIICaseInsensitiveUnicodeScalar) |
107 | | - } |
108 | | - |
109 | | - enum Error: Swift.Error { |
110 | | - case tooLargeNumericValue |
111 | | - } |
112 | | - |
113 | | - let scalars: String.UnicodeScalarView |
114 | | - |
115 | | - private var _currentIndex: String.UnicodeScalarView.Index |
116 | | - |
117 | | - init(name: String) { |
118 | | - self.scalars = name.unicodeScalars |
119 | | - self._currentIndex = scalars.startIndex |
120 | | - } |
121 | | - |
122 | | - mutating func nextToken() throws -> Token? { |
123 | | - guard _currentIndex < scalars.endIndex else { |
124 | | - return nil |
125 | | - } |
126 | | - |
127 | | - let scalar = scalars[_currentIndex] |
128 | | - switch scalar { |
129 | | - case "0"..."9": |
130 | | - // Parse a numeric value ignoring leading zeros. |
131 | | - // |
132 | | - // NOTE: To prevent the value from overflowing, a threshold is set here. |
133 | | - // The maximum number of digits expected is 8 as of now: e.g. `csISO42JISC62261978`. |
134 | | - // In practice, it is harmless to throw an error when the value is too large. |
135 | | - |
136 | | - let threshold: Int = 999_999_999 |
137 | | - var value = scalar._asciiNumericValue |
138 | | - scalars.formIndex(after: &_currentIndex) |
139 | | - while _currentIndex < scalars.endIndex { |
140 | | - let currentScalar = scalars[_currentIndex] |
141 | | - guard currentScalar._isASCIINumeric else { |
142 | | - break |
143 | | - } |
144 | | - value = value * 10 + currentScalar._asciiNumericValue |
145 | | - if value > threshold { |
146 | | - throw Error.tooLargeNumericValue |
147 | | - } |
148 | | - scalars.formIndex(after: &_currentIndex) |
149 | | - } |
150 | | - return .numeric(value) |
151 | | - case "A"..."Z", "a"..."z": |
152 | | - scalars.formIndex(after: &_currentIndex) |
153 | | - return .alphabet(ASCIICaseInsensitiveUnicodeScalar(scalar)) |
154 | | - default: |
155 | | - scalars.formIndex(after: &_currentIndex) |
156 | | - if _currentIndex < scalars.endIndex { |
157 | | - return try nextToken() |
158 | | - } |
159 | | - return nil |
160 | | - } |
161 | | - } |
162 | | -} |
163 | | - |
164 | 93 |
|
165 | 94 | /// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s. |
166 | 95 | private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable { |
|
0 commit comments