|
10 | 10 | // |
11 | 11 | //===----------------------------------------------------------------------===// |
12 | 12 |
|
| 13 | +#if FOUNDATION_FRAMEWORK |
| 14 | +@_spi(_Unicode) import Swift |
| 15 | +@_implementationOnly import Foundation_Private.NSString |
| 16 | +#endif |
| 17 | + |
| 18 | +#if canImport(Darwin) |
| 19 | +import Darwin |
| 20 | +#endif |
| 21 | + |
13 | 22 | extension String { |
14 | 23 | package func _trimmingWhitespace() -> String { |
15 | 24 | String(unicodeScalars._trimmingCharacters { |
@@ -52,5 +61,199 @@ extension String { |
52 | 61 | } |
53 | 62 | self = str |
54 | 63 | } |
| 64 | + |
/// The decomposition flavors that can be requested when decomposing a string.
enum _NormalizationType {
    case canonical
    case hfsPlus

    /// The built-in scalar set whose members are decomposed for this
    /// normalization type.
    fileprivate var setType: BuiltInUnicodeScalarSet.SetType {
        switch self {
        case .canonical:
            return .canonicalDecomposable
        case .hfsPlus:
            return .hfsPlusDecomposable
        }
    }
}
| 76 | + |
/// Decomposes this string's UTF-8 contents into `buffer`.
///
/// - Parameters:
///   - type: The normalization flavor to apply.
///   - buffer: Destination for the decomposed UTF-8 bytes.
///   - nullTerminated: Whether a trailing null byte should be written.
/// - Returns: The value produced by the buffer-level decomposition, or `nil`
///   if that decomposition threw.
private func _decomposed(_ type: String._NormalizationType, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) -> Int? {
    // `withUTF8` is mutating, so it has to be invoked on a local copy.
    var utf8Source = self
    let written: Int? = utf8Source.withUTF8 { utf8 in
        try? utf8._decomposed(type, as: Unicode.UTF8.self, into: buffer, nullTerminated: nullTerminated)
    }
    return written
}
| 83 | + |
| 84 | + #if canImport(Darwin) || FOUNDATION_FRAMEWORK |
/// Writes the null-terminated, HFS+-decomposed UTF-8 form of this string
/// into `buffer`.
///
/// The whole buffer is offered to the decomposition routine: because it is
/// called with `nullTerminated: true`, the terminator is written inside the
/// buffer it is given, so no extra byte needs to be held back here. (Holding
/// one back rejected strings that fit exactly and produced a negative count
/// for an empty buffer.)
///
/// - Parameter buffer: Destination for the C string, terminator included.
/// - Returns: `true` if the representation (including its terminator) fit in
///   `buffer` and the string was valid; `false` otherwise.
fileprivate func _fileSystemRepresentation(into buffer: UnsafeMutableBufferPointer<CChar>) -> Bool {
    return buffer.withMemoryRebound(to: UInt8.self) { bytes in
        return _decomposed(.hfsPlus, into: bytes, nullTerminated: true) != nil
    }
}
| 93 | + #endif |
| 94 | + |
/// Calls `block` with a C-string file system representation of this string.
///
/// Where Darwin can be imported (or when building as the Foundation
/// framework) the representation is produced in a temporary `PATH_MAX`-sized
/// buffer, and `block` receives `nil` if it could not be produced. Elsewhere
/// the string's plain C string is passed through.
///
/// - Parameter block: Closure receiving the representation, or `nil`.
/// - Returns: Whatever `block` returns.
package func withFileSystemRepresentation<R>(_ block: (UnsafePointer<CChar>?) throws -> R) rethrows -> R {
    #if canImport(Darwin) || FOUNDATION_FRAMEWORK
    return try withUnsafeTemporaryAllocation(of: CChar.self, capacity: Int(PATH_MAX)) { scratch in
        if _fileSystemRepresentation(into: scratch) {
            return try block(scratch.baseAddress!)
        } else {
            return try block(nil)
        }
    }
    #else
    return try self.withCString { cString in
        try block(cString)
    }
    #endif
}
| 109 | +} |
| 110 | + |
extension UnsafeBufferPointer {
    /// Reasons a decomposition into a fixed-size output buffer can fail.
    private enum DecompositionError : Error {
        /// The output buffer filled up before all input (or the requested
        /// terminator) could be written.
        case insufficientSpace
        /// A scalar could not be encoded as UTF-8, or a non-null scalar
        /// followed a null scalar.
        case illegalScalar
        /// The codec reported an error while decoding the input.
        case decodingError
    }

    /// Rebinds both this buffer and `buffer` to the element types expected by
    /// `_decomposed(_:as:into:nullTerminated:)` and forwards to it.
    ///
    /// - Parameters:
    ///   - type: The normalization flavor to apply.
    ///   - codec: The codec used to decode this buffer's contents.
    ///   - buffer: Destination memory, rebound to UTF-8 code units.
    ///   - nullTerminated: Whether a trailing null byte should be written.
    /// - Returns: The value returned by the forwarded call.
    /// - Throws: Whatever the forwarded call throws.
    fileprivate func _decomposedRebinding<T: UnicodeCodec, InputElement>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<InputElement>, nullTerminated: Bool = false) throws -> Int {
        try self.withMemoryRebound(to: T.CodeUnit.self) { reboundSelf in
            try buffer.withMemoryRebound(to: Unicode.UTF8.CodeUnit.self) { reboundBuffer in
                try reboundSelf._decomposed(type, as: codec, into: reboundBuffer, nullTerminated: nullTerminated)
            }
        }
    }

    /// Decodes this buffer with `codec`, decomposes and canonically reorders
    /// its scalars, and writes the result into `buffer` as UTF-8.
    ///
    /// Null scalars are only accepted at the end of the input; any non-null
    /// scalar that follows a null is rejected.
    ///
    /// - Parameters:
    ///   - type: The normalization flavor to apply.
    ///   - codec: The codec used to decode this buffer's contents.
    ///   - buffer: Destination for the UTF-8 output.
    ///   - nullTerminated: Whether a null byte should be appended when the
    ///     input did not already end in one.
    /// - Returns: If the input contained a null scalar, the output index just
    ///   past the last one; otherwise the number of bytes written (including
    ///   the appended terminator when `nullTerminated` is `true`).
    /// - Throws: `DecompositionError` when the output does not fit, the input
    ///   cannot be decoded, or a scalar is not permitted.
    fileprivate func _decomposed<T: UnicodeCodec>(_ type: String._NormalizationType, as codec: T.Type, into buffer: UnsafeMutableBufferPointer<UInt8>, nullTerminated: Bool = false) throws -> Int where Element == T.CodeUnit {
        let scalarSet = BuiltInUnicodeScalarSet(type: type.setType)
        var bufferIdx = 0
        let bufferLength = buffer.count
        // Scalars waiting to be ordered by combining class before being written
        var sortBuffer: [UnicodeScalar] = []
        var seenNullIdx: Int? = nil
        var decoder = T()
        var iterator = self.makeIterator()

        // Writes as many of `values` as fit; throws if some did not fit
        func appendOutput(_ values: some Sequence<UInt8>) throws {
            let bufferPortion = UnsafeMutableBufferPointer(start: buffer.baseAddress!.advanced(by: bufferIdx), count: bufferLength - bufferIdx)
            var (leftOver, idx) = bufferPortion.initialize(from: values)
            bufferIdx += idx
            if bufferIdx == bufferLength && leftOver.next() != nil {
                throw DecompositionError.insufficientSpace
            }
        }

        // Writes a single byte; throws if the buffer is already full
        func appendOutput(_ value: UInt8) throws {
            guard bufferIdx < bufferLength else {
                throw DecompositionError.insufficientSpace
            }
            buffer.initializeElement(at: bufferIdx, to: value)
            bufferIdx += 1
        }

        func encodedScalar(_ scalar: UnicodeScalar) throws -> some Collection<UInt8> {
            guard let encoded = UTF8.encode(scalar) else {
                throw DecompositionError.illegalScalar
            }
            return encoded
        }

        // Orders the pending scalars by canonical combining class and writes them out
        func fillFromSortBuffer() throws {
            guard !sortBuffer.isEmpty else { return }
            sortBuffer.sort {
                $0.properties.canonicalCombiningClass.rawValue < $1.properties.canonicalCombiningClass.rawValue
            }
            for scalar in sortBuffer {
                try appendOutput(encodedScalar(scalar))
            }
            sortBuffer.removeAll(keepingCapacity: true)
        }

        decodingLoop: while bufferIdx < bufferLength {
            let scalar: UnicodeScalar
            switch decoder.decode(&iterator) {
            // We've finished the input, stop decoding
            case .emptyInput: break decodingLoop
            case .error: throw DecompositionError.decodingError
            case .scalarValue(let v): scalar = v
            }

            let isNull = scalar.value == 0
            if !isNull && seenNullIdx != nil {
                // File system representations are c-strings that do not support embedded null bytes
                throw DecompositionError.illegalScalar
            }

            let isASCII = scalar.isASCII
            if isASCII || scalar.properties.canonicalCombiningClass == .notReordered {
                try fillFromSortBuffer()
            }

            if isNull {
                // Null bytes within the string are fine as long as they are at the end.
                // The index is recorded only after the pending scalars have been flushed
                // (null is ASCII, so the flush above always ran); recording it earlier
                // would yield a stale position whenever scalars were still waiting in
                // the sort buffer.
                seenNullIdx = bufferIdx
            }

            if isASCII {
                try appendOutput(UInt8(scalar.value))
            } else {
#if FOUNDATION_FRAMEWORK
                // Only decompose scalars present in the declared set
                if scalarSet.contains(scalar) {
                    sortBuffer.append(contentsOf: String(scalar)._nfd)
                } else {
                    // Even if a scalar isn't decomposed, it may still need to be re-ordered
                    sortBuffer.append(scalar)
                }
#else
                // TODO: Implement Unicode decomposition in swift-foundation
                sortBuffer.append(scalar)
#endif
            }
        }
        try fillFromSortBuffer()

        // Ask the decoder, not the iterator, whether input remains: the decoder may
        // already have pulled code units out of the iterator that it has not yet
        // returned as scalars, so an exhausted iterator does not prove that all
        // input was consumed.
        guard case .emptyInput = decoder.decode(&iterator) else {
            throw DecompositionError.insufficientSpace
        }

        if let seenNullIdx {
            return seenNullIdx + 1
        }
        if nullTerminated {
            try appendOutput(0)
        }
        return bufferIdx
    }
}
55 | 224 |
|
| 225 | +#if FOUNDATION_FRAMEWORK |
@objc
extension NSString {
    /// Fills `pointer` with this string's null-terminated file system
    /// representation.
    ///
    /// - Parameters:
    ///   - pointer: Start of the destination memory.
    ///   - maxLength: Number of `CChar`s available at `pointer`, terminator
    ///     included.
    /// - Returns: `true` if the representation and its terminator were
    ///   written; `false` if they did not fit or the string contains a null
    ///   character followed by non-null characters.
    @objc
    func __swiftFillFileSystemRepresentation(pointer: UnsafeMutablePointer<CChar>, maxLength: Int) -> Bool {
        // Nothing, not even the terminator, fits in a buffer without capacity
        guard maxLength > 0 else {
            return false
        }
        let buffer = UnsafeMutableBufferPointer(start: pointer, count: maxLength)
        // See if we have a quick-access buffer we can just convert directly
        if let fastCharacters = self._fastCharacterContents() {
            // If we have quick access to UTF-16 contents, decompose from UTF-16
            let charsBuffer = UnsafeBufferPointer(start: fastCharacters, count: self.length)
            return (try? charsBuffer._decomposedRebinding(.hfsPlus, as: Unicode.UTF16.self, into: buffer, nullTerminated: true)) != nil
        } else if self.fastestEncoding == NSASCIIStringEncoding, let fastUTF8 = self._fastCStringContents(false) {
            // If we have quick access to ASCII contents, no need to decompose
            let utf8Buffer = UnsafeBufferPointer(start: fastUTF8, count: self.length)

            // We only allow embedded nulls if there are no non-null characters following the first null character
            if let embeddedNullIdx = utf8Buffer.firstIndex(of: 0),
               !utf8Buffer[embeddedNullIdx...].allSatisfy({ $0 == 0 }) {
                return false
            }

            // `initialize(fromContentsOf:)` requires the destination to hold the whole
            // source, so check up front that the contents plus the terminator fit
            // instead of trapping on an over-long string.
            guard utf8Buffer.count < buffer.count else {
                return false
            }
            let next = buffer.initialize(fromContentsOf: utf8Buffer)
            buffer[next] = 0
            return true
        } else {
            // Otherwise, bridge to a String which will create a UTF-8 buffer
            return String(self)._fileSystemRepresentation(into: buffer)
        }
    }
}
| 259 | +#endif |
0 commit comments