-
Notifications
You must be signed in to change notification settings - Fork 441
/
Copy pathSyntaxText.swift
278 lines (247 loc) · 10.1 KB
/
SyntaxText.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#if canImport(Darwin)
@_implementationOnly import Darwin
#elseif canImport(Glibc)
@_implementationOnly import Glibc
#elseif canImport(Musl)
@_implementationOnly import Musl
#endif
/// A non-owning view of a string's bytes.
///
/// ``SyntaxText`` does not own the memory it refers to. The bytes reside in
/// some other buffer, and the lifetime of that buffer must extend past that of
/// every ``SyntaxText`` referring to it.
///
/// ``SyntaxText`` is a `Collection` of `UInt8` that is _expected_ to hold a
/// UTF-8 encoded byte sequence. Because it is essentially a span over a memory
/// buffer, it may contain ill-formed UTF-8. Comparisons (e.g. `==`,
/// `hasPrefix(_:)`) are based purely on the bytes, without any Unicode
/// normalization.
///
/// Because it is just a byte sequence, ``SyntaxText`` can represent the exact
/// source buffer whether or not that buffer is valid UTF-8. When a
/// `Swift.String` is created from it, ill-formed UTF-8 sequences are replaced
/// with the Unicode replacement character (`\u{FFFD}`).
@_spi(RawSyntax)
public struct SyntaxText {
  /// The bytes this text refers to. Not owned by this value.
  var buffer: UnsafeBufferPointer<UInt8>

  /// Creates a ``SyntaxText`` that views the bytes of `buffer`.
  public init(buffer: UnsafeBufferPointer<UInt8>) {
    self.buffer = buffer
  }

  /// Creates a ``SyntaxText`` that views the `count` bytes starting at
  /// `baseAddress`.
  ///
  /// - Precondition: `baseAddress` is not `nil` unless `count` is zero.
  public init(baseAddress: UnsafePointer<UInt8>?, count: Int) {
    precondition(
      baseAddress != nil || count == 0,
      "If count is not zero, base address must be exist"
    )
    self.buffer = UnsafeBufferPointer(start: baseAddress, count: count)
  }

  /// Creates an empty ``SyntaxText`` whose base address is `nil`.
  public init() {
    self.init(baseAddress: nil, count: 0)
  }

  /// Creates a ``SyntaxText`` that views the UTF-8 code units of `string`.
  public init(_ string: StaticString) {
    let start = string.utf8Start
    let length = string.utf8CodeUnitCount
    self.init(baseAddress: start, count: length)
  }

  /// Creates a ``SyntaxText`` over the same memory that `slice` covers.
  public init(rebasing slice: SubSequence) {
    // A `nil` base stays `nil`; otherwise skip to the first byte of the slice.
    let start = slice.base.baseAddress.map { $0.advanced(by: slice.startIndex) }
    self.init(baseAddress: start, count: slice.count)
  }

  /// The address of the first byte this text refers to.
  ///
  /// A `nil` base address implies the text is empty, but the reverse does not
  /// hold: a text can be `isEmpty` and still have a non-`nil` base address.
  public var baseAddress: UnsafePointer<UInt8>? {
    return buffer.baseAddress
  }

  /// The length of this text in bytes.
  public var count: Int {
    return buffer.count
  }

  /// `true` if this text contains no bytes.
  public var isEmpty: Bool {
    return buffer.isEmpty
  }

  /// Returns `true` if the memory range of this text lies entirely within the
  /// memory range of `other`.
  ///
  /// `SyntaxText(rebasing: text[n ..< m]).isSlice(of: text)` is `true` for any
  /// valid indices with `n < m`. An empty text is only considered a slice of
  /// another empty text, regardless of the addresses involved.
  public func isSlice(of other: SyntaxText) -> Bool {
    if isEmpty || other.isEmpty {
      // With at least one side empty, the answer is 'true' only when both are.
      return isEmpty && other.isEmpty
    }
    // Both texts are non-empty here, so the force unwraps cannot fail.
    let selfStart = baseAddress!
    let otherStart = other.baseAddress!
    let selfEnd = selfStart + count
    let otherEnd = otherStart + other.count
    return otherStart <= selfStart && selfEnd <= otherEnd
  }

  /// Returns `true` if the bytes of `other` occur somewhere in this text.
  ///
  /// An empty `other` is never considered to be contained.
  public func contains(_ other: SyntaxText) -> Bool {
    firstRange(of: other) != nil
  }

  /// Returns the range of the first occurrence of `other` within this text, or
  /// `nil` if `other` does not occur or is empty.
  public func firstRange(of other: SyntaxText) -> Range<Index>? {
    guard !other.isEmpty else { return nil }
    let needleLength = other.count
    let lastStart = count - needleLength
    // If 'other' is longer than 'self', there is no offset at which it fits.
    guard lastStart >= 0 else { return nil }
    for offset in 0...lastStart {
      // Force unwraps are safe: 'other' is not empty, and 'self' is at least
      // as long as 'other', so it is not empty either.
      if compareMemory(baseAddress! + offset, other.baseAddress!, needleLength) {
        return offset..<(offset + needleLength)
      }
    }
    return nil
  }

  /// Returns `true` if this text begins with the bytes of `other`.
  ///
  /// Every text has the empty text as a prefix.
  public func hasPrefix(_ other: SyntaxText) -> Bool {
    if other.count > count { return false }
    if other.isEmpty { return true }
    let head = self[0..<other.count]
    return Self(rebasing: head) == other
  }

  /// Returns `true` if this text ends with the bytes of `other`.
  ///
  /// Every text has the empty text as a suffix.
  public func hasSuffix(_ other: SyntaxText) -> Bool {
    if other.count > count { return false }
    if other.isEmpty { return true }
    let tailStart = count - other.count
    let tail = self[tailStart..<count]
    return Self(rebasing: tail) == other
  }
}
/// ``SyntaxText`` is a random-access collection of `UInt8` bytes.
extension SyntaxText: RandomAccessCollection {
  /// ``SyntaxText`` operates on bytes; each element is a single `UInt8`.
  public typealias Element = UInt8

  /// ``SyntaxText`` is a contiguous memory region, so positions are integers.
  public typealias Index = Int

  /// A part of a ``SyntaxText`` is represented by `Slice<SyntaxText>`.
  public typealias SubSequence = Slice<SyntaxText>

  /// The position of the first byte in this ``SyntaxText``.
  public var startIndex: Index {
    return buffer.startIndex
  }

  /// The position one past the last byte in this ``SyntaxText``.
  public var endIndex: Index {
    return buffer.endIndex
  }

  /// Returns the byte stored at `index`.
  public subscript(index: Index) -> Element {
    return buffer[index]
  }
}
extension SyntaxText: Hashable {
  /// Returns `true` if `lhs` and `rhs` have the same length and the same bytes.
  public static func == (lhs: SyntaxText, rhs: SyntaxText) -> Bool {
    guard lhs.buffer.count == rhs.buffer.count else {
      return false
    }
    // We deliberately skip a `lhs.baseAddress == rhs.baseAddress` shortcut.
    // In SwiftSyntax use cases, comparing the very same SyntaxText instance is
    // extremely rare, and checking for it costs an extra branch.
    // The most common usage is comparing parsed text with a static text, e.g.
    // `token.text == "func"`. In such cases `compareMemory` (`memcmp`) is
    // optimized to a `cmp` or similar opcode if either operand is a short
    // static text, so the shortcut would not give a meaningful performance
    // boost even when both sides refer to the same memory.
    if let lhsBytes = lhs.baseAddress, let rhsBytes = rhs.baseAddress {
      return compareMemory(lhsBytes, rhsBytes, lhs.count)
    }
    // The counts match and at least one base address is `nil`, so both texts
    // are empty and therefore equal.
    return true
  }

  /// Feeds the bytes of this ``SyntaxText`` into `hasher`.
  public func hash(into hasher: inout Hasher) {
    let rawBytes = UnsafeRawBufferPointer(buffer)
    hasher.combine(bytes: rawBytes)
  }
}
/// Literal initializers for ``SyntaxText``.
///
/// Creating a ``SyntaxText`` from a `StaticString` is always safe because a
/// `StaticString` is guaranteed to be alive for the entire execution duration
/// of the process.
extension SyntaxText: ExpressibleByStringLiteral {
  /// Creates a ``SyntaxText`` that refers to the bytes of a string literal.
  public init(stringLiteral value: StaticString) {
    self.init(value)
  }

  /// Creates a ``SyntaxText`` that refers to the bytes of a Unicode scalar
  /// literal.
  public init(unicodeScalarLiteral value: StaticString) {
    self.init(value)
  }

  /// Creates a ``SyntaxText`` that refers to the bytes of an extended grapheme
  /// cluster literal.
  public init(extendedGraphemeClusterLiteral value: StaticString) {
    self.init(value)
  }
}
extension SyntaxText: CustomStringConvertible {
  /// The contents of this ``SyntaxText`` converted to a `String`.
  ///
  /// ``SyntaxText`` can hold invalid Unicode while `String` cannot, so the
  /// conversion is lossy if this text contains invalid UTF-8.
  public var description: String {
    return String(syntaxText: self)
  }
}
extension SyntaxText: CustomDebugStringConvertible {
  /// The debug description of this text's `description`, which may be lossy if
  /// the text contains invalid Unicode. Do not rely on this value being stable.
  public var debugDescription: String {
    return description.debugDescription
  }
}
extension String {
  /// Creates a `String` holding a copy of the contents of `syntaxText`.
  ///
  /// Ill-formed UTF-8 sequences in `syntaxText` are replaced with the Unicode
  /// replacement character `\u{FFFD}`.
  @_spi(RawSyntax)
  public init(syntaxText: SyntaxText) {
    if syntaxText.isEmpty {
      self = ""
      return
    }
    guard #available(macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0, *) else {
      // Older OS versions lack the uninitialized-capacity initializer, so
      // decode the bytes as UTF-8 instead.
      self.init(decoding: syntaxText, as: UTF8.self)
      return
    }
    self.init(unsafeUninitializedCapacity: syntaxText.count) { destination in
      // Copy the bytes into the string's storage and report how many were
      // written.
      let (_, writtenCount) = destination.initialize(from: syntaxText.buffer)
      return writtenCount
    }
  }

  /// Calls `body` with a ``SyntaxText`` that refers to the contiguous UTF-8
  /// storage of this string and returns its result.
  ///
  /// Like `String.withUTF8(_:)`, this may mutate the string if it was not
  /// already stored contiguously.
  @_spi(RawSyntax)
  public mutating func withSyntaxText<R>(
    _ body: (SyntaxText) throws -> R
  ) rethrows -> R {
    return try withUTF8 { bytes in
      let text = SyntaxText(baseAddress: bytes.baseAddress, count: bytes.count)
      return try body(text)
    }
  }
}
/// Returns `true` if the `count` bytes starting at `s1` are byte-for-byte equal
/// to the `count` bytes starting at `s2`.
///
/// Uses the C library's `memcmp` when one of the libc modules this file
/// imports (Darwin, Glibc, Musl) is available, and falls back to an
/// element-wise comparison otherwise.
///
/// - Precondition: `count` is not negative.
private func compareMemory(
  _ s1: UnsafePointer<UInt8>,
  _ s2: UnsafePointer<UInt8>,
  _ count: Int
) -> Bool {
  precondition(count >= 0)
  #if canImport(Darwin)
  return Darwin.memcmp(s1, s2, count) == 0
  #elseif canImport(Glibc)
  return Glibc.memcmp(s1, s2, count) == 0
  #elseif canImport(Musl)
  // Musl is imported at the top of the file when neither Darwin nor Glibc is
  // available, so use its `memcmp` rather than the slower generic fallback.
  return Musl.memcmp(s1, s2, count) == 0
  #else
  return UnsafeBufferPointer(start: s1, count: count)
    .elementsEqual(UnsafeBufferPointer(start: s2, count: count))
  #endif
}