-
Notifications
You must be signed in to change notification settings - Fork 441
/
Copy pathSyntaxText.swift
230 lines (205 loc) · 8.25 KB
/
SyntaxText.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
/// Represent a string.
///
/// This type does not own the string data. The data reside in some other buffer
/// whose lifetime extends past that of the SyntaxText.
///
/// `SyntaxText` is a `Collection` of `UInt8` which is _expected_ to be a UTF8
/// encoded byte sequence. However, since it is essentially just a span of a
/// memory buffer, it may contain ill-formed UTF8 sequences. Comparisons
/// (e.g. `==`, `hasPrefix()`) are purely based on the byte sequences,
/// without any Unicode normalization or anything.
///
/// Since it's just a byte sequence, `SyntaxText` can represent the exact source
/// buffer regardless of whether it is a valid UTF8. When creating
/// `Swift.String`, ill-formed UTF8 sequences are replaced with the Unicode
/// replacement character (`\u{FFFD}`).
@_spi(RawSyntax)
public struct SyntaxText {
  /// The viewed bytes. Nothing in this type allocates or frees them.
  var buffer: UnsafeBufferPointer<UInt8>

  /// Creates a `SyntaxText` viewing `count` bytes starting at `baseAddress`.
  ///
  /// `baseAddress` may be `nil` only when `count` is zero; this is checked
  /// with `assert`.
  public init(baseAddress: UnsafePointer<UInt8>?, count: Int) {
    assert(
      baseAddress != nil || count == 0,
      "If count is not zero, base address must be exist"
    )
    self.buffer = UnsafeBufferPointer(start: baseAddress, count: count)
  }

  /// Creates a `SyntaxText` with a `nil` base address and zero length.
  public init() {
    self.init(baseAddress: nil, count: 0)
  }

  /// Creates a `SyntaxText` viewing the UTF-8 storage of a `StaticString`.
  public init(_ string: StaticString) {
    let start = string.utf8Start
    let length = string.utf8CodeUnitCount
    self.init(baseAddress: start, count: length)
  }

  /// Creates a `SyntaxText` viewing exactly the bytes covered by `slice`,
  /// i.e. the slice's base advanced by the slice's start index.
  public init(rebasing slice: SubSequence) {
    let start = slice.base.baseAddress.map { $0.advanced(by: slice.startIndex) }
    self.init(baseAddress: start, count: slice.count)
  }

  /// Address of the first byte this text refers to.
  ///
  /// A `nil` base address implies the text is empty, but the text can also be
  /// empty (`isEmpty`) while having a non-`nil` base address.
  public var baseAddress: UnsafePointer<UInt8>? {
    return buffer.baseAddress
  }

  /// Length of this text in bytes.
  public var count: Int {
    return buffer.count
  }

  /// `true` when this text contains no bytes.
  public var isEmpty: Bool {
    return buffer.isEmpty
  }

  /// Returns `true` if the memory range of this text lies within the memory
  /// range of `other`.
  ///
  /// An empty text is considered a slice only of another empty text. For a
  /// non-empty `text[n ..< m]`,
  /// `SyntaxText(rebasing: text[n ..< m]).isSlice(of: text)` is `true`.
  public func isSlice(of other: SyntaxText) -> Bool {
    // With an empty operand the answer is 'true' only when both are empty.
    if isEmpty || other.isEmpty {
      return isEmpty && other.isEmpty
    }
    // Both texts are non-empty here, so both base addresses are expected to
    // be non-nil.
    let outerStart = other.baseAddress!
    let innerStart = self.baseAddress!
    let startsInside = outerStart <= innerStart
    let endsInside = innerStart + count <= outerStart + other.count
    return startsInside && endsInside
  }

  /// Returns `true` if the bytes of `other` occur somewhere in this text.
  ///
  /// An empty `other` is never considered contained, because
  /// `firstRange(of:)` returns `nil` for it.
  public func contains(_ other: SyntaxText) -> Bool {
    firstRange(of: other) != nil
  }

  /// Returns the range of the first occurrence of `other` in this text, or
  /// `nil` if there is none. An empty `other` always yields `nil`.
  public func firstRange(of other: SyntaxText) -> Range<Index>? {
    guard !other.isEmpty else { return nil }
    let needleLength = other.count
    // Last offset at which 'other' could still fit. It is negative when
    // 'other' is longer than 'self', which makes the stride below empty.
    let lastCandidate = count - needleLength
    for offset in stride(from: 0, through: lastCandidate, by: 1) {
      // Reaching this point means 'self' is at least as long as the non-empty
      // 'other', so neither text is empty and both force unwraps are safe.
      if compareMemory(baseAddress! + offset, other.baseAddress!, needleLength) {
        return offset ..< (offset + needleLength)
      }
    }
    return nil
  }

  /// Returns `true` if this text starts with the bytes of `other`.
  /// An empty `other` is a prefix of every text.
  public func hasPrefix(_ other: SyntaxText) -> Bool {
    if other.count > count { return false }
    if other.isEmpty { return true }
    return Self(rebasing: prefix(other.count)) == other
  }

  /// Returns `true` if this text ends with the bytes of `other`.
  /// An empty `other` is a suffix of every text.
  public func hasSuffix(_ other: SyntaxText) -> Bool {
    if other.count > count { return false }
    if other.isEmpty { return true }
    return Self(rebasing: suffix(other.count)) == other
  }
}
/// Exposes the referenced bytes as a random-access collection of `UInt8`
/// indexed by `Int`.
extension SyntaxText: RandomAccessCollection {
  public typealias Index = Int
  public typealias Element = UInt8
  public typealias SubSequence = Slice<SyntaxText>

  /// Start index, forwarded from the underlying buffer.
  public var startIndex: Index {
    return buffer.startIndex
  }

  /// End index, forwarded from the underlying buffer.
  public var endIndex: Index {
    return buffer.endIndex
  }

  /// The byte at `position` in the underlying buffer.
  public subscript(position: Index) -> Element {
    return buffer[position]
  }
}
extension SyntaxText: Hashable {
  /// Byte-wise equality: two texts are equal when they have the same length
  /// and identical bytes.
  public static func ==(lhs: SyntaxText, rhs: SyntaxText) -> Bool {
    guard lhs.buffer.count == rhs.buffer.count else {
      return false
    }
    // There is deliberately no `lhs.baseAddress == rhs.baseAddress` shortcut.
    // In SwiftSyntax use cases, comparing the very same SyntaxText instance is
    // extremely rare, and the check would cost an extra branch. The most
    // common usage is comparing parsed text with a static text, e.g.
    // `token.text == "func"`. In such cases `compareMemory` (`memcmp`) is
    // optimized to a `cmp` or similar opcode if either operand is a short
    // static text, so the shortcut would not give a big performance boost even
    // when both sides refer to the same memory.
    if let lhsBase = lhs.baseAddress, let rhsBase = rhs.baseAddress {
      return compareMemory(lhsBase, rhsBase, lhs.count)
    }
    // The counts match and at least one base address is `nil`, so both texts
    // are empty.
    return true
  }

  /// Feeds the raw bytes of the text into `hasher`.
  public func hash(into hasher: inout Hasher) {
    let rawBytes = UnsafeRawBufferPointer(buffer)
    hasher.combine(bytes: rawBytes)
  }
}
/// Allows a `SyntaxText` to be written as a string literal. Every literal
/// kind arrives as a `StaticString` and is forwarded to `init(_:)`.
extension SyntaxText: ExpressibleByStringLiteral {
  public init(stringLiteral value: StaticString) {
    self.init(value)
  }

  public init(unicodeScalarLiteral value: StaticString) {
    self.init(value)
  }

  public init(extendedGraphemeClusterLiteral value: StaticString) {
    self.init(value)
  }
}
extension SyntaxText: CustomStringConvertible {
  /// The text converted to a `String` via `String(syntaxText:)`.
  public var description: String {
    return String(syntaxText: self)
  }
}
extension SyntaxText: CustomDebugStringConvertible {
  /// The debug description of the `String` produced by `description`.
  public var debugDescription: String {
    let text = description
    return text.debugDescription
  }
}
extension String {
  /// Creates a `String` holding a copy of the bytes of `syntaxText`.
  ///
  /// Ill-formed UTF-8 sequences in `syntaxText` are replaced with the Unicode
  /// replacement character `\u{FFFD}`.
  @_spi(RawSyntax)
  public init(syntaxText: SyntaxText) {
    if syntaxText.isEmpty {
      self = ""
      return
    }
    // Where `init(unsafeUninitializedCapacity:initializingUTF8With:)` is not
    // available, decode the bytes as UTF-8 instead.
    guard #available(macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0, *) else {
      self.init(decoding: syntaxText, as: UTF8.self)
      return
    }
    // Copy the bytes straight into the string's storage and report how many
    // were written.
    self.init(unsafeUninitializedCapacity: syntaxText.count) { destination in
      let (_, writtenCount) = destination.initialize(from: syntaxText.buffer)
      return writtenCount
    }
  }

  /// Calls `body` with a `SyntaxText` that refers to the contiguous UTF-8
  /// memory of this string and returns the result of `body`.
  ///
  /// Like `String.withUTF8(_:)`, this may mutate the string if it was not
  /// contiguous.
  @_spi(RawSyntax)
  public mutating func withSyntaxText<R>(
    _ body: (SyntaxText) throws -> R
  ) rethrows -> R {
    return try withUTF8 { bytes in
      let text = SyntaxText(baseAddress: bytes.baseAddress, count: bytes.count)
      return try body(text)
    }
  }
}
#if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) || os(Linux) || os(FreeBSD) || os(OpenBSD)
/// Body-less declaration bound to the symbol `memcmp` via `@_silgen_name`,
/// compiled only on the platforms listed in the `#if` above. Within this file
/// it is called by `compareMemory`.
///
/// NOTE(review): presumably this resolves to the C library's `memcmp`, which
/// returns zero when the two byte ranges are equal — confirm.
/// NOTE(review): the declaration is `public`, so it is visible to clients of
/// this module; verify it cannot clash with a C library `memcmp` there.
@_silgen_name("memcmp")
public func memcmp(_: UnsafeRawPointer?, _: UnsafeRawPointer?, _: Int) -> Int32
#endif
/// Returns `true` when the `byteCount` bytes starting at `lhs` are identical
/// to the `byteCount` bytes starting at `rhs`.
///
/// On the platforms listed below this calls `memcmp`; elsewhere it compares
/// the two ranges element by element. `byteCount` must not be negative
/// (checked with `assert`).
private func compareMemory(
  _ lhs: UnsafePointer<UInt8>, _ rhs: UnsafePointer<UInt8>, _ byteCount: Int
) -> Bool {
  assert(byteCount >= 0)
#if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) || os(Linux) || os(FreeBSD) || os(OpenBSD)
  let ordering = memcmp(lhs, rhs, byteCount)
  return ordering == 0
#else
  let lhsBytes = UnsafeBufferPointer(start: lhs, count: byteCount)
  let rhsBytes = UnsafeBufferPointer(start: rhs, count: byteCount)
  return lhsBytes.elementsEqual(rhsBytes)
#endif
}