Skip to content

Commit 7db3952

Browse files
committed
Regex<Captures> -> Regex<Match>
Changes `Regex` and the result builder prototype to use `Match` as the generic parameter, making them consistent with the [Strongly Typed Regex Captures](https://forums.swift.org/t/pitch-strongly-typed-regex-captures/53391) pitch. Introduces `Tuple<n>` structs so that constraints on capture types (i.e. `Match` with its first element dropped) can be expressed while still filtering out empty captures in concatenation. `Tuple<n>` is also needed to implement a prototype of the [proposed matching semantics](#64). Because coercion into `Tuple<n>` can no longer rely on runtime magic the way native tuples do, child capture type information is incorporated into RECode's `captureNil` and `captureArray` instructions, so that a concrete type is always available when forming a nil or an empty-array capture. The resulting existential tuple capture can then be opened and bitcast to a `Tuple<n>`.
1 parent 9a06a3e commit 7db3952

File tree

19 files changed

+2856
-2504
lines changed

19 files changed

+2856
-2504
lines changed

Sources/Exercises/Participants/RegexParticipant.swift

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ struct RegexLiteralParticipant: Participant {
3333
}
3434

3535
private func extractFromCaptures(
36-
lower: Substring, upper: Substring?, prop: Substring
36+
_ match: Tuple4<Substring, Substring, Substring?, Substring>
3737
) -> GraphemeBreakEntry? {
38-
guard let lowerScalar = Unicode.Scalar(hex: lower),
39-
let upperScalar = upper.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40-
let property = Unicode.GraphemeBreakProperty(prop)
38+
guard let lowerScalar = Unicode.Scalar(hex: match.1),
39+
let upperScalar = match.2.map(Unicode.Scalar.init(hex:)) ?? lowerScalar,
40+
let property = Unicode.GraphemeBreakProperty(match.3)
4141
else {
4242
return nil
4343
}
@@ -48,8 +48,8 @@ private func extractFromCaptures(
4848
private func graphemeBreakPropertyData<RP: RegexProtocol>(
4949
forLine line: String,
5050
using regex: RP
51-
) -> GraphemeBreakEntry? where RP.Capture == (Substring, Substring?, Substring) {
52-
line.match(regex).map(\.captures).flatMap(extractFromCaptures)
51+
) -> GraphemeBreakEntry? where RP.Match == Tuple4<Substring, Substring, Substring?, Substring> {
52+
line.match(regex).map(\.match).flatMap(extractFromCaptures)
5353
}
5454

5555
private func graphemeBreakPropertyData(
@@ -75,5 +75,5 @@ private func graphemeBreakPropertyDataLiteral(
7575
return graphemeBreakPropertyData(
7676
forLine: line,
7777
using: r(#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#,
78-
capturing: (Substring, Substring?, Substring).self))
78+
matching: Tuple4<Substring, Substring, Substring?, Substring>.self))
7979
}

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 106 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
// swift run VariadicsGenerator --max-arity 7 > Sources/RegexDSL/Concatenation.swift
1+
// swift run VariadicsGenerator --max-arity 7 > Sources/_StringProcessing/RegexDSL/Concatenation.swift
22

33
import ArgumentParser
44

55
struct Permutation {
66
let arity: Int
77
// 1 -> no extra constraint
8-
// 0 -> where T.Capture: NoCaptureProtocol
8+
// 0 -> where T.Match: NoCaptureProtocol
99
let bits: Int64
1010

1111
func isCaptureless(at index: Int) -> Bool {
@@ -77,8 +77,6 @@ func outputForEach<C: Collection>(
7777
if let lt = lineTerminator {
7878
let indent = needsSep ? " " : " "
7979
output("\(lt)\n\(indent)")
80-
} else if needsSep {
81-
output(" ")
8280
}
8381
}
8482
}
@@ -87,12 +85,13 @@ typealias Counter = Int64
8785
let patternProtocolName = "RegexProtocol"
8886
let concatenationStructTypeBaseName = "Concatenate"
8987
let capturingGroupTypeBaseName = "CapturingGroup"
88+
let matchAssociatedTypeName = "Match"
9089
let captureAssociatedTypeName = "Capture"
9190
let patternBuilderTypeName = "RegexBuilder"
9291
let patternProtocolRequirementName = "regex"
9392
let PatternTypeBaseName = "Regex"
94-
let emptyProtocolName = "EmptyProtocol"
95-
let emptyStructName = "Empty"
93+
let emptyProtocolName = "EmptyCaptureProtocol"
94+
let baseMatchTypeName = "Substring"
9695

9796
@main
9897
struct VariadicsGenerator: ParsableCommand {
@@ -112,8 +111,13 @@ struct VariadicsGenerator: ParsableCommand {
112111
113112
import _MatchingEngine
114113
114+
115115
""")
116116

117+
for arity in 2...maxArity+1 {
118+
emitTupleStruct(arity: arity)
119+
}
120+
117121
for arity in minArity...maxArity {
118122
for permutation in Permutations(arity: arity) {
119123
emitConcatenation(permutation: permutation)
@@ -124,42 +128,121 @@ struct VariadicsGenerator: ParsableCommand {
124128
output("// END AUTO-GENERATED CONTENT")
125129
}
126130

131+
func emitTupleStruct(arity: Int) {
132+
output("""
133+
@frozen @dynamicMemberLookup
134+
public struct Tuple\(arity)<
135+
""")
136+
outputForEach(0..<arity, separator: ", ") {
137+
"_\($0)"
138+
}
139+
output("> {")
140+
// `public typealias Tuple = (_0, ...)`
141+
output("\n public typealias Tuple = (")
142+
outputForEach(0..<arity, separator: ", ") { "_\($0)" }
143+
output(")")
144+
// `public var tuple: Tuple`
145+
output("\n public var tuple: Tuple\n")
146+
// `subscript(dynamicMember:)`
147+
output("""
148+
public subscript<T>(dynamicMember keyPath: WritableKeyPath<Tuple, T>) -> T {
149+
get { tuple[keyPath: keyPath] }
150+
_modify { yield &tuple[keyPath: keyPath] }
151+
}
152+
""")
153+
output("\n}\n")
154+
output("extension Tuple\(arity): \(emptyProtocolName) where ")
155+
outputForEach(1..<arity, separator: ", ") {
156+
"_\($0): \(emptyProtocolName)"
157+
}
158+
output(" {}\n")
159+
output("extension Tuple\(arity): MatchProtocol {\n")
160+
output(" public typealias Capture = ")
161+
if arity == 2 {
162+
output("_1")
163+
} else {
164+
output("Tuple\(arity-1)<")
165+
outputForEach(1..<arity, separator: ", ") {
166+
"_\($0)"
167+
}
168+
output(">")
169+
}
170+
output("\n public init(_ tuple: Tuple) { self.tuple = tuple }")
171+
// `public init(_0: _0, ...) { ... }`
172+
output("\n public init(")
173+
outputForEach(0..<arity, separator: ", ") {
174+
"_ _\($0): _\($0)"
175+
}
176+
output(") {\n")
177+
output(" self.init((")
178+
outputForEach(0..<arity, separator: ", ") { "_\($0)" }
179+
output("))\n")
180+
output(" }")
181+
output("\n}\n")
182+
// Equatable
183+
output("extension Tuple\(arity): Equatable where ")
184+
outputForEach(0..<arity, separator: ", ") {
185+
"_\($0): Equatable"
186+
}
187+
output(" {\n")
188+
output(" public static func == (lhs: Self, rhs: Self) -> Bool {\n")
189+
output(" ")
190+
outputForEach(0..<arity, separator: " && ") {
191+
"lhs.tuple.\($0) == rhs.tuple.\($0)"
192+
}
193+
output("\n }\n")
194+
output("}\n")
195+
}
196+
127197
func emitConcatenation(permutation: Permutation) {
128198
let arity = permutation.arity
199+
200+
func emitGenericParameters(withConstraints: Bool) {
201+
outputForEach(0..<arity, separator: ", ") {
202+
var base = "T\($0)"
203+
if withConstraints {
204+
base += ": \(patternProtocolName)"
205+
}
206+
return base
207+
}
208+
}
209+
129210
// Emit concatenation type declarations.
130211
// public struct Concatenation{n}_{perm}<...>: RegexProtocol {
131-
// public typealias Capture = ...
132-
// public let regex: Regex
212+
// public typealias Match = ...
213+
// public let regex: Regex<Match>
133214
// public init(...) { ... }
134215
// }
135-
let typeName = "\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
216+
let typeName =
217+
"\(concatenationStructTypeBaseName)\(arity)_\(permutation.identifier)"
136218
output("public struct \(typeName)<\n ")
137-
outputForEach(0..<arity, separator: ",") { "T\($0): \(patternProtocolName)" }
219+
emitGenericParameters(withConstraints: true)
138220
output("\n>: \(patternProtocolName)")
139221
if permutation.hasCaptureless {
140222
output(" where ")
141-
outputForEach(permutation.capturelessIndices, separator: ",") {
142-
"T\($0).\(captureAssociatedTypeName): \(emptyProtocolName)"
223+
outputForEach(permutation.capturelessIndices, separator: ", ") {
224+
"T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName): \(emptyProtocolName)"
143225
}
144226
}
145227
output(" {\n")
146228
let captureIndices = permutation.captureIndices
147-
output(" public typealias \(captureAssociatedTypeName) = ")
229+
output(" public typealias \(matchAssociatedTypeName) = ")
148230
let captureElements = captureIndices
149-
.map { "T\($0).\(captureAssociatedTypeName)" }
231+
.map { "T\($0).\(matchAssociatedTypeName).\(captureAssociatedTypeName)" }
150232
if captureElements.isEmpty {
151-
output(emptyStructName)
233+
output(baseMatchTypeName)
152234
} else {
153-
output("(\(captureElements.joined(separator: ", ")))")
235+
let count = captureElements.count + 1
236+
output("Tuple\(count)<\(baseMatchTypeName), \(captureElements.joined(separator: ", "))>")
154237
}
155238
output("\n")
156-
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(captureAssociatedTypeName)>\n")
239+
output(" public let \(patternProtocolRequirementName): \(PatternTypeBaseName)<\(matchAssociatedTypeName)>\n")
157240
output(" init(")
158-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
241+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
159242
output(") {\n")
160243
output(" \(patternProtocolRequirementName) = .init(ast: concat(\n ")
161244
outputForEach(
162-
0..<arity, separator: ",", lineTerminator: ""
245+
0..<arity, separator: ", ", lineTerminator: ""
163246
) { i in
164247
"x\(i).\(patternProtocolRequirementName).ast"
165248
}
@@ -169,14 +252,14 @@ struct VariadicsGenerator: ParsableCommand {
169252
// Emit concatenation builders.
170253
output("extension \(patternBuilderTypeName) {\n")
171254
output(" public static func buildBlock<")
172-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
255+
emitGenericParameters(withConstraints: true)
173256
output(">(\n ")
174-
outputForEach(0..<arity, separator: ",") { "_ x\($0): T\($0)" }
257+
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
175258
output("\n ) -> \(typeName)<")
176-
outputForEach(0..<arity, separator: ",") { "T\($0)" }
259+
emitGenericParameters(withConstraints: false)
177260
output("> {\n")
178261
output(" \(typeName)(")
179-
outputForEach(0..<arity, separator: ",") { "x\($0)" }
262+
outputForEach(0..<arity, separator: ", ") { "x\($0)" }
180263
output(")\n }\n}\n\n")
181264
}
182265
}

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ extension AST {
2525
.reduce(.empty, +)
2626
.map(CaptureStructure.optional)
2727
case .concatenation(let concatenation):
28-
assert(concatenation.children.count > 1)
2928
return concatenation.children.map(\.captureStructure).reduce(.empty, +)
3029
case .group(let group):
3130
let innerCaptures = group.child.captureStructure
@@ -107,6 +106,27 @@ extension CaptureStructure {
107106
}
108107
return false
109108
}
109+
110+
public func type(withAtomType atomType: Any.Type) -> Any.Type {
111+
switch self {
112+
case .atom:
113+
return atomType
114+
case .array(let child):
115+
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
116+
case .optional(let child):
117+
return TypeConstruction.optionalType(of: child.type(withAtomType: atomType))
118+
case .tuple(let children):
119+
return TypeConstruction.tupleType(of: children.map {
120+
$0.type(withAtomType: atomType)
121+
})
122+
}
123+
}
124+
125+
public typealias DefaultAtomType = Substring
126+
127+
public var type: Any.Type {
128+
type(withAtomType: DefaultAtomType.self)
129+
}
110130
}
111131

112132
// MARK: - Serialization
@@ -142,6 +162,7 @@ extension CaptureStructure {
142162
/// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
143163
/// 〚`[T]`〛 ==> 〚`T`〛, .formArray
144164
/// 〚`T?`〛 ==> 〚`T`〛, .formOptional
165+
/// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
145166
/// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
146167
/// ```
147168
///
@@ -163,7 +184,7 @@ extension CaptureStructure {
163184
offset += MemoryLayout<Code>.stride
164185
}
165186
/// Recursively encode the node to the buffer.
166-
func encode(_ node: CaptureStructure) {
187+
func encode(_ node: CaptureStructure, isTopLevel: Bool = false) {
167188
switch node {
168189
// 〚`T` (atom)〛 ==> .atom
169190
case .atom(name: nil):
@@ -184,17 +205,22 @@ extension CaptureStructure {
184205
case .optional(let child):
185206
encode(child)
186207
append(.formOptional)
208+
// 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
187209
// 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
188210
case .tuple(let children):
189-
append(.beginTuple)
211+
if !isTopLevel {
212+
append(.beginTuple)
213+
}
190214
for child in children {
191215
encode(child)
192216
}
193-
append(.endTuple)
217+
if !isTopLevel {
218+
append(.endTuple)
219+
}
194220
}
195221
}
196222
if !isEmpty {
197-
encode(self)
223+
encode(self, isTopLevel: true)
198224
}
199225
append(.end)
200226
}

Sources/_StringProcessing/Utility/TypeConstruction.swift renamed to Sources/_MatchingEngine/Utility/TypeConstruction.swift

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,9 @@ private func swift_getTupleTypeMetadata3(
4848
proposedWitnesses: UnsafeRawPointer?
4949
) -> (value: Any.Type, state: Int)
5050

51-
enum TypeConstruction {
52-
51+
public enum TypeConstruction {
5352
/// Returns a tuple metatype of the given element types.
54-
static func tupleType<
53+
public static func tupleType<
5554
ElementTypes: BidirectionalCollection
5655
>(
5756
of elementTypes: __owned ElementTypes
@@ -104,7 +103,7 @@ enum TypeConstruction {
104103
}
105104

106105
/// Creates a type-erased tuple with the given elements.
107-
static func tuple<Elements: BidirectionalCollection>(
106+
public static func tuple<Elements: BidirectionalCollection>(
108107
of elements: __owned Elements
109108
) -> Any where Elements.Element == Any {
110109
// Open existential on the overall tuple type.
@@ -133,4 +132,18 @@ enum TypeConstruction {
133132
let elementTypes = elements.map { type(of: $0) }
134133
return _openExistential(tupleType(of: elementTypes), do: create)
135134
}
135+
136+
public static func arrayType(of childType: Any.Type) -> Any.Type {
137+
func helper<T>(_: T.Type) -> Any.Type {
138+
[T].self
139+
}
140+
return _openExistential(childType, do: helper)
141+
}
142+
143+
public static func optionalType(of childType: Any.Type) -> Any.Type {
144+
func helper<T>(_: T.Type) -> Any.Type {
145+
T?.self
146+
}
147+
return _openExistential(childType, do: helper)
148+
}
136149
}

0 commit comments

Comments
 (0)