Skip to content

Commit 9d5280e

Browse files
committed
Fully generalize "whole match" in the engine and enable transforming custom types
* Track the whole match as an element of the "capture list" in the matching engine. Do so by emitting code as an implicit `capture` around the root node.

* No longer handle `matcher` as a special case within `capture` lowering, because the matcher can be arbitrarily nested within "output-forwarding" nodes, such as a `changeMatchingOptions` non-capturing group. Instead, make the bytecode emitter carry a result value so that a custom output can be propagated through any forwarding nodes.

  ```swift
  Regex {
    Capture(
      SemanticVersionParser()
        .ignoringCase()
        .matchingSemantics(.unicodeScalar)
    ) // This would not work previously.
  }
  ```

* Collapse DSLTree node `transform` into `capture`, because a transform can never be standalone (without a `capture` parent). This greatly simplifies `capture` lowering.

* Make the bytecode's capture transform use type `(Input, _StoredCapture) -> Any` so that it can transform any whole match, not just `Substring`. This means you can now transform any captured value, including a custom-consuming regex component's result!

  ```swift
  Regex {
    "version:"
    OneOrMore(.whitespace)
    Capture {
      SemanticVersionParser() // Regex<SemanticVersion>
    } transform: {
      // (SemanticVersion) -> SomethingElse
    }
  }
  ```

  The transforms of `Capture` and `TryCapture` are now generalized from taking `Substring` to taking generic parameter `W` (the whole match).

* Fix an issue where initial options were applied based solely on whether the bytecode had any instructions, failing examples such as `((?i:.))`. It now checks whether the first matchable atom has been emitted.
1 parent a7001b1 commit 9d5280e

27 files changed

+676
-943
lines changed

Sources/RegexBuilder/Variadics.swift

+308-528
Large diffs are not rendered by default.

Sources/VariadicsGenerator/VariadicsGenerator.swift

+28-48
Original file line numberDiff line numberDiff line change
@@ -646,28 +646,23 @@ struct VariadicsGenerator: ParsableCommand {
646646
\(disfavored)\
647647
public init<\(genericParams), NewCapture>(
648648
_ component: R,
649-
transform: @escaping (Substring) throws -> NewCapture
649+
transform: @escaping (W) throws -> NewCapture
650650
) \(whereClauseTransformed) {
651-
self.init(node: .capture(.transform(
652-
CaptureTransform(resultType: NewCapture.self) {
653-
try transform($0) as Any
654-
},
655-
component.regex.root)))
651+
self.init(node: .capture(
652+
component.regex.root,
653+
CaptureTransform(transform)))
656654
}
657655
658656
\(disfavored)\
659657
public init<\(genericParams), NewCapture>(
660658
_ component: R,
661659
as reference: Reference<NewCapture>,
662-
transform: @escaping (Substring) throws -> NewCapture
660+
transform: @escaping (W) throws -> NewCapture
663661
) \(whereClauseTransformed) {
664662
self.init(node: .capture(
665663
reference: reference.id,
666-
.transform(
667-
CaptureTransform(resultType: NewCapture.self) {
668-
try transform($0) as Any
669-
},
670-
component.regex.root)))
664+
component.regex.root,
665+
CaptureTransform(transform)))
671666
}
672667
}
673668
@@ -676,28 +671,23 @@ struct VariadicsGenerator: ParsableCommand {
676671
\(disfavored)\
677672
public init<\(genericParams), NewCapture>(
678673
_ component: R,
679-
transform: @escaping (Substring) throws -> NewCapture?
674+
transform: @escaping (W) throws -> NewCapture?
680675
) \(whereClauseTransformed) {
681-
self.init(node: .capture(.transform(
682-
CaptureTransform(resultType: NewCapture.self) {
683-
try transform($0) as Any?
684-
},
685-
component.regex.root)))
676+
self.init(node: .capture(
677+
component.regex.root,
678+
CaptureTransform(transform)))
686679
}
687680
688681
\(disfavored)\
689682
public init<\(genericParams), NewCapture>(
690683
_ component: R,
691684
as reference: Reference<NewCapture>,
692-
transform: @escaping (Substring) throws -> NewCapture?
685+
transform: @escaping (W) throws -> NewCapture?
693686
) \(whereClauseTransformed) {
694687
self.init(node: .capture(
695688
reference: reference.id,
696-
.transform(
697-
CaptureTransform(resultType: NewCapture.self) {
698-
try transform($0) as Any?
699-
},
700-
component.regex.root)))
689+
component.regex.root,
690+
CaptureTransform(transform)))
701691
}
702692
}
703693
@@ -725,28 +715,23 @@ struct VariadicsGenerator: ParsableCommand {
725715
\(disfavored)\
726716
public init<\(genericParams), NewCapture>(
727717
@\(concatBuilderName) _ component: () -> R,
728-
transform: @escaping (Substring) throws -> NewCapture
718+
transform: @escaping (W) throws -> NewCapture
729719
) \(whereClauseTransformed) {
730-
self.init(node: .capture(.transform(
731-
CaptureTransform(resultType: NewCapture.self) {
732-
try transform($0) as Any
733-
},
734-
component().regex.root)))
720+
self.init(node: .capture(
721+
component().regex.root,
722+
CaptureTransform(transform)))
735723
}
736724
737725
\(disfavored)\
738726
public init<\(genericParams), NewCapture>(
739727
as reference: Reference<NewCapture>,
740728
@\(concatBuilderName) _ component: () -> R,
741-
transform: @escaping (Substring) throws -> NewCapture
729+
transform: @escaping (W) throws -> NewCapture
742730
) \(whereClauseTransformed) {
743731
self.init(node: .capture(
744732
reference: reference.id,
745-
.transform(
746-
CaptureTransform(resultType: NewCapture.self) {
747-
try transform($0) as Any
748-
},
749-
component().regex.root)))
733+
component().regex.root,
734+
CaptureTransform(transform)))
750735
}
751736
}
752737
@@ -755,28 +740,23 @@ struct VariadicsGenerator: ParsableCommand {
755740
\(disfavored)\
756741
public init<\(genericParams), NewCapture>(
757742
@\(concatBuilderName) _ component: () -> R,
758-
transform: @escaping (Substring) throws -> NewCapture?
743+
transform: @escaping (W) throws -> NewCapture?
759744
) \(whereClauseTransformed) {
760-
self.init(node: .capture(.transform(
761-
CaptureTransform(resultType: NewCapture.self) {
762-
try transform($0) as Any?
763-
},
764-
component().regex.root)))
745+
self.init(node: .capture(
746+
component().regex.root,
747+
CaptureTransform(transform)))
765748
}
766749
767750
\(disfavored)\
768751
public init<\(genericParams), NewCapture>(
769752
as reference: Reference<NewCapture>,
770753
@\(concatBuilderName) _ component: () -> R,
771-
transform: @escaping (Substring) throws -> NewCapture?
754+
transform: @escaping (W) throws -> NewCapture?
772755
) \(whereClauseTransformed) {
773756
self.init(node: .capture(
774757
reference: reference.id,
775-
.transform(
776-
CaptureTransform(resultType: NewCapture.self) {
777-
try transform($0) as Any?
778-
},
779-
component().regex.root)))
758+
component().regex.root,
759+
CaptureTransform(transform)))
780760
}
781761
}
782762

Sources/_RegexParser/Regex/Parse/CaptureList.swift

+8-16
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ public struct CaptureList {
2424
extension CaptureList {
2525
public struct Capture {
2626
public var name: String?
27-
public var type: Any.Type?
27+
public var type: Any.Type
2828
public var optionalDepth: Int
2929
public var location: SourceLocation
3030

3131
public init(
3232
name: String? = nil,
33-
type: Any.Type? = nil,
33+
type: Any.Type = Substring.self,
3434
optionalDepth: Int,
3535
_ location: SourceLocation
3636
) {
@@ -122,18 +122,15 @@ extension AST.Node {
122122
break
123123
}
124124
}
125-
126-
public var _captureList: CaptureList {
127-
var caps = CaptureList()
128-
self._addCaptures(to: &caps, optionalNesting: 0)
129-
return caps
130-
}
131125
}
132126

133127
extension AST {
134-
/// Get the capture list for this AST
128+
/// The capture list (including the whole match) of this AST.
135129
public var captureList: CaptureList {
136-
root._captureList
130+
var caps = CaptureList()
131+
caps.append(.init(optionalDepth: 0, .fake))
132+
root._addCaptures(to: &caps, optionalNesting: 0)
133+
return caps
137134
}
138135
}
139136

@@ -151,12 +148,7 @@ extension CaptureList: Equatable {}
151148

152149
extension CaptureList.Capture: CustomStringConvertible {
153150
public var description: String {
154-
let typeStr: String
155-
if let ty = type {
156-
typeStr = "\(ty)"
157-
} else {
158-
typeStr = "Substring"
159-
}
151+
let typeStr = String(describing: type)
160152
let suffix = String(repeating: "?", count: optionalDepth)
161153
return typeStr + suffix
162154
}

Sources/_RegexParser/Regex/Parse/CaptureStructure.swift

+2-5
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ extension CaptureStructure: CustomStringConvertible {
225225
extension AST {
226226
/// The capture structure of this AST for compiler communication.
227227
var captureStructure: CaptureStructure {
228-
root._captureList._captureStructure(nestOptionals: true)
228+
captureList._captureStructure(nestOptionals: true)
229229
}
230230
}
231231

@@ -246,10 +246,7 @@ extension CaptureList {
246246
extension CaptureList.Capture {
247247
func _captureStructure(nestOptionals: Bool) -> CaptureStructure {
248248
if optionalDepth == 0 {
249-
if let ty = type {
250-
return .atom(name: name, type: .init(ty))
251-
}
252-
return .atom(name: name)
249+
return .atom(name: name, type: type == Substring.self ? nil : .init(type))
253250
}
254251
var copy = self
255252
copy.optionalDepth = 0

Sources/_RegexParser/Regex/Parse/Sema.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ extension RegexValidator {
7777
}
7878
switch ref.kind {
7979
case .absolute(let i):
80-
guard i <= captures.captures.count else {
80+
guard i < captures.captures.count else {
8181
throw error(.invalidReference(i), at: ref.innerLoc)
8282
}
8383
case .named(let name):

0 commit comments

Comments
 (0)