Skip to content

Commit bb5a67f

Browse files
authored
Allow subscripting a match result on a Reference. (#182)
Change `Reference` to a generic type over `Capture`. Add a `subscript<T>(_: Reference<T>) -> T` to `RegexMatch` to allow the user to access a capture via a reference. Alternatives considered: - `subscript(_: Reference) -> Any` with non-generic `Reference`. While creating a `Reference` would remain easy, it would lose the benefits of strong types. - `Reference<Capture>.init() where Capture == Substring`. This allows writing `Reference()` to conveniently create a `Reference<Substring>`, but when it’s used on any transformed capture, it causes the “expression is too complex” error. I think this is a harmful outcome as it's hard for developers to figure out that the fix is actually specifying a type in the `Reference` initializer. ----- Examples: ```swift let a = Reference(Int.self) let result = regex.match { tryCapture(as: a) { oneOrMore(.digit) } transform: { Int($0) } } result[a] // => Int ```
1 parent 0d65e2e commit bb5a67f

16 files changed

+985
-296
lines changed

Sources/VariadicsGenerator/VariadicsGenerator.swift

+86-28
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,7 @@ var standardError = StandardErrorStream()
9090

9191
typealias Counter = Int64
9292
let regexProtocolName = "RegexProtocol"
93-
let concatenationStructTypeBaseName = "Concatenate"
94-
let capturingGroupTypeBaseName = "CapturingGroup"
9593
let matchAssociatedTypeName = "Match"
96-
let captureAssociatedTypeName = "Capture"
9794
let patternBuilderTypeName = "RegexBuilder"
9895
let patternProtocolRequirementName = "regex"
9996
let regexTypeName = "Regex"
@@ -535,108 +532,169 @@ struct VariadicsGenerator: ParsableCommand {
535532
: "(\(baseMatchTypeName), \(newCaptureType), " + (0..<arity).map { "C\($0)" }.joined(separator: ", ") + ")"
536533
}
537534
let whereClause = "where R.\(matchAssociatedTypeName) == \(matchType)"
535+
let rawNewMatchType = newMatchType(newCaptureType: "W")
536+
let transformedNewMatchType = newMatchType(newCaptureType: "NewCapture")
538537
output("""
539538
// MARK: - Non-builder capture arity \(arity)
540539
541540
public func capture<\(genericParams)>(
542-
_ component: R, as reference: Reference? = nil
543-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "W"))> \(whereClause) {
544-
.init(node: .group(.capture, component.regex.root, reference?.id))
541+
_ component: R
542+
) -> \(regexTypeName)<\(rawNewMatchType)> \(whereClause) {
543+
.init(node: .group(.capture, component.regex.root))
544+
}
545+
546+
public func capture<\(genericParams)>(
547+
_ component: R, as reference: Reference<W>
548+
) -> \(regexTypeName)<\(rawNewMatchType)> \(whereClause) {
549+
.init(node: .group(.capture, component.regex.root, reference.id))
550+
}
551+
552+
public func capture<\(genericParams), NewCapture>(
553+
_ component: R,
554+
transform: @escaping (Substring) -> NewCapture
555+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
556+
.init(node: .groupTransform(
557+
.capture,
558+
component.regex.root,
559+
CaptureTransform(resultType: NewCapture.self) {
560+
transform($0) as Any
561+
}))
545562
}
546563
547564
public func capture<\(genericParams), NewCapture>(
548565
_ component: R,
549-
as reference: Reference? = nil,
566+
as reference: Reference<NewCapture>,
550567
transform: @escaping (Substring) -> NewCapture
551-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
568+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
552569
.init(node: .groupTransform(
553570
.capture,
554571
component.regex.root,
555572
CaptureTransform(resultType: NewCapture.self) {
556573
transform($0) as Any
557574
},
558-
reference?.id))
575+
reference.id))
559576
}
560577
561578
public func tryCapture<\(genericParams), NewCapture>(
562579
_ component: R,
563-
as reference: Reference? = nil,
564580
transform: @escaping (Substring) throws -> NewCapture
565-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
581+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
582+
.init(node: .groupTransform(
583+
.capture,
584+
component.regex.root,
585+
CaptureTransform(resultType: NewCapture.self) {
586+
try transform($0) as Any
587+
}))
588+
}
589+
590+
public func tryCapture<\(genericParams), NewCapture>(
591+
_ component: R,
592+
as reference: Reference<NewCapture>,
593+
transform: @escaping (Substring) throws -> NewCapture
594+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
566595
.init(node: .groupTransform(
567596
.capture,
568597
component.regex.root,
569598
CaptureTransform(resultType: NewCapture.self) {
570599
try transform($0) as Any
571600
},
572-
reference?.id))
601+
reference.id))
573602
}
574603
575604
public func tryCapture<\(genericParams), NewCapture>(
576605
_ component: R,
577-
as reference: Reference? = nil,
578606
transform: @escaping (Substring) -> NewCapture?
579-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
607+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
608+
.init(node: .groupTransform(
609+
.capture,
610+
component.regex.root,
611+
CaptureTransform(resultType: NewCapture.self) {
612+
transform($0) as Any?
613+
}))
614+
}
615+
616+
public func tryCapture<\(genericParams), NewCapture>(
617+
_ component: R,
618+
as reference: Reference<NewCapture>,
619+
transform: @escaping (Substring) -> NewCapture?
620+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
580621
.init(node: .groupTransform(
581622
.capture,
582623
component.regex.root,
583624
CaptureTransform(resultType: NewCapture.self) {
584625
transform($0) as Any?
585626
},
586-
reference?.id))
627+
reference.id))
587628
}
588629
589630
// MARK: - Builder capture arity \(arity)
590631
591632
public func capture<\(genericParams)>(
592-
as reference: Reference? = nil,
593633
@RegexBuilder _ component: () -> R
594-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "W"))> \(whereClause) {
595-
.init(node: .group(.capture, component().regex.root, reference?.id))
634+
) -> \(regexTypeName)<\(rawNewMatchType)> \(whereClause) {
635+
.init(node: .group(.capture, component().regex.root))
636+
}
637+
638+
public func capture<\(genericParams)>(
639+
as reference: Reference<W>,
640+
@RegexBuilder _ component: () -> R
641+
) -> \(regexTypeName)<\(rawNewMatchType)> \(whereClause) {
642+
.init(node: .group(.capture, component().regex.root, reference.id))
596643
}
597644
598645
public func capture<\(genericParams), NewCapture>(
599-
as reference: Reference? = nil,
600646
@RegexBuilder _ component: () -> R,
601647
transform: @escaping (Substring) -> NewCapture
602-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
648+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
603649
.init(node: .groupTransform(
604650
.capture,
605651
component().regex.root,
606652
CaptureTransform(resultType: NewCapture.self) {
607653
transform($0) as Any
608-
},
609-
reference?.id))
654+
}))
610655
}
611656
612657
public func tryCapture<\(genericParams), NewCapture>(
613-
as reference: Reference? = nil,
658+
as reference: Reference<NewCapture>,
614659
@RegexBuilder _ component: () -> R,
615660
transform: @escaping (Substring) throws -> NewCapture
616-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
661+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
617662
.init(node: .groupTransform(
618663
.capture,
619664
component().regex.root,
620665
CaptureTransform(resultType: NewCapture.self) {
621666
try transform($0) as Any
622667
},
623-
reference?.id))
668+
reference.id))
669+
}
670+
671+
public func tryCapture<\(genericParams), NewCapture>(
672+
@RegexBuilder _ component: () -> R,
673+
transform: @escaping (Substring) -> NewCapture?
674+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
675+
.init(node: .groupTransform(
676+
.capture,
677+
component().regex.root,
678+
CaptureTransform(resultType: NewCapture.self) {
679+
transform($0) as Any?
680+
}))
624681
}
625682
626683
public func tryCapture<\(genericParams), NewCapture>(
627-
as reference: Reference? = nil,
684+
as reference: Reference<NewCapture>,
628685
@RegexBuilder _ component: () -> R,
629686
transform: @escaping (Substring) -> NewCapture?
630-
) -> \(regexTypeName)<\(newMatchType(newCaptureType: "NewCapture"))> \(whereClause) {
687+
) -> \(regexTypeName)<\(transformedNewMatchType)> \(whereClause) {
631688
.init(node: .groupTransform(
632689
.capture,
633690
component().regex.root,
634691
CaptureTransform(resultType: NewCapture.self) {
635692
transform($0) as Any?
636693
},
637-
reference?.id))
694+
reference.id))
638695
}
639696
697+
640698
""")
641699
}
642700
}

Sources/_StringProcessing/ByteCodeGen.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ extension Compiler.ByteCodeGen {
298298
mutating func emitGroup(
299299
_ kind: AST.Group.Kind,
300300
_ child: DSLTree.Node,
301-
_ referenceID: Reference.ID?
301+
_ referenceID: ReferenceID?
302302
) throws -> CaptureRegister? {
303303
guard kind.isCapturing || referenceID == nil else {
304304
throw Unreachable("Reference ID shouldn't exist for non-capturing groups")

Sources/_StringProcessing/Capture.swift

+4-2
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,11 @@ func constructExistentialMatchComponent(
5252
underlying = Optional<Any>(nil) as Any
5353
someCount = optionalCount - 1
5454
}
55-
5655
for _ in 0..<someCount {
57-
underlying = Optional(underlying) as Any
56+
func wrap<T>(_ x: T) {
57+
underlying = Optional(x) as Any
58+
}
59+
_openExistential(underlying, do: wrap)
5860
}
5961
return underlying
6062
}

Sources/_StringProcessing/Engine/Consume.swift

+2-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ extension Engine where Input == String {
6363
guard let result = result else { return nil }
6464

6565
let capList = cpu.storedCaptures
66-
return (result, CaptureList(caps: capList))
66+
return (result, CaptureList(
67+
values: capList, referencedCaptureOffsets: program.referencedCaptureOffsets))
6768
}
6869
}
6970

Sources/_StringProcessing/Engine/MEBuilder.swift

+8-7
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ extension MEProgram where Input.Element: Hashable {
4343
var captureStructure: CaptureStructure = .empty
4444

4545
// Symbolic reference resolution
46-
var unresolvedReferences: [Reference.ID: [InstructionAddress]] = [:]
47-
var captureOffsets: [Reference.ID: Int] = [:]
46+
var unresolvedReferences: [ReferenceID: [InstructionAddress]] = [:]
47+
var referencedCaptureOffsets: [ReferenceID: Int] = [:]
4848
var captureCount: Int {
4949
// We currently deduce the capture count from the capture register number.
5050
nextCaptureRegister.rawValue
@@ -274,7 +274,7 @@ extension MEProgram.Builder {
274274
.init(.backreference, .init(capture: cap)))
275275
}
276276

277-
public mutating func buildUnresolvedReference(id: Reference.ID) {
277+
public mutating func buildUnresolvedReference(id: ReferenceID) {
278278
buildBackreference(.init(0))
279279
unresolvedReferences[id, default: []].append(lastInstructionAddress)
280280
}
@@ -352,7 +352,8 @@ extension MEProgram.Builder {
352352
staticTransformFunctions: transformFunctions,
353353
staticMatcherFunctions: matcherFunctions,
354354
registerInfo: regInfo,
355-
captureStructure: captureStructure)
355+
captureStructure: captureStructure,
356+
referencedCaptureOffsets: referencedCaptureOffsets)
356357
}
357358

358359
public mutating func reset() { self = Self() }
@@ -424,7 +425,7 @@ extension MEProgram.Builder {
424425
fileprivate extension MEProgram.Builder {
425426
mutating func resolveReferences() throws {
426427
for (id, uses) in unresolvedReferences {
427-
guard let offset = captureOffsets[id] else {
428+
guard let offset = referencedCaptureOffsets[id] else {
428429
throw RegexCompilationError.uncapturedReference
429430
}
430431
for use in uses {
@@ -437,11 +438,11 @@ fileprivate extension MEProgram.Builder {
437438

438439
// Register helpers
439440
extension MEProgram.Builder {
440-
public mutating func makeCapture(id: Reference.ID?) -> CaptureRegister {
441+
public mutating func makeCapture(id: ReferenceID?) -> CaptureRegister {
441442
defer { nextCaptureRegister.rawValue += 1 }
442443
// Register the capture for later lookup via symbolic references.
443444
if let id = id {
444-
let preexistingValue = captureOffsets.updateValue(
445+
let preexistingValue = referencedCaptureOffsets.updateValue(
445446
captureCount, forKey: id)
446447
assert(preexistingValue == nil)
447448
}

Sources/_StringProcessing/Engine/MECapture.swift

+3-2
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,15 @@ extension Processor._StoredCapture: CustomStringConvertible {
143143
}
144144

145145
public struct CaptureList {
146-
var caps: Array<Processor<String>._StoredCapture>
146+
var values: Array<Processor<String>._StoredCapture>
147+
var referencedCaptureOffsets: [ReferenceID: Int]
147148

148149
// func extract(from s: String) -> Array<Array<Substring>> {
149150
// caps.map { $0.map { s[$0] } }
150151
// }
151152
//
152153
func latestUntyped(from s: String) -> Array<Substring?> {
153-
caps.map {
154+
values.map {
154155
guard let last = $0.latest else {
155156
return nil
156157
}

Sources/_StringProcessing/Engine/MEProgram.swift

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ public struct MEProgram<Input: Collection> where Input.Element: Equatable {
3535
var enableTracing: Bool = false
3636

3737
let captureStructure: CaptureStructure
38+
let referencedCaptureOffsets: [ReferenceID: Int]
3839
}
3940

4041
extension MEProgram: CustomStringConvertible {

Sources/_StringProcessing/Engine/Processor.swift

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ public enum MatchMode {
1414
case partialFromFront
1515
}
1616

17+
typealias Program = MEProgram<String>
18+
1719
/// A concrete CU. Somehow will run the concrete logic and
1820
/// feed stuff back to generic code
1921
struct Controller {

Sources/_StringProcessing/Engine/StringProcessor.swift

-33
This file was deleted.

Sources/_StringProcessing/Engine/Structuralize.swift

+5-5
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,17 @@ extension CaptureStructure {
6262
}
6363

6464
switch self {
65-
case let .tuple(caps):
66-
assert(list.caps.count == caps.count)
65+
case let .tuple(values):
66+
assert(list.values.count == values.count)
6767
var result = Array<StructuredCapture>()
68-
for (cap, storedCap) in zip(caps, list.caps) {
68+
for (cap, storedCap) in zip(values, list.values) {
6969
result.append(mapCap(cap, storedCap))
7070
}
7171
return result
7272

7373
default:
74-
assert(list.caps.count == 1)
75-
return [mapCap(self, list.caps.first!)]
74+
assert(list.values.count == 1)
75+
return [mapCap(self, list.values.first!)]
7676
}
7777
}
7878
}

0 commit comments

Comments
 (0)