Skip to content

Commit 12b4b4a

Browse files
authored
Add support for quantification kinds to the DSL (#150)
This adds support for specifying eager / reluctant / possessive quantification from within the DSL, mirroring the quantifier suffixes available in regex literals (for example, an appended `?` for reluctant matching).

---

```swift
let regex = Regex {
  oneOrMore(.word, .reluctantly)
  CharacterClass.digit.capture()
}
"aaa1bbb2".match(regex).match // ("aaa1", "1")
```
1 parent 883d358 commit 12b4b4a

File tree

4 files changed

+657
-515
lines changed

4 files changed

+657
-515
lines changed

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 21 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -211,19 +211,14 @@ struct VariadicsGenerator: ParsableCommand {
211211
}
212212

213213
func emitConcatenation(leftArity: Int, rightArity: Int) {
214-
func genericParameters(withConstraints: Bool) -> String {
214+
let genericParams: String = {
215215
var result = "W0, W1"
216216
result += (0..<leftArity+rightArity).map {
217217
", C\($0)"
218218
}.joined()
219-
result += ", "
220-
if withConstraints {
221-
result += "R0: \(regexProtocolName), R1: \(regexProtocolName)"
222-
} else {
223-
result += "R0, R1"
224-
}
219+
result += ", R0: \(regexProtocolName), R1: \(regexProtocolName)"
225220
return result
226-
}
221+
}()
227222

228223
// Emit concatenation type declaration.
229224

@@ -260,13 +255,13 @@ struct VariadicsGenerator: ParsableCommand {
260255
// Emit concatenation builder.
261256
output("extension \(patternBuilderTypeName) {\n")
262257
output("""
263-
@_disfavoredOverload
264-
public static func buildBlock<\(genericParameters(withConstraints: true))>(
265-
combining next: R1, into combined: R0
266-
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
267-
.init(node: combined.regex.root.appending(next.regex.root))
268-
}
258+
@_disfavoredOverload
259+
public static func buildBlock<\(genericParams)>(
260+
combining next: R1, into combined: R0
261+
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
262+
.init(node: combined.regex.root.appending(next.regex.root))
269263
}
264+
}
270265
271266
""")
272267
}
@@ -338,19 +333,16 @@ struct VariadicsGenerator: ParsableCommand {
338333

339334
func emitQuantifier(kind: QuantifierKind, arity: Int) {
340335
assert(arity >= 0)
341-
func genericParameters(withConstraints: Bool) -> String {
336+
let genericParams: String = {
342337
var result = ""
343338
if arity > 0 {
344339
result += "W"
345340
result += (0..<arity).map { ", C\($0)" }.joined()
346341
result += ", "
347342
}
348-
result += "Component"
349-
if withConstraints {
350-
result += ": \(regexProtocolName)"
351-
}
343+
result += "Component: \(regexProtocolName)"
352344
return result
353-
}
345+
}()
354346
let captures = (0..<arity).map { "C\($0)" }.joined(separator: ", ")
355347
let capturesTupled = arity == 1 ? captures : "(\(captures))"
356348
let whereClause: String = arity == 0 ? "" :
@@ -366,21 +358,23 @@ struct VariadicsGenerator: ParsableCommand {
366358
let matchType = arity == 0 ? baseMatchTypeName : "(\(baseMatchTypeName), \(quantifiedCaptures))"
367359
output("""
368360
\(arity == 0 ? "@_disfavoredOverload" : "")
369-
public func \(kind.rawValue)<\(genericParameters(withConstraints: true))>(
370-
_ component: Component
361+
public func \(kind.rawValue)<\(genericParams)>(
362+
_ component: Component,
363+
_ behavior: QuantificationBehavior = .eagerly
371364
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
372-
.init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component.regex.root))
365+
.init(node: .quantification(.\(kind.astQuantifierAmount), behavior.astKind, component.regex.root))
373366
}
374367
375368
\(arity == 0 ? "@_disfavoredOverload" : "")
376-
public func \(kind.rawValue)<\(genericParameters(withConstraints: true))>(
369+
public func \(kind.rawValue)<\(genericParams)>(
370+
_ behavior: QuantificationBehavior = .eagerly,
377371
@RegexBuilder _ component: () -> Component
378372
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
379-
.init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component().regex.root))
373+
.init(node: .quantification(.\(kind.astQuantifierAmount), behavior.astKind, component().regex.root))
380374
}
381375
382376
\(arity == 0 ? "@_disfavoredOverload" : "")
383-
public postfix func \(kind.operatorName)<\(genericParameters(withConstraints: true))>(
377+
public postfix func \(kind.operatorName)<\(genericParams)>(
384378
_ component: Component
385379
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
386380
.init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component.regex.root))
@@ -389,7 +383,7 @@ struct VariadicsGenerator: ParsableCommand {
389383
\(kind == .zeroOrOne ?
390384
"""
391385
extension RegexBuilder {
392-
public static func buildLimitedAvailability<\(genericParameters(withConstraints: true))>(
386+
public static func buildLimitedAvailability<\(genericParams)>(
393387
_ component: Component
394388
) -> \(regexTypeName)<\(matchType)> \(whereClause) {
395389
.init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component.regex.root))

Sources/_StringProcessing/RegexDSL/DSL.swift

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,44 @@ extension CharacterClass: RegexProtocol {
6262

6363
// Note: Quantifiers are currently gyb'd.
6464

65+
/// Specifies how much to attempt to match when using a quantifier.
66+
public struct QuantificationBehavior {
67+
internal enum Kind {
68+
case eagerly
69+
case reluctantly
70+
case possessively
71+
}
72+
73+
var kind: Kind
74+
75+
internal var astKind: AST.Quantification.Kind {
76+
switch kind {
77+
case .eagerly: return .eager
78+
case .reluctantly: return .reluctant
79+
case .possessively: return .possessive
80+
}
81+
}
82+
}
83+
84+
extension QuantificationBehavior {
85+
/// Match as much of the input string as possible, backtracking when
86+
/// necessary.
87+
public static var eagerly: QuantificationBehavior {
88+
.init(kind: .eagerly)
89+
}
90+
91+
/// Match as little of the input string as possible, expanding the matched
92+
/// region as necessary to complete a match.
93+
public static var reluctantly: QuantificationBehavior {
94+
.init(kind: .reluctantly)
95+
}
96+
97+
/// Match as much of the input string as possible, performing no backtracking.
98+
public static var possessively: QuantificationBehavior {
99+
.init(kind: .possessively)
100+
}
101+
}
102+
65103
// TODO: Variadic generics
66104
// struct _OneOrMore<W, C..., Component: RegexProtocol>
67105
// where R.Match == (W, C...)
@@ -99,16 +137,25 @@ postfix operator .*
99137
postfix operator .+
100138

101139
// Overloads for quantifying over a character class.
102-
public func zeroOrOne(_ cc: CharacterClass) -> Regex<Substring> {
103-
.init(node: .quantification(.zeroOrOne, .eager, cc.regex.root))
140+
public func zeroOrOne(
141+
_ cc: CharacterClass,
142+
_ behavior: QuantificationBehavior = .eagerly
143+
) -> Regex<Substring> {
144+
.init(node: .quantification(.zeroOrOne, behavior.astKind, cc.regex.root))
104145
}
105146

106-
public func many(_ cc: CharacterClass) -> Regex<Substring> {
107-
.init(node: .quantification(.zeroOrMore, .eager, cc.regex.root))
147+
public func many(
148+
_ cc: CharacterClass,
149+
_ behavior: QuantificationBehavior = .eagerly
150+
) -> Regex<Substring> {
151+
.init(node: .quantification(.zeroOrMore, behavior.astKind, cc.regex.root))
108152
}
109153

110-
public func oneOrMore(_ cc: CharacterClass) -> Regex<Substring> {
111-
.init(node: .quantification(.oneOrMore, .eager, cc.regex.root))
154+
public func oneOrMore(
155+
_ cc: CharacterClass,
156+
_ behavior: QuantificationBehavior = .eagerly
157+
) -> Regex<Substring> {
158+
.init(node: .quantification(.oneOrMore, behavior.astKind, cc.regex.root))
112159
}
113160

114161
// MARK: Alternation

0 commit comments

Comments
 (0)