Skip to content

Commit 4857bc7

Browse files
authored
Allow setting any of the three quant behaviors (#311)
This also moves QuantificationBehavior from the RegexBuilder module down to _StringProcessing, and renames it to RegexRepetitionBehavior.
1 parent f9a4675 commit 4857bc7

File tree

8 files changed

+332
-239
lines changed

8 files changed

+332
-239
lines changed

Sources/RegexBuilder/DSL.swift

+2-42
Original file line numberDiff line numberDiff line change
@@ -94,40 +94,20 @@ extension UnicodeScalar: RegexComponent {
9494

9595
// Note: Quantifiers are currently gyb'd.
9696

97-
/// Specifies how much to attempt to match when using a quantifier.
98-
@available(SwiftStdlib 5.7, *)
99-
public struct QuantificationBehavior {
100-
internal enum Kind {
101-
case eagerly
102-
case reluctantly
103-
case possessively
104-
}
105-
106-
var kind: Kind
107-
108-
internal var astKind: DSLTree._AST.QuantificationKind {
109-
switch kind {
110-
case .eagerly: return .eager
111-
case .reluctantly: return .reluctant
112-
case .possessively: return .possessive
113-
}
114-
}
115-
}
116-
11797
extension DSLTree.Node {
11898
/// Generates a DSLTree node for a repeated range of the given DSLTree node.
11999
/// Individual public API functions are in the generated Variadics.swift file.
120100
@available(SwiftStdlib 5.7, *)
121101
static func repeating(
122102
_ range: Range<Int>,
123-
_ behavior: QuantificationBehavior?,
103+
_ behavior: RegexRepetitionBehavior?,
124104
_ node: DSLTree.Node
125105
) -> DSLTree.Node {
126106
// TODO: Throw these as errors
127107
assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
128108
assert(!range.isEmpty, "Cannot specify an empty range")
129109

130-
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
110+
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
131111

132112
switch (range.lowerBound, range.upperBound) {
133113
case (0, Int.max): // 0...
@@ -147,26 +127,6 @@ extension DSLTree.Node {
147127
}
148128
}
149129

150-
@available(SwiftStdlib 5.7, *)
151-
extension QuantificationBehavior {
152-
/// Match as much of the input string as possible, backtracking when
153-
/// necessary.
154-
public static var eagerly: QuantificationBehavior {
155-
.init(kind: .eagerly)
156-
}
157-
158-
/// Match as little of the input string as possible, expanding the matched
159-
/// region as necessary to complete a match.
160-
public static var reluctantly: QuantificationBehavior {
161-
.init(kind: .reluctantly)
162-
}
163-
164-
/// Match as much of the input string as possible, performing no backtracking.
165-
public static var possessively: QuantificationBehavior {
166-
.init(kind: .possessively)
167-
}
168-
}
169-
170130
@available(SwiftStdlib 5.7, *)
171131
public struct OneOrMore<Output>: _BuiltinRegexComponent {
172132
public var regex: Regex<Output>

Sources/RegexBuilder/Variadics.swift

+154-154
Large diffs are not rendered by default.

Sources/VariadicsGenerator/VariadicsGenerator.swift

+5-5
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,9 @@ struct VariadicsGenerator: ParsableCommand {
377377
\(params.disfavored)\
378378
public init<\(params.genericParams)>(
379379
_ component: Component,
380-
_ behavior: QuantificationBehavior? = nil
380+
_ behavior: RegexRepetitionBehavior? = nil
381381
) \(params.whereClauseForInit) {
382-
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
382+
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
383383
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component.regex.root))
384384
}
385385
}
@@ -389,10 +389,10 @@ struct VariadicsGenerator: ParsableCommand {
389389
\(defaultAvailableAttr)
390390
\(params.disfavored)\
391391
public init<\(params.genericParams)>(
392-
_ behavior: QuantificationBehavior? = nil,
392+
_ behavior: RegexRepetitionBehavior? = nil,
393393
@\(concatBuilderName) _ component: () -> Component
394394
) \(params.whereClauseForInit) {
395-
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
395+
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
396396
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component().regex.root))
397397
}
398398
}
@@ -508,7 +508,7 @@ struct VariadicsGenerator: ParsableCommand {
508508
public init<\(params.genericParams), R: RangeExpression>(
509509
_ component: Component,
510510
_ expression: R,
511-
_ behavior: QuantificationBehavior? = nil
511+
_ behavior: RegexRepetitionBehavior? = nil
512512
) \(params.repeatingWhereClause) {
513513
self.init(node: .repeating(expression.relative(to: 0..<Int.max), behavior, component.regex.root))
514514
}

Sources/_RegexParser/Regex/AST/MatchingOptions.swift

+4
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@ extension AST {
4141
case graphemeClusterSemantics // X
4242
case unicodeScalarSemantics // u
4343
case byteSemantics // b
44+
45+
// Swift-only option: quantifiers are possessive by default
46+
case possessiveByDefault // t.b.d.
4447
}
48+
4549
public var kind: Kind
4650
public var location: SourceLocation
4751

Sources/_StringProcessing/ByteCodeGen.swift

+1-3
Original file line numberDiff line numberDiff line change
@@ -374,9 +374,7 @@ extension Compiler.ByteCodeGen {
374374
case .syntax(let kind):
375375
updatedKind = kind.ast.applying(options)
376376
case .default:
377-
updatedKind = options.isReluctantByDefault
378-
? .reluctant
379-
: .eager
377+
updatedKind = options.defaultQuantificationKind
380378
}
381379

382380
let (low, high) = amount.bounds

Sources/_StringProcessing/MatchingOptions.swift

+33-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ struct MatchingOptions {
2222
// Must contain exactly one of each mutually exclusive group
2323
assert(stack.last!.intersection(.textSegmentOptions).rawValue.nonzeroBitCount == 1)
2424
assert(stack.last!.intersection(.semanticMatchingLevels).rawValue.nonzeroBitCount == 1)
25+
26+
// Must contain at most one quantifier behavior
27+
assert(stack.last!.intersection(.repetitionBehaviors).rawValue.nonzeroBitCount <= 1)
2528
}
2629
}
2730

@@ -63,6 +66,16 @@ extension MatchingOptions {
6366
stack.last!.contains(.reluctantByDefault)
6467
}
6568

69+
var defaultQuantificationKind: AST.Quantification.Kind {
70+
if stack.last!.contains(.possessiveByDefault) {
71+
return .possessive
72+
} else if stack.last!.contains(.reluctantByDefault) {
73+
return .reluctant
74+
} else {
75+
return .eager
76+
}
77+
}
78+
6679
var dotMatchesNewline: Bool {
6780
stack.last!.contains(.singleLine)
6881
}
@@ -150,6 +163,9 @@ extension MatchingOptions {
150163
case unicodeScalarSemantics
151164
case byteSemantics
152165

166+
// Swift-only option: quantifiers are possessive by default
167+
case possessiveByDefault
168+
153169
init?(_ astKind: AST.MatchingOption.Kind) {
154170
switch astKind {
155171
case .caseInsensitive:
@@ -184,6 +200,8 @@ extension MatchingOptions {
184200
self = .unicodeScalarSemantics
185201
case .byteSemantics:
186202
self = .byteSemantics
203+
case .possessiveByDefault:
204+
self = .possessiveByDefault
187205

188206
// Whitespace options are only relevant during parsing, not compilation.
189207
case .extended, .extraExtended:
@@ -219,6 +237,9 @@ extension MatchingOptions {
219237
if Self.textSegmentOptions.contains(opt.representation) {
220238
remove(.textSegmentOptions)
221239
}
240+
if Self.repetitionBehaviors.contains(opt.representation) {
241+
remove(.repetitionBehaviors)
242+
}
222243

223244
insert(opt.representation)
224245
}
@@ -241,6 +262,9 @@ extension MatchingOptions {
241262
guard let opt = Option(opt.kind) else {
242263
continue
243264
}
265+
if Self.repetitionBehaviors.contains(opt.representation) {
266+
remove(.repetitionBehaviors)
267+
}
244268
remove(opt.representation)
245269
}
246270
}
@@ -274,7 +298,15 @@ extension MatchingOptions.Representation {
274298
static var semanticMatchingLevels: Self {
275299
[.graphemeClusterSemantics, .unicodeScalarSemantics, .byteSemantics]
276300
}
277-
301+
302+
// Quantification behavior options
303+
static var reluctantByDefault: Self { .init(.reluctantByDefault) }
304+
static var possessiveByDefault: Self { .init(.possessiveByDefault) }
305+
306+
static var repetitionBehaviors: Self {
307+
[.reluctantByDefault, .possessiveByDefault]
308+
}
309+
278310
/// The default set of options.
279311
static var `default`: Self {
280312
[.graphemeClusterSemantics, .textSegmentGraphemeMode]

Sources/_StringProcessing/Regex/Options.swift

+56-10
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ extension RegexComponent {
5858
/// Returns a regular expression where the start and end of input
5959
/// anchors (`^` and `$`) also match against the start and end of a line.
6060
///
61-
/// This method corresponds to applying the `m` option in a regular
62-
/// expression literal. For this behavior in the `RegexBuilder` syntax, see
61+
/// This method corresponds to applying the `m` option in regex syntax. For
62+
/// this behavior in the `RegexBuilder` syntax, see
6363
/// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``,
6464
/// and ``Anchor.endOfInput``.
6565
///
@@ -69,16 +69,22 @@ extension RegexComponent {
6969
wrapInOption(.multiline, addingIf: matchLineEndings)
7070
}
7171

72-
/// Returns a regular expression where quantifiers are reluctant by default
73-
/// instead of eager.
72+
/// Returns a regular expression where quantifiers use the specified behavior
73+
/// by default.
7474
///
75-
/// This method corresponds to applying the `U` option in a regular
76-
/// expression literal.
75+
/// This setting does not affect calls to quantifier initializers, such as
76+
/// `OneOrMore`, that include an explicit `behavior` parameter.
7777
///
78-
/// - Parameter useReluctantQuantifiers: A Boolean value indicating whether
79-
/// quantifiers should be reluctant by default.
80-
public func reluctantQuantifiers(_ useReluctantQuantifiers: Bool = true) -> Regex<RegexOutput> {
81-
wrapInOption(.reluctantByDefault, addingIf: useReluctantQuantifiers)
78+
/// Passing `.eager` or `.reluctant` to this method corresponds to applying
79+
/// the `(?-U)` or `(?U)` option in regex syntax, respectively; `.possessive`
/// is a Swift-only default that has no regex syntax equivalent yet.
80+
///
81+
/// - Parameter behavior: The default behavior to use for quantifiers.
82+
public func repetitionBehavior(_ behavior: RegexRepetitionBehavior) -> Regex<RegexOutput> {
83+
if behavior == .possessive {
84+
return wrapInOption(.possessiveByDefault, addingIf: true)
85+
} else {
86+
return wrapInOption(.reluctantByDefault, addingIf: behavior == .reluctant)
87+
}
8288
}
8389

8490
/// Returns a regular expression that matches with the specified semantic
@@ -183,6 +189,46 @@ public struct RegexWordBoundaryKind: Hashable {
183189
}
184190
}
185191

192+
/// Specifies how much to attempt to match when using a quantifier.
193+
@available(SwiftStdlib 5.7, *)
194+
public struct RegexRepetitionBehavior: Hashable {
195+
internal enum Kind {
196+
case eager
197+
case reluctant
198+
case possessive
199+
}
200+
201+
var kind: Kind
202+
203+
@_spi(RegexBuilder) public var dslTreeKind: DSLTree._AST.QuantificationKind {
204+
switch kind {
205+
case .eager: return .eager
206+
case .reluctant: return .reluctant
207+
case .possessive: return .possessive
208+
}
209+
}
210+
}
211+
212+
@available(SwiftStdlib 5.7, *)
213+
extension RegexRepetitionBehavior {
214+
/// Match as much of the input string as possible, backtracking when
215+
/// necessary.
216+
public static var eager: Self {
217+
.init(kind: .eager)
218+
}
219+
220+
/// Match as little of the input string as possible, expanding the matched
221+
/// region as necessary to complete a match.
222+
public static var reluctant: Self {
223+
.init(kind: .reluctant)
224+
}
225+
226+
/// Match as much of the input string as possible, performing no backtracking.
227+
public static var possessive: Self {
228+
.init(kind: .possessive)
229+
}
230+
}
231+
186232
// MARK: - Helper method
187233

188234
@available(SwiftStdlib 5.7, *)

0 commit comments

Comments
 (0)