Skip to content

[5.7] Update API for congruence with Unicode proposal #316

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ extension RegexComponent where Self == CharacterClass {
public static var anyGrapheme: CharacterClass {
.init(unconverted: .anyGrapheme)
}

/// A character class built from the underlying `anyUnicodeScalar` model.
public static var anyUnicodeScalar: CharacterClass {
  return CharacterClass(unconverted: .anyUnicodeScalar)
}

public static var whitespace: CharacterClass {
.init(unconverted: .whitespace)
Expand Down
44 changes: 2 additions & 42 deletions Sources/RegexBuilder/DSL.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,40 +94,20 @@ extension UnicodeScalar: RegexComponent {

// Note: Quantifiers are currently gyb'd.

/// Specifies how much to attempt to match when using a quantifier.
@available(SwiftStdlib 5.7, *)
public struct QuantificationBehavior {
internal enum Kind {
case eagerly
case reluctantly
case possessively
}

var kind: Kind

internal var astKind: AST.Quantification.Kind {
switch kind {
case .eagerly: return .eager
case .reluctantly: return .reluctant
case .possessively: return .possessive
}
}
}

extension DSLTree.Node {
/// Generates a DSLTree node for a repeated range of the given DSLTree node.
/// Individual public API functions are in the generated Variadics.swift file.
@available(SwiftStdlib 5.7, *)
static func repeating(
_ range: Range<Int>,
_ behavior: QuantificationBehavior?,
_ behavior: RegexRepetitionBehavior?,
_ node: DSLTree.Node
) -> DSLTree.Node {
// TODO: Throw these as errors
assert(range.lowerBound >= 0, "Cannot specify a negative lower bound")
assert(!range.isEmpty, "Cannot specify an empty range")

let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default

switch (range.lowerBound, range.upperBound) {
case (0, Int.max): // 0...
Expand All @@ -147,26 +127,6 @@ extension DSLTree.Node {
}
}

@available(SwiftStdlib 5.7, *)
extension QuantificationBehavior {
/// Match as much of the input string as possible, backtracking when
/// necessary.
public static var eagerly: QuantificationBehavior {
.init(kind: .eagerly)
}

/// Match as little of the input string as possible, expanding the matched
/// region as necessary to complete a match.
public static var reluctantly: QuantificationBehavior {
.init(kind: .reluctantly)
}

/// Match as much of the input string as possible, performing no backtracking.
public static var possessively: QuantificationBehavior {
.init(kind: .possessively)
}
}

@available(SwiftStdlib 5.7, *)
public struct OneOrMore<Output>: _BuiltinRegexComponent {
public var regex: Regex<Output>
Expand Down
308 changes: 154 additions & 154 deletions Sources/RegexBuilder/Variadics.swift

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions Sources/VariadicsGenerator/VariadicsGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -378,9 +378,9 @@ struct VariadicsGenerator: ParsableCommand {
\(params.disfavored)\
public init<\(params.genericParams)>(
_ component: Component,
_ behavior: QuantificationBehavior? = nil
_ behavior: RegexRepetitionBehavior? = nil
) \(params.whereClauseForInit) {
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component.regex.root))
}
}
Expand All @@ -390,10 +390,10 @@ struct VariadicsGenerator: ParsableCommand {
\(defaultAvailableAttr)
\(params.disfavored)\
public init<\(params.genericParams)>(
_ behavior: QuantificationBehavior? = nil,
_ behavior: RegexRepetitionBehavior? = nil,
@\(concatBuilderName) _ component: () -> Component
) \(params.whereClauseForInit) {
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.astKind) } ?? .default
let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default
self.init(node: .quantification(.\(kind.astQuantifierAmount), kind, component().regex.root))
}
}
Expand Down Expand Up @@ -509,7 +509,7 @@ struct VariadicsGenerator: ParsableCommand {
public init<\(params.genericParams), R: RangeExpression>(
_ component: Component,
_ expression: R,
_ behavior: QuantificationBehavior? = nil
_ behavior: RegexRepetitionBehavior? = nil
) \(params.repeatingWhereClause) {
self.init(node: .repeating(expression.relative(to: 0..<Int.max), behavior, component.regex.root))
}
Expand Down
4 changes: 4 additions & 0 deletions Sources/_RegexParser/Regex/AST/MatchingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ extension AST {
case graphemeClusterSemantics // X
case unicodeScalarSemantics // u
case byteSemantics // b

// Swift-only default possessive quantifier
case possessiveByDefault // t.b.d.
}

public var kind: Kind
public var location: SourceLocation

Expand Down
4 changes: 1 addition & 3 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,7 @@ extension Compiler.ByteCodeGen {
case .syntax(let kind):
updatedKind = kind.applying(options)
case .default:
updatedKind = options.isReluctantByDefault
? .reluctant
: .eager
updatedKind = options.defaultQuantificationKind
}

let (low, high) = amount.bounds
Expand Down
34 changes: 33 additions & 1 deletion Sources/_StringProcessing/MatchingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct MatchingOptions {
// Must contain exactly one of each mutually exclusive group
assert(stack.last!.intersection(.textSegmentOptions).rawValue.nonzeroBitCount == 1)
assert(stack.last!.intersection(.semanticMatchingLevels).rawValue.nonzeroBitCount == 1)

// Must contain at most one quantifier behavior
assert(stack.last!.intersection(.repetitionBehaviors).rawValue.nonzeroBitCount <= 1)
}
}

Expand Down Expand Up @@ -63,6 +66,16 @@ extension MatchingOptions {
stack.last!.contains(.reluctantByDefault)
}

/// The quantification kind implied by the options on top of the stack:
/// possessive if `.possessiveByDefault` is set, otherwise reluctant if
/// `.reluctantByDefault` is set, otherwise eager.
var defaultQuantificationKind: AST.Quantification.Kind {
  let current = stack.last!
  // The possessive flag is checked first, so it wins over the reluctant flag.
  guard !current.contains(.possessiveByDefault) else {
    return .possessive
  }
  return current.contains(.reluctantByDefault) ? .reluctant : .eager
}

var dotMatchesNewline: Bool {
stack.last!.contains(.singleLine)
}
Expand Down Expand Up @@ -150,6 +163,9 @@ extension MatchingOptions {
case unicodeScalarSemantics
case byteSemantics

// Swift-only default possessive quantifier
case possessiveByDefault

init?(_ astKind: AST.MatchingOption.Kind) {
switch astKind {
case .caseInsensitive:
Expand Down Expand Up @@ -184,6 +200,8 @@ extension MatchingOptions {
self = .unicodeScalarSemantics
case .byteSemantics:
self = .byteSemantics
case .possessiveByDefault:
self = .possessiveByDefault

// Whitespace options are only relevant during parsing, not compilation.
case .extended, .extraExtended:
Expand Down Expand Up @@ -219,6 +237,9 @@ extension MatchingOptions {
if Self.textSegmentOptions.contains(opt.representation) {
remove(.textSegmentOptions)
}
if Self.repetitionBehaviors.contains(opt.representation) {
remove(.repetitionBehaviors)
}

insert(opt.representation)
}
Expand All @@ -241,6 +262,9 @@ extension MatchingOptions {
guard let opt = Option(opt.kind) else {
continue
}
if Self.repetitionBehaviors.contains(opt.representation) {
remove(.repetitionBehaviors)
}
remove(opt.representation)
}
}
Expand Down Expand Up @@ -274,7 +298,15 @@ extension MatchingOptions.Representation {
static var semanticMatchingLevels: Self {
[.graphemeClusterSemantics, .unicodeScalarSemantics, .byteSemantics]
}


// Quantification behavior options
static var reluctantByDefault: Self { .init(.reluctantByDefault) }
static var possessiveByDefault: Self { .init(.possessiveByDefault) }

/// The group of options that select a default quantifier behavior:
/// `.reluctantByDefault` and `.possessiveByDefault`.
static var repetitionBehaviors: Self {
[.reluctantByDefault, .possessiveByDefault]
}

/// The default set of options.
static var `default`: Self {
[.graphemeClusterSemantics, .textSegmentGraphemeMode]
Expand Down
66 changes: 56 additions & 10 deletions Sources/_StringProcessing/Regex/Options.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ extension RegexComponent {
/// Returns a regular expression where the start and end of input
/// anchors (`^` and `$`) also match against the start and end of a line.
///
/// This method corresponds to applying the `m` option in a regular
/// expression literal. For this behavior in the `RegexBuilder` syntax, see
/// This method corresponds to applying the `m` option in regex syntax. For
/// this behavior in the `RegexBuilder` syntax, see
/// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``,
/// and ``Anchor.endOfInput``.
///
Expand All @@ -69,16 +69,22 @@ extension RegexComponent {
wrapInOption(.multiline, addingIf: matchLineEndings)
}

/// Returns a regular expression where quantifiers are reluctant by default
/// instead of eager.
/// Returns a regular expression where quantifiers use the specified behavior
/// by default.
///
/// This method corresponds to applying the `U` option in a regular
/// expression literal.
/// This setting does not affect calls to quantifier methods, such as
/// `OneOrMore`, that include an explicit `behavior` parameter.
///
/// - Parameter useReluctantQuantifiers: A Boolean value indicating whether
/// quantifiers should be reluctant by default.
public func reluctantQuantifiers(_ useReluctantQuantifiers: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.reluctantByDefault, addingIf: useReluctantQuantifiers)
/// Passing `.eager` or `.reluctant` to this method corresponds to applying
/// the `(?-U)` or `(?U)` option in regex syntax, respectively.
///
/// - Parameter behavior: The default behavior to use for quantifiers.
public func repetitionBehavior(_ behavior: RegexRepetitionBehavior) -> Regex<RegexOutput> {
  // Possessive has its own option flag. Eager and reluctant are expressed as
  // the absence or presence of the reluctant-by-default flag.
  switch behavior.kind {
  case .possessive:
    return wrapInOption(.possessiveByDefault, addingIf: true)
  case .reluctant:
    return wrapInOption(.reluctantByDefault, addingIf: true)
  case .eager:
    return wrapInOption(.reluctantByDefault, addingIf: false)
  }
}

/// Returns a regular expression that matches with the specified semantic
Expand Down Expand Up @@ -183,6 +189,46 @@ public struct RegexWordBoundaryKind: Hashable {
}
}

/// Describes how much of the input a quantifier attempts to match.
@available(SwiftStdlib 5.7, *)
public struct RegexRepetitionBehavior: Hashable {
  /// The supported behaviors.
  internal enum Kind {
    case eager, reluctant, possessive
  }

  /// The behavior this value represents.
  var kind: Kind

  /// The `AST.Quantification.Kind` with the same name as `kind`.
  @_spi(RegexBuilder) public var dslTreeKind: AST.Quantification.Kind {
    switch kind {
    case .eager:
      return .eager
    case .reluctant:
      return .reluctant
    case .possessive:
      return .possessive
    }
  }
}

@available(SwiftStdlib 5.7, *)
extension RegexRepetitionBehavior {
  /// Matches as much of the input string as possible, backtracking when
  /// that is necessary.
  public static var eager: Self {
    return Self(kind: .eager)
  }

  /// Matches as little of the input string as possible, growing the matched
  /// region only as needed to complete a match.
  public static var reluctant: Self {
    return Self(kind: .reluctant)
  }

  /// Matches as much of the input string as possible and performs no
  /// backtracking.
  public static var possessive: Self {
    return Self(kind: .possessive)
  }
}

// MARK: - Helper method

@available(SwiftStdlib 5.7, *)
Expand Down
15 changes: 14 additions & 1 deletion Sources/_StringProcessing/_CharacterClassModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public struct _CharacterClassModel: Hashable {
case any
/// Any grapheme cluster
case anyGrapheme
/// Any Unicode scalar
case anyScalar
/// Character.isDigit
case digit
/// Character.isHexDigit
Expand Down Expand Up @@ -155,8 +157,12 @@ public struct _CharacterClassModel: Hashable {
case .graphemeCluster:
let c = str[i]
var matched: Bool
var next = str.index(after: i)
switch cc {
case .any, .anyGrapheme: matched = true
case .anyScalar:
matched = true
next = str.unicodeScalars.index(after: i)
case .digit:
matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits)
case .hexDigit:
Expand All @@ -174,12 +180,13 @@ public struct _CharacterClassModel: Hashable {
if isInverted {
matched.toggle()
}
return matched ? str.index(after: i) : nil
return matched ? next : nil
case .unicodeScalar:
let c = str.unicodeScalars[i]
var matched: Bool
switch cc {
case .any: matched = true
case .anyScalar: matched = true
case .anyGrapheme: fatalError("Not matched in this mode")
case .digit:
matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits)
Expand Down Expand Up @@ -224,6 +231,10 @@ extension _CharacterClassModel {
.init(cc: .anyGrapheme, matchLevel: .graphemeCluster)
}

/// A model using the `.any` representation at the Unicode-scalar match level.
// NOTE(review): this is built from `.any`, not from the `.anyScalar`
// representation that the matcher also handles — confirm which one is
// intended here.
public static var anyUnicodeScalar: _CharacterClassModel {
.init(cc: .any, matchLevel: .unicodeScalar)
}

public static var whitespace: _CharacterClassModel {
.init(cc: .whitespace, matchLevel: .graphemeCluster)
}
Expand Down Expand Up @@ -275,6 +286,7 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
switch self {
case .any: return "<any>"
case .anyGrapheme: return "<any grapheme>"
case .anyScalar: return "<any scalar>"
case .digit: return "<digit>"
case .hexDigit: return "<hex digit>"
case .horizontalWhitespace: return "<horizontal whitespace>"
Expand Down Expand Up @@ -431,6 +443,7 @@ extension AST.Atom.EscapedBuiltin {
case .notWordCharacter: return .word.inverted

case .graphemeCluster: return .anyGrapheme
case .trueAnychar: return .anyUnicodeScalar

default:
return nil
Expand Down
Loading