Skip to content

Commit 49418d2

Browse files
authored
Merge pull request swiftlang#98 from hamishknight/time-and-place
2 parents e9e8e08 + bdeec8e commit 49418d2

15 files changed

+288
-115
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

+12-3
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,20 @@ extension AST {
9292

9393
public struct Alternation: Hashable, _ASTNode {
9494
public let children: [AST]
95-
public let location: SourceLocation
95+
public let pipes: [SourceLocation]
96+
97+
public init(_ mems: [AST], pipes: [SourceLocation]) {
98+
// An alternation must have at least two branches (though the branches
99+
// may be empty AST nodes), and n - 1 pipes.
100+
precondition(mems.count >= 2)
101+
precondition(pipes.count == mems.count - 1)
96102

97-
public init(_ mems: [AST], _ location: SourceLocation) {
98103
self.children = mems
99-
self.location = location
104+
self.pipes = pipes
105+
}
106+
107+
public var location: SourceLocation {
108+
.init(children.first!.location.start ..< children.last!.location.end)
100109
}
101110
}
102111

Sources/_MatchingEngine/Regex/AST/Atom.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ extension AST.Atom {
314314

315315
public var _dumpBase: String {
316316
// FIXME: better printing...
317-
"\(kind)\(isInverted)"
317+
"\(kind)\(isInverted)\(isPOSIX)"
318318
}
319319
}
320320
}

Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift

+27-1
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,25 @@ extension AST {
2121
case custom(CustomCharacterClass)
2222

2323
/// A character range `a-z`
24-
case range(Atom, Atom)
24+
case range(Range)
2525

2626
/// A single character or escape
2727
case atom(Atom)
2828

2929
/// A binary operator applied to sets of members `abc&&def`
3030
case setOperation([Member], Located<SetOp>, [Member])
3131
}
32+
public struct Range: Hashable {
33+
public var lhs: Atom
34+
public var dashLoc: SourceLocation
35+
public var rhs: Atom
36+
37+
public init(_ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom) {
38+
self.lhs = lhs
39+
self.dashLoc = dashLoc
40+
self.rhs = rhs
41+
}
42+
}
3243
public enum SetOp: String, Hashable {
3344
case subtraction = "--"
3445
case intersection = "&&"
@@ -45,3 +56,18 @@ extension AST {
4556
extension AST.CustomCharacterClass {
4657
public var isInverted: Bool { start.value == .inverted }
4758
}
59+
60+
extension CustomCC.Member {
61+
private var _associatedValue: Any {
62+
switch self {
63+
case .custom(let c): return c
64+
case .range(let r): return r
65+
case .atom(let a): return a
66+
case .setOperation(let lhs, let op, let rhs): return (lhs, op, rhs)
67+
}
68+
}
69+
70+
func `as`<T>(_ t: T.Type = T.self) -> T? {
71+
_associatedValue as? T
72+
}
73+
}

Sources/_MatchingEngine/Regex/AST/Group.swift

+8-9
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,9 @@ extension AST {
5454
case atomicScriptRun
5555

5656
// (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:)
57-
// If hasImplicitScope is true, it was written as e.g (?i), and implicitly
58-
// forms a group containing all the following elements of the current
59-
// group.
60-
case changeMatchingOptions(MatchingOptionSequence, hasImplicitScope: Bool)
57+
// Isolated options are written as e.g. (?i), and implicitly form a group
58+
// containing all the following elements of the current group.
59+
case changeMatchingOptions(MatchingOptionSequence, isIsolated: Bool)
6160

6261
// NOTE: Comments appear to be groups, but are not parsed
6362
// the same. They parse more like quotes, so are not
@@ -74,16 +73,16 @@ extension AST.Group.Kind {
7473
}
7574
}
7675

77-
/// Whether this is a group with an implicit scope, e.g matching options
78-
/// written as (?i) implicitly become parent groups for the rest of the
79-
/// elements in the current group:
76+
/// Whether this is a group with an implicit scope, e.g. isolated matching
77+
/// options implicitly become parent groups for the rest of the elements in
78+
/// the current group:
8079
///
8180
/// (a(?i)bc)de -> (a(?i:bc))de
8281
///
8382
public var hasImplicitScope: Bool {
8483
switch self {
85-
case .changeMatchingOptions(_, let hasImplicitScope):
86-
return hasImplicitScope
84+
case .changeMatchingOptions(_, let isIsolated):
85+
return isIsolated
8786
default:
8887
return false
8988
}

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

+10-5
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ extension Source {
602602
// Matching option changing group (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:).
603603
if let seq = try src.lexMatchingOptionSequence() {
604604
if src.tryEat(":") {
605-
return .changeMatchingOptions(seq, hasImplicitScope: false)
605+
return .changeMatchingOptions(seq, isIsolated: false)
606606
}
607607
// If this isn't the start of an explicit group, we should have an
608608
// implicit group that covers the remaining elements of the current
@@ -611,7 +611,7 @@ extension Source {
611611
// also does it across alternations, which will require additional
612612
// handling.
613613
try src.expect(")")
614-
return .changeMatchingOptions(seq, hasImplicitScope: true)
614+
return .changeMatchingOptions(seq, isIsolated: true)
615615
}
616616

617617
guard let next = src.peek() else {
@@ -1026,14 +1026,19 @@ extension Source {
10261026
/// of a '-' character followed by an atom.
10271027
mutating func lexCustomCharClassRangeEnd(
10281028
priorGroupCount: Int
1029-
) throws -> AST.Atom? {
1029+
) throws -> (dashLoc: SourceLocation, AST.Atom)? {
10301030
// Make sure we don't have a binary operator e.g '--', and the '-' is not
10311031
// ending the custom character class (in which case it is literal).
1032+
let start = currentPosition
10321033
guard peekCCBinOp() == nil && !starts(with: "-]") && tryEat("-") else {
10331034
return nil
10341035
}
1035-
return try lexAtom(isInCustomCharacterClass: true,
1036-
priorGroupCount: priorGroupCount)
1036+
let dashLoc = Location(start ..< currentPosition)
1037+
guard let end = try lexAtom(isInCustomCharacterClass: true,
1038+
priorGroupCount: priorGroupCount) else {
1039+
return nil
1040+
}
1041+
return (dashLoc, end)
10371042
}
10381043
}
10391044

Sources/_MatchingEngine/Regex/Parse/Parse.swift

+9-6
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,20 @@ extension Parser {
8989

9090
if source.isEmpty { return .empty(.init(loc(_start))) }
9191

92-
var result = Array<AST>(singleElement: try parseConcatenation())
93-
while source.tryEat("|") {
94-
// TODO: track pipe locations too...
92+
var result = [try parseConcatenation()]
93+
var pipes: [SourceLocation] = []
94+
while true {
95+
let pipeStart = source.currentPosition
96+
guard source.tryEat("|") else { break }
97+
pipes.append(loc(pipeStart))
9598
result.append(try parseConcatenation())
9699
}
97100

98101
if result.count == 1 {
99102
return result[0]
100103
}
101104

102-
return .alternation(.init(result, loc(_start)))
105+
return .alternation(.init(result, pipes: pipes))
103106
}
104107

105108
/// Parse a term, potentially separated from others by `|`
@@ -266,14 +269,14 @@ extension Parser {
266269
else { break }
267270

268271
// Range between atoms.
269-
if let rhs = try source.lexCustomCharClassRangeEnd(
272+
if let (dashLoc, rhs) = try source.lexCustomCharClassRangeEnd(
270273
priorGroupCount: priorGroupCount
271274
) {
272275
guard atom.literalCharacterValue != nil &&
273276
rhs.literalCharacterValue != nil else {
274277
throw ParseError.invalidCharacterClassRangeOperand
275278
}
276-
members.append(.range(atom, rhs))
279+
members.append(.range(.init(atom, dashLoc, rhs)))
277280
continue
278281
}
279282

Sources/_MatchingEngine/Regex/Printing/DumpAST.swift

+9-4
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ extension AST.Group.Kind: _ASTPrintable {
120120
case .nonAtomicLookbehind: return "nonAtomicLookbehind"
121121
case .scriptRun: return "scriptRun"
122122
case .atomicScriptRun: return "atomicScriptRun"
123-
case .changeMatchingOptions(let seq, let hasImplicitScope):
124-
return "changeMatchingOptions<\(seq), \(hasImplicitScope)>"
123+
case .changeMatchingOptions(let seq, let isIsolated):
124+
return "changeMatchingOptions<\(seq), \(isIsolated)>"
125125
}
126126
}
127127
}
@@ -185,10 +185,15 @@ extension AST.CustomCharacterClass.Member: _ASTPrintable {
185185
switch self {
186186
case .custom(let cc): return "\(cc)"
187187
case .atom(let a): return "\(a)"
188-
case .range(let lhs, let rhs):
189-
return "range \(lhs) to \(rhs)"
188+
case .range(let r): return "\(r)"
190189
case .setOperation(let lhs, let op, let rhs):
191190
return "op \(lhs) \(op.value) \(rhs)"
192191
}
193192
}
194193
}
194+
195+
extension AST.CustomCharacterClass.Range: _ASTPrintable {
196+
public var _dumpBase: String {
197+
"\(lhs)-\(rhs)"
198+
}
199+
}

Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift

+3-3
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ extension PrettyPrinter {
8585
switch member {
8686
case .custom(let ccc):
8787
outputAsCanonical(ccc)
88-
case .range(let a, let b):
89-
output(a._canonicalBase)
88+
case .range(let r):
89+
output(r.lhs._canonicalBase)
9090
output("-")
91-
output(b._canonicalBase)
91+
output(r.rhs._canonicalBase)
9292
case .atom(let a):
9393
output(a._canonicalBase)
9494
case .setOperation:

Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift

+4-4
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,16 @@ extension PrettyPrinter {
139139
switch member {
140140
case .custom(let ccc):
141141
printAsPattern(ccc)
142-
case .range(let a, let b):
143-
if let lhs = a.literalStringValue,
144-
let rhs = b.literalStringValue {
142+
case .range(let r):
143+
if let lhs = r.lhs.literalStringValue,
144+
let rhs = r.rhs.literalStringValue {
145145
indent()
146146
output(lhs._quoted)
147147
output("...")
148148
output(rhs._quoted)
149149
terminateLine()
150150
} else {
151-
print("// TODO: Range \(a) to \(b)")
151+
print("// TODO: Range \(r.lhs) to \(r.rhs)")
152152
}
153153
case .atom(let a):
154154
if let s = a.literalStringValue {

Sources/_MatchingEngine/Utility/Misc_2.swift

+4
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ extension Collection {
9393
distance(from: startIndex, to: i)
9494
}
9595

96+
public func offsets(of r: Range<Index>) -> Range<Int> {
97+
offset(of: r.lowerBound) ..< offset(of: r.upperBound)
98+
}
99+
96100
public func convertByOffset<
97101
C: Collection
98102
>(_ range: Range<Index>, in c: C) -> Range<C.Index> {

Sources/_StringProcessing/ASTBuilder.swift

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ AST.
1717
import _MatchingEngine
1818

1919
func alt(_ asts: [AST]) -> AST {
20-
.alternation(.init(asts, .fake))
20+
return .alternation(
21+
.init(asts, pipes: Array(repeating: .fake, count: asts.count - 1))
22+
)
2123
}
2224
func alt(_ asts: AST...) -> AST {
2325
alt(asts)
@@ -90,9 +92,9 @@ public func atomicScriptRun(_ child: AST) -> AST {
9092
group(.atomicScriptRun, child)
9193
}
9294
func changeMatchingOptions(
93-
_ seq: AST.MatchingOptionSequence, hasImplicitScope: Bool, _ child: AST
95+
_ seq: AST.MatchingOptionSequence, isIsolated: Bool, _ child: AST
9496
) -> AST {
95-
group(.changeMatchingOptions(seq, hasImplicitScope: hasImplicitScope), child)
97+
group(.changeMatchingOptions(seq, isIsolated: isIsolated), child)
9698
}
9799

98100
func matchingOptions(
@@ -261,9 +263,13 @@ func prop_m(
261263
) -> AST.CustomCharacterClass.Member {
262264
atom_m(.property(.init(kind, isInverted: inverted, isPOSIX: false)))
263265
}
266+
func range_m(
267+
_ lower: AST.Atom, _ upper: AST.Atom
268+
) -> AST.CustomCharacterClass.Member {
269+
.range(.init(lower, .fake, upper))
270+
}
264271
func range_m(
265272
_ lower: AST.Atom.Kind, _ upper: AST.Atom.Kind
266273
) -> AST.CustomCharacterClass.Member {
267-
.range(atom_a(lower), atom_a(upper))
274+
range_m(atom_a(lower), atom_a(upper))
268275
}
269-

Sources/_StringProcessing/CharacterClass.swift

+3-3
Original file line numberDiff line numberDiff line change
@@ -384,10 +384,10 @@ extension AST.CustomCharacterClass {
384384
return nil
385385
}
386386
result.append(.characterClass(cc))
387-
case .range(let lhs, let rhs):
387+
case .range(let r):
388388
result.append(.range(
389-
lhs.literalCharacterValue! ...
390-
rhs.literalCharacterValue!))
389+
r.lhs.literalCharacterValue! ...
390+
r.rhs.literalCharacterValue!))
391391

392392
case .atom(let a):
393393
if let cc = a.characterClass {

Sources/_StringProcessing/ConsumerInterface.swift

+5-5
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ extension AST.CustomCharacterClass.Member {
117117
case .custom(let ccc):
118118
return try ccc.generateConsumer(opts)
119119

120-
case .range(let lower, let upper):
121-
guard let lhs = lower.literalCharacterValue else {
122-
throw unsupported("\(lower) in range")
120+
case .range(let r):
121+
guard let lhs = r.lhs.literalCharacterValue else {
122+
throw unsupported("\(r.lhs) in range")
123123
}
124-
guard let rhs = upper.literalCharacterValue else {
125-
throw unsupported("\(upper) in range")
124+
guard let rhs = r.rhs.literalCharacterValue else {
125+
throw unsupported("\(r.rhs) in range")
126126
}
127127

128128
return { input, bounds in

0 commit comments

Comments
 (0)