diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 3cf6bd6f2..00afca28b 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -308,31 +308,48 @@ fileprivate extension Compiler.ByteCodeGen { try emitNode(node) } - mutating func emitLookaround( - _ kind: (forwards: Bool, positive: Bool), - _ child: DSLTree.Node - ) throws { - guard kind.forwards else { - throw Unsupported("backwards assertions") - } + mutating func emitPositiveLookahead(_ child: DSLTree.Node) throws { + /* + save(restoringAt: success) + save(restoringAt: intercept) + // failure restores at intercept + clearThrough(intercept) // remove intercept and any leftovers from + fail(preservingCaptures: true) // ->success + intercept: + clearSavePoint // remove success + fail // propagate failure + success: + ... + */ + let intercept = builder.makeAddress() + let success = builder.makeAddress() + + builder.buildSave(success) + builder.buildSave(intercept) + try emitNode(child) + builder.buildClearThrough(intercept) + builder.buildFail(preservingCaptures: true) // Lookahead succeeds here + + builder.label(intercept) + builder.buildClear() + builder.buildFail() - let positive = kind.positive + builder.label(success) + } + + mutating func emitNegativeLookahead(_ child: DSLTree.Node) throws { /* save(restoringAt: success) save(restoringAt: intercept) // failure restores at intercept clearThrough(intercept) // remove intercept and any leftovers from - : - clearSavePoint // remove success - fail // positive->success, negative propagates + clearSavePoint // remove success + fail // propagate failure intercept: - : - clearSavePoint // remove success - fail // positive propagates, negative->success + fail // ->success success: ... */ - let intercept = builder.makeAddress() let success = builder.makeAddress() @@ -340,19 +357,28 @@ fileprivate extension Compiler.ByteCodeGen { builder.buildSave(intercept) try emitNode(child) builder.buildClearThrough(intercept) - if !positive { - builder.buildClear() - } + builder.buildClear() builder.buildFail() builder.label(intercept) - if positive { - builder.buildClear() - } builder.buildFail() builder.label(success) } + + mutating func emitLookaround( + _ kind: (forwards: Bool, positive: Bool), + _ child: DSLTree.Node + ) throws { + guard kind.forwards else { + throw Unsupported("backwards assertions") + } + if kind.positive { + try emitPositiveLookahead(child) + } else { + try emitNegativeLookahead(child) + } + } mutating func emitAtomicNoncapturingGroup( _ child: DSLTree.Node @@ -361,8 +387,8 @@ fileprivate extension Compiler.ByteCodeGen { save(continuingAt: success) save(restoringAt: intercept) // failure restores at intercept - clearThrough(intercept) // remove intercept and any leftovers from - fail // ->success + clearThrough(intercept) // remove intercept and any leftovers from + fail(preservingCaptures: true) // ->success intercept: clearSavePoint // remove success fail // propagate failure @@ -377,7 +403,7 @@ fileprivate extension Compiler.ByteCodeGen { builder.buildSave(intercept) try emitNode(child) builder.buildClearThrough(intercept) - builder.buildFail() + builder.buildFail(preservingCaptures: true) // Atomic group succeeds here builder.label(intercept) builder.buildClear() diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 5f038449c..452ed8bc3 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -211,11 +211,11 @@ extension Instruction.Payload { self.rawValue == 1 } - init(bool: BoolRegister) { - self.init(bool) + init(bool: Bool) { + self.init(bool ? 1 : 0, 0) } - var bool: BoolRegister { - interpret() + var boolPayload: Bool { + interpret(as: TypedInt.self) == 1 } init(element: ElementRegister, isCaseInsensitive: Bool) { diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 1f5611f98..844ead08e 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -146,8 +146,8 @@ extension MEProgram.Builder { instructions.append(.init(.clearThrough)) fixup(to: t) } - mutating func buildFail() { - instructions.append(.init(.fail)) + mutating func buildFail(preservingCaptures: Bool = false) { + instructions.append(.init(.fail, .init(bool: preservingCaptures))) } mutating func buildAdvance(_ n: Distance) { diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 9bb4ecb06..1964c9b8e 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -37,7 +37,7 @@ extension Processor { var value: Any? = nil // An in-progress capture start - fileprivate var currentCaptureBegin: Position? = nil + var currentCaptureBegin: Position? = nil fileprivate func _invariantCheck() { if range == nil { diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 0de7c2f70..48c79fc8d 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -331,7 +331,7 @@ extension Processor { return true } - mutating func signalFailure() { + mutating func signalFailure(preservingCaptures: Bool = false) { guard !savePoints.isEmpty else { state = .fail return @@ -362,10 +362,14 @@ extension Processor { controller.pc = pc currentPosition = pos ?? currentPosition callStack.removeLast(callStack.count - stackEnd.rawValue) - storedCaptures = capEnds registers.ints = intRegisters registers.positions = posRegisters + if !preservingCaptures { + // Reset all capture information + storedCaptures = capEnds + } + metrics.addBacktrack() } @@ -479,7 +483,8 @@ extension Processor { tryAccept() case .fail: - signalFailure() + let preservingCaptures = payload.boolPayload + signalFailure(preservingCaptures: preservingCaptures) case .advance: let (isScalar, distance) = payload.distance diff --git a/Tests/RegexBuilderTests/AlgorithmsTests.swift b/Tests/RegexBuilderTests/AlgorithmsTests.swift index b85395d19..d9d27e7a1 100644 --- a/Tests/RegexBuilderTests/AlgorithmsTests.swift +++ b/Tests/RegexBuilderTests/AlgorithmsTests.swift @@ -256,6 +256,28 @@ class AlgorithmsResultBuilderTests: XCTestCase { "+" int } + + let ref1 = Reference() + let ref2 = Reference() + try expectMatch( + .first, + ("ABBAB", ("ABBAB", "A", "B")), + ("defABBAdefB", ("defABBAdefB", "A", "B")), + matchType: (Substring, Substring, Substring).self, + equivalence: == + ) { + Anchor.startOfSubject + Lookahead { + ZeroOrMore(.any) + Capture(as: ref1) { One(.any) } + Capture(as: ref2) { One(.any) } + ref2 + ref1 + } + OneOrMore(.any) + ref2 + Anchor.endOfSubject + } } func testStartsAndContains() throws { diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 0de71f532..7788f9538 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1804,8 +1804,7 @@ extension RegexTests { firstMatchTests( #"(?>(\d+))\w+\1"#, (input: "23x23", match: "23x23"), - (input: "123x23", match: "23x23"), - xfail: true) + (input: "123x23", match: "23x23")) // Backreferences in scalar mode // In scalar mode the backreference should not match @@ -1823,12 +1822,10 @@ extension RegexTests { (input: "abbba", match: nil), (input: "ABBA", match: nil), (input: "defABBAdef", match: nil)) - // FIXME: Backreferences don't escape positive lookaheads firstMatchTests( #"^(?=.*(.)(.)\2\1).+\2$"#, (input: "ABBAB", match: "ABBAB"), - (input: "defABBAdefB", match: "defABBAdefB"), - xfail: true) + (input: "defABBAdefB", match: "defABBAdefB")) firstMatchTests( #"^(?!.*(.)(.)\2\1).+$"#, @@ -2771,6 +2768,23 @@ extension RegexTests { } } + func testIssue713() throws { + // Original report from https://github.com/apple/swift-experimental-string-processing/issues/713 + let originalInput = "Something 9a" + let originalRegex = #/(?=([1-9]|(a|b)))/# + let originalOutput = originalInput.matches(of: originalRegex).map(\.output) + XCTAssert(originalOutput[0] == ("", "9", nil)) + XCTAssert(originalOutput[1] == ("", "a", "a")) + + let simplifiedRegex = #/(?=(9))/# + let simplifiedOutput = originalInput.matches(of: simplifiedRegex).map(\.output) + XCTAssert(simplifiedOutput[0] == ("", "9")) + + let additionalRegex = #/(a+)b(a+)/# + let additionalInput = "abaaba" + XCTAssertNil(additionalInput.wholeMatch(of: additionalRegex)) + } + func testNSRECompatibility() throws { // NSRE-compatibility includes scalar matching, so `[\r\n]` should match // either `\r` or `\n`.