diff --git a/Sources/_StringProcessing/Unicode/WordBreaking.swift b/Sources/_StringProcessing/Unicode/WordBreaking.swift index 10aadde32..a34cf2e96 100644 --- a/Sources/_StringProcessing/Unicode/WordBreaking.swift +++ b/Sources/_StringProcessing/Unicode/WordBreaking.swift @@ -79,18 +79,18 @@ extension String { } if #available(SwiftStdlib 5.7, *) { - var indices: Set = [] + if cache == nil { + cache = [] + } var j = maxIndex ?? range.lowerBound while j < range.upperBound, j <= i { - indices.insert(j) + cache!.insert(j) j = _wordIndex(after: j) } - cache = indices maxIndex = j - - return indices.contains(i) + return cache!.contains(i) } else { return false } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index e910ac318..1b5c67e0d 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2381,7 +2381,18 @@ extension RegexTests { XCTAssertTrue("cafe".contains(caseInsensitiveRegex)) XCTAssertTrue("CaFe".contains(caseInsensitiveRegex)) } - + + // https://github.com/swiftlang/swift-experimental-string-processing/issues/768 + func testWordBoundaryCaching() throws { + // This will first find word boundaries up until the middle before failing, + // then it will find word boundaries until late in the string, then fail, + // and finally should succeed on a word boundary cached from the first + // attempt. + let input = "first second third fourth" + let regex = try Regex(#".*second\bX|.*third\bX|.*first\b"#) + XCTAssertTrue(input.contains(regex)) + } + // MARK: Character Semantics var eComposed: String { "é" }