Skip to content

Commit 858839e

Browse files
committed
Fix bug in word boundary caching
1 parent c75631d commit 858839e

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

Sources/_StringProcessing/Unicode/WordBreaking.swift

+5-5
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,18 @@ extension String {
7979
}
8080

8181
if #available(SwiftStdlib 5.7, *) {
82-
var indices: Set<String.Index> = []
82+
if cache == nil {
83+
cache = []
84+
}
8385
var j = maxIndex ?? range.lowerBound
8486

8587
while j < range.upperBound, j <= i {
86-
indices.insert(j)
88+
cache!.insert(j)
8789
j = _wordIndex(after: j)
8890
}
8991

90-
cache = indices
9192
maxIndex = j
92-
93-
return indices.contains(i)
93+
return cache!.contains(i)
9494
} else {
9595
return false
9696
}

Tests/RegexTests/MatchTests.swift

+12-1
Original file line numberDiff line numberDiff line change
@@ -2381,7 +2381,18 @@ extension RegexTests {
23812381
XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
23822382
XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
23832383
}
2384-
2384+
2385+
// https://github.com/swiftlang/swift-experimental-string-processing/issues/768
2386+
func testWordBoundaryCaching() throws {
2387+
// This will first find word boundaries up until the middle before failing,
2388+
// then it will find word boundaries until late in the string, then fail,
2389+
// and finally should succeed on a word boundary cached from the first
2390+
// attempt.
2391+
let input = "first second third fourth"
2392+
let regex = try Regex(#".*second\bX|.*third\bX|.*first\b"#)
2393+
XCTAssertTrue(input.contains(regex))
2394+
}
2395+
23852396
// MARK: Character Semantics
23862397

23872398
var eComposed: String { "é" }

0 commit comments

Comments
 (0)