Skip to content

Commit 2e8d955

Browse files
authored
Fix word boundary matching in matches(of:) (#744) (#745)
A prior change incorrectly treated the start of the `searchBounds` as an always-valid word boundary, which resulted in a word boundary at the start of a pattern unconditionally matching at the restart point when finding all matches in a string. This change corrects the usage to recognize word boundaries only at the start of the `subjectBounds`. rdar://129417643
1 parent 5c93227 commit 2e8d955

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

Sources/_StringProcessing/Engine/MEBuiltins.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,14 @@ extension Processor {
105105
if payload.usesSimpleUnicodeBoundaries {
106106
return atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel)
107107
} else {
108-
return input.isOnWordBoundary(at: currentPosition, in: searchBounds, using: &wordIndexCache, &wordIndexMaxIndex)
108+
return input.isOnWordBoundary(at: currentPosition, in: subjectBounds, using: &wordIndexCache, &wordIndexMaxIndex)
109109
}
110110

111111
case .notWordBoundary:
112112
if payload.usesSimpleUnicodeBoundaries {
113113
return !atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel)
114114
} else {
115-
return !input.isOnWordBoundary(at: currentPosition, in: searchBounds, using: &wordIndexCache, &wordIndexMaxIndex)
115+
return !input.isOnWordBoundary(at: currentPosition, in: subjectBounds, using: &wordIndexCache, &wordIndexMaxIndex)
116116
}
117117
}
118118
}

Tests/RegexTests/MatchTests.swift

+36
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,33 @@ func matchTest(
256256

257257
// TODO: Adjust below to also check captures
258258

259+
/// Checks every match that `matches(of:)` produces for `regex` in `input`.
///
/// - Parameters:
///   - regex: The pattern source, compiled at run time with `Regex(_:)`.
///   - input: The string searched for matches.
///   - matches: The matched substrings expected, in order.
///   - xfail: When `true`, the test passes only if the found matches differ
///     from `matches`; an error thrown while compiling is also tolerated.
///   - semanticLevel: The matching semantics applied to the compiled regex.
///   - file: The file reported on assertion failure.
///   - line: The line reported on assertion failure.
func allMatchesTest(
  _ regex: String,
  input: String,
  matches: [Substring],
  xfail: Bool = false,
  semanticLevel: RegexSemanticLevel = .graphemeCluster,
  file: StaticString = #filePath,
  line: UInt = #line
) {
  let found: [Substring]
  do {
    let compiled = try Regex(regex).matchingSemantics(semanticLevel)
    found = input.matches(of: compiled).map(\.0)
  } catch {
    // A thrown error is a failure unless this case is expected to fail.
    if !xfail {
      XCTFail("\(error)", file: file, line: line)
    }
    return
  }

  guard !xfail else {
    XCTAssertNotEqual(found, matches, file: file, line: line)
    return
  }
  XCTAssertEqual(found, matches, "Incorrect match", file: file, line: line)
}
285+
259286
/// Test the first match in a string, via `firstRange(of:)`
260287
func firstMatchTest(
261288
_ regex: String,
@@ -1667,6 +1694,15 @@ extension RegexTests {
16671694
("123", "23"),
16681695
(" 123", "23"),
16691696
("123 456", "23"))
1697+
1698+
allMatchesTest(
1699+
#"\b\w"#,
1700+
input: "ab cd efgh",
1701+
matches: ["a", "c", "e"])
1702+
allMatchesTest(
1703+
#"\B\w"#,
1704+
input: "ab cd efgh",
1705+
matches: ["b", "d", "f", "g", "h"])
16701706

16711707
let defaultBoundaryRegex = try Regex(#"\b.{3}X.{3}\b"#)
16721708
// Default word boundaries match at the start/end of a string/line.

0 commit comments

Comments
 (0)