Skip to content

Commit 2d7cadc

Browse files
author
Jacob Hearst
committed
Address PR comments
1 parent 25d1e6a commit 2d7cadc

File tree

4 files changed

+51
-38
lines changed

4 files changed

+51
-38
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

+6-3
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ fileprivate extension Compiler.ByteCodeGen {
109109
}
110110

111111
mutating func emitQuotedLiteral(_ s: String) {
112+
assert(!reverse)
112113
guard options.semanticLevel == .graphemeCluster else {
113114
for char in s {
114115
for scalar in char.unicodeScalars {
@@ -137,6 +138,7 @@ fileprivate extension Compiler.ByteCodeGen {
137138
}
138139

139140
mutating func emitReverseQuotedLiteral(_ s: String) {
141+
assert(reverse)
140142
guard options.semanticLevel == .graphemeCluster else {
141143
for char in s {
142144
for scalar in char.unicodeScalars.reversed() {
@@ -408,13 +410,14 @@ fileprivate extension Compiler.ByteCodeGen {
408410
_ kind: (forwards: Bool, positive: Bool),
409411
_ child: DSLTree.Node
410412
) throws {
413+
let previousReverse = reverse
411414
reverse = !kind.forwards
412415
if kind.positive {
413416
try emitPositiveLookaround(child)
414417
} else {
415418
try emitNegativeLookaround(child)
416419
}
417-
reverse = false
420+
reverse = previousReverse
418421
}
419422

420423
mutating func emitAtomicNoncapturingGroup(
@@ -1116,9 +1119,9 @@ fileprivate extension Compiler.ByteCodeGen {
11161119
if let asciiBitset = ccc.asAsciiBitset(options),
11171120
optimizationsEnabled {
11181121
if options.semanticLevel == .unicodeScalar {
1119-
builder.buildScalarMatchAsciiBitset(asciiBitset)
1122+
builder.buildScalarMatchAsciiBitset(asciiBitset, reverse: reverse)
11201123
} else {
1121-
builder.buildMatchAsciiBitset(asciiBitset)
1124+
builder.buildMatchAsciiBitset(asciiBitset, reverse: reverse)
11221125
}
11231126
return
11241127
}

Sources/_StringProcessing/Engine/MEBuilder.swift

+8-4
Original file line number | Diff line number | Diff line change
@@ -194,17 +194,21 @@ extension MEProgram.Builder {
194194
}
195195

196196
mutating func buildMatchAsciiBitset(
197-
_ b: DSLTree.CustomCharacterClass.AsciiBitset
197+
_ b: DSLTree.CustomCharacterClass.AsciiBitset,
198+
reverse: Bool
198199
) {
200+
let opcode = reverse ? Instruction.OpCode.reverseMatchBitset : .matchBitset
199201
instructions.append(.init(
200-
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: false)))
202+
opcode, .init(bitset: makeAsciiBitset(b), isScalar: false)))
201203
}
202204

203205
mutating func buildScalarMatchAsciiBitset(
204-
_ b: DSLTree.CustomCharacterClass.AsciiBitset
206+
_ b: DSLTree.CustomCharacterClass.AsciiBitset,
207+
reverse: Bool
205208
) {
209+
let opcode = reverse ? Instruction.OpCode.reverseMatchBitset : .matchBitset
206210
instructions.append(.init(
207-
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true)))
211+
opcode, .init(bitset: makeAsciiBitset(b), isScalar: true)))
208212
}
209213

210214
mutating func buildMatchBuiltin(model: _CharacterClassModel, reverse: Bool) {

Sources/_StringProcessing/Engine/MEBuiltins.swift

+8-2
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,10 @@ extension String {
163163
/// - Returns: The character at `pos`, bounded by `end`, if it exists, along
164164
/// with the upper bound of that character. The upper bound is always
165165
/// scalar-aligned.
166-
func characterAndEnd(at pos: String.Index, limitedBy end: String.Index) -> (Character, String.Index)? {
166+
func characterAndEnd(
167+
at pos: String.Index,
168+
limitedBy end: String.Index
169+
) -> (Character, characterEnd: String.Index)? {
167170
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
168171
guard pos < end else { return nil }
169172
let next = index(after: pos)
@@ -204,7 +207,10 @@ extension String {
204207
/// - Returns: The character at `pos`, bounded by `start`, if it exists, along
205208
/// with the lower bound of that character. The lower bound is always
206209
/// scalar-aligned.
207-
func characterAndStart(at pos: String.Index, limitedBy start: String.Index) -> (Character, String.Index)? {
210+
func characterAndStart(
211+
at pos: String.Index,
212+
limitedBy start: String.Index
213+
) -> (Character, characterStart: String.Index)? {
208214
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
209215
guard pos > start else { return nil }
210216
let previous = index(before: pos)

Tests/RegexTests/MatchTests.swift

+29-29
Original file line number | Diff line number | Diff line change
@@ -1587,9 +1587,8 @@ extension RegexTests {
15871587
#"(*positive_lookbehind:USD)\d+"#,
15881588
input: "Price: USD100", match: "100")
15891589

1590-
// TODO: Why is a match not found when unoptimized?
15911590
firstMatchTest(
1592-
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100", validateOptimizations: false)
1591+
#"\d{3}(?<=USD\d{3})"#, input: "Price: USD100", match: "100")
15931592

15941593
firstMatchTest(
15951594
#"(?<!USD)\d+"#, input: "Price: JYP100", match: "100")
@@ -1602,33 +1601,34 @@ extension RegexTests {
16021601
firstMatchTest(
16031602
#"\d{3}(?<!USD\d{3})"#, input: "Price: JYP100", match: "100")
16041603

1605-
firstMatchTest(#"(?<=abc)def"#, input: "abcdefg", match: "def", validateOptimizations: false)
1606-
firstMatchTests(
1607-
#"(?<=az|b|c)def"#,
1608-
("azdefg", "def"),
1609-
("bdefg", "def"),
1610-
("cdefg", "def"),
1611-
("123defg", nil),
1612-
validateOptimizations: false
1613-
)
1614-
1615-
// FIXME: quickMatch and thoroughMatch have different results
1616-
firstMatchTest(
1617-
#"(?<=\d{1,3}-.{1,3}-\d{1,3})suffix"#,
1618-
input: "123-_+/-789suffix",
1619-
match: "suffix",
1620-
validateOptimizations: false
1621-
)
1622-
1623-
firstMatchTests(
1624-
#"(?<=^\d{1,3})abc"#,
1625-
("123abc", "abc"),
1626-
("12abc", "abc"),
1627-
("1abc", "abc"),
1628-
("1234abc", nil), // FIXME: Shouldn't match but does because `^` assertions are broken
1629-
("z123abc", nil), // FIXME: Same as above
1630-
validateOptimizations: false
1631-
)
1604+
firstMatchTest(#"(?<=abc)def"#, input: "abcdefg", match: "def")
1605+
firstMatchTests(
1606+
#"(?<=az|b|c)def"#,
1607+
("azdefg", "def"),
1608+
("bdefg", "def"),
1609+
("cdefg", "def"),
1610+
("123defg", nil),
1611+
validateOptimizations: false
1612+
)
1613+
1614+
firstMatchTest(#"abcd(?<=bc(?=d).)"#, input: "abcdefg", match: "abcd")
1615+
1616+
// FIXME: quickMatch and thoroughMatch have different results
1617+
// firstMatchTest(
1618+
// #"(?<=\d{1,3}-.{1,3}-\d{1,3})suffix"#,
1619+
// input: "123-_+/-789suffix",
1620+
// match: "suffix",
1621+
// validateOptimizations: false
1622+
// )
1623+
1624+
firstMatchTests(
1625+
#"(?<=^\d{1,3})abc"#,
1626+
("123abc", "abc"),
1627+
("12abc", "abc"),
1628+
("1abc", "abc"),
1629+
("1234abc", nil), // FIXME: Shouldn't match but does because `^` assertions are broken
1630+
("z123abc", nil) // FIXME: Same as above
1631+
)
16321632
}
16331633

16341634
func testMatchAnchors() throws {

0 commit comments

Comments
 (0)