Skip to content

Commit 56cddfa

Browse files
committed
Merge pull request #738 from Azoy/fix-som-refactors
Support change matching options in Regex refactoring
1 parent db96f7c commit 56cddfa

File tree

3 files changed

+160
-10
lines changed

3 files changed

+160
-10
lines changed

Sources/_RegexParser/Regex/Printing/PrettyPrinter.swift

+21
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ public struct PrettyPrinter {
4343

4444
// The current default quantification behavior
4545
public var quantificationBehavior: AST.Quantification.Kind = .eager
46+
47+
// A stack of the inline matching option groups added so far, e.g. (?s). Each
48+
// entry carries a boolean: true = options added (?s), false = removed (?-s).
49+
public var inlineMatchingOptions: [([AST.MatchingOption], Bool)] = []
4650
}
4751

4852
// MARK: - Raw interface
@@ -142,4 +146,21 @@ extension PrettyPrinter {
142146
printIndented(f)
143147
print(endDelimiter)
144148
}
149+
150+
/// Records a new group of inline matching options on top of the stack and
/// moves the indentation level one step deeper.
///
/// - Parameters:
///   - options: The matching options that make up the group.
///   - isAdded: The flag stored with the group; `true` marks the options as
///     added, `false` as removed.
public mutating func pushMatchingOptions(
  _ options: [AST.MatchingOption],
  isAdded: Bool
) {
  let entry = (options, isAdded)
  inlineMatchingOptions += [entry]
  indentLevel += 1
}
159+
160+
/// Removes the group of matching options on top of the stack and moves the
/// indentation level one step back.
///
/// The stack must not be empty when this is called: `removeLast()` traps on
/// an empty array.
///
/// - Returns: The removed options together with the flag stored with them.
public mutating func popMatchingOptions() -> ([AST.MatchingOption], Bool) {
  let top = inlineMatchingOptions.removeLast()
  indentLevel -= 1
  return top
}
145166
}

Sources/_StringProcessing/PrintAsPattern.swift

+85-7
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,51 @@ extension PrettyPrinter {
7474
printBlock("Regex") { printer in
7575
printer.printAsPattern(convertedFromAST: node, isTopLevel: true)
7676
}
77+
78+
printInlineMatchingOptions()
79+
}
80+
81+
/// Drains the stack of inline matching options, printing the trailing
/// modifier lines for each group, most recently pushed group first.
///
/// For every group this pops the entry (which also steps the indentation
/// level back by one), prints one modifier line per supported option at one
/// extra level of indentation, and finally prints a closing brace.
mutating func printInlineMatchingOptions() {
  while !inlineMatchingOptions.isEmpty {
    let (group, isEnabled) = popMatchingOptions()

    printIndented { printer in
      for option in group {
        // The modifier line for this option, or nil when nothing is printed.
        let modifier: String?

        switch option.kind {
        case .asciiOnlyDigit:
          modifier = ".asciiOnlyDigits(\(isEnabled))"

        case .asciiOnlyPOSIXProps:
          modifier = ".asciiOnlyCharacterClasses(\(isEnabled))"

        case .asciiOnlySpace:
          modifier = ".asciiOnlyWhitespace(\(isEnabled))"

        case .asciiOnlyWord:
          modifier = ".asciiOnlyWordCharacters(\(isEnabled))"

        case .caseInsensitive:
          modifier = ".ignoresCase(\(isEnabled))"

        case .multiline:
          modifier = ".anchorsMatchLineEndings(\(isEnabled))"

        case .reluctantByDefault:
          // No modifier here: this option is expressed by changing the
          // repetition behavior of each OneOrMore, etc. rather than by a
          // nested regex.
          modifier = nil

        case .singleLine:
          modifier = ".dotMatchesNewlines(\(isEnabled))"

        default:
          // Every other option kind is skipped without output.
          modifier = nil
        }

        if let modifier = modifier {
          printer.print(modifier)
        }
      }
    }

    // One closing brace is printed per popped group.
    print("}")
  }
}
77122
}
78123

79124
// FIXME: Use of back-offs like height and depth
@@ -424,7 +469,7 @@ extension PrettyPrinter {
424469
// Also in the same vein, if we have a few atom members but no
425470
// nonAtomMembers, then we can emit a single .anyOf(...) for them.
426471
if !charMembers.isEmpty, nonCharMembers.isEmpty {
427-
let anyOf = ".anyOf(\(charMembers))"
472+
let anyOf = "CharacterClass.anyOf(\(charMembers))"
428473

429474
indent()
430475

@@ -502,15 +547,15 @@ extension PrettyPrinter {
502547
if wrap {
503548
output("One(.anyOf(\(String(c)._quoted)))")
504549
} else {
505-
output(".anyOf(\(String(c)._quoted))")
550+
output("CharacterClass.anyOf(\(String(c)._quoted))")
506551
}
507552

508553
case let .scalar(s):
509554

510555
if wrap {
511556
output("One(.anyOf(\(s._dslBase._bareQuoted)))")
512557
} else {
513-
output(".anyOf(\(s._dslBase._bareQuoted))")
558+
output("CharacterClass.anyOf(\(s._dslBase._bareQuoted))")
514559
}
515560

516561
case let .unconverted(a):
@@ -538,7 +583,7 @@ extension PrettyPrinter {
538583
if wrap {
539584
output("One(.anyOf(\(s._quoted)))")
540585
} else {
541-
output(".anyOf(\(s._quoted))")
586+
output("CharacterClass.anyOf(\(s._quoted))")
542587
}
543588

544589
case .trivia(_):
@@ -1281,14 +1326,47 @@ extension DSLTree.Atom {
12811326
return ("/* TODO: symbolic references */", false)
12821327

12831328
case .changeMatchingOptions(let matchingOptions):
1284-
for add in matchingOptions.ast.adding {
1285-
switch add.kind {
1329+
let options: [AST.MatchingOption]
1330+
let isAdd: Bool
1331+
1332+
if matchingOptions.ast.removing.isEmpty {
1333+
options = matchingOptions.ast.adding
1334+
isAdd = true
1335+
} else {
1336+
options = matchingOptions.ast.removing
1337+
isAdd = false
1338+
}
1339+
1340+
for option in options {
1341+
switch option.kind {
1342+
case .extended:
1343+
// We don't currently support (?x) in the DSL, so if we see it, just
1344+
// do nothing.
1345+
if options.count == 1 {
1346+
return nil
1347+
}
1348+
12861349
case .reluctantByDefault:
1287-
printer.quantificationBehavior = .reluctant
1350+
if isAdd {
1351+
printer.quantificationBehavior = .reluctant
1352+
} else {
1353+
printer.quantificationBehavior = .eager
1354+
}
1355+
1356+
1357+
// Don't create a nested Regex for (?U); we handle this by altering
1358+
// every individual repetitionBehavior for things like OneOrMore.
1359+
if options.count == 1 {
1360+
return nil
1361+
}
1362+
12881363
default:
12891364
break
12901365
}
12911366
}
1367+
1368+
printer.print("Regex {")
1369+
printer.pushMatchingOptions(options, isAdded: isAdd)
12921370
}
12931371

12941372
return nil

Tests/RegexTests/RenderDSLTests.swift

+54-3
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ extension RenderDSLTests {
230230
"""#)
231231
try testConversion(#"[abc\u{301}]"#, #"""
232232
Regex {
233-
One(.anyOf("abc\u{301}"))
233+
One(CharacterClass.anyOf("abc\u{301}"))
234234
}
235235
"""#)
236236

@@ -248,7 +248,7 @@ extension RenderDSLTests {
248248

249249
try testConversion(#"(?x) [ a b c \u{301} ] "#, #"""
250250
Regex {
251-
One(.anyOf("abc\u{301}"))
251+
One(CharacterClass.anyOf("abc\u{301}"))
252252
}
253253
"""#)
254254

@@ -306,7 +306,7 @@ extension RenderDSLTests {
306306
func testCharacterClass() throws {
307307
try testConversion(#"[abc]+"#, #"""
308308
Regex {
309-
OneOrMore(.anyOf("abc"))
309+
OneOrMore(CharacterClass.anyOf("abc"))
310310
}
311311
"""#)
312312

@@ -337,5 +337,56 @@ extension RenderDSLTests {
337337
}
338338
}
339339
"""#)
340+
341+
try testConversion(#"[^i]*"#, #"""
342+
Regex {
343+
ZeroOrMore(CharacterClass.anyOf("i").inverted)
344+
}
345+
"""#)
346+
}
347+
348+
/// Checks how inline matching-option groups such as `(?s)`, `(?-s)` and
/// `(?U)` are rendered.
///
/// NOTE(review): `testConversion` is defined elsewhere; presumably it converts
/// the regex in the first argument and compares the rendered DSL text with
/// the second argument — confirm against its definition.
func testChangeMatchingOptions() throws {
349+
// Adding and then removing `s`: each group opens a nested `Regex` block that
// is followed by a `.dotMatchesNewlines(...)` modifier.
try testConversion(#"(?s).*(?-s).*"#, #"""
350+
Regex {
351+
Regex {
352+
ZeroOrMore {
353+
/./
354+
}
355+
Regex {
356+
ZeroOrMore {
357+
/./
358+
}
359+
}
360+
.dotMatchesNewlines(false)
361+
}
362+
.dotMatchesNewlines(true)
363+
}
364+
"""#)
365+
366+
// `(?U)` / `(?-U)` produce no nested block; only the quantifier that follows
// `(?U)` is rendered with `.reluctant`.
try testConversion(#"(?U)a+(?-U)a+"#, #"""
367+
Regex {
368+
OneOrMore(.reluctant) {
369+
"a"
370+
}
371+
OneOrMore {
372+
"a"
373+
}
374+
}
375+
"""#)
376+
377+
// A multi-option group `(?sim)` yields one modifier line per option after
// its block.
try testConversion(#"(?sim)hello(?-s)world"#, #"""
378+
Regex {
379+
Regex {
380+
"hello"
381+
Regex {
382+
"world"
383+
}
384+
.dotMatchesNewlines(false)
385+
}
386+
.dotMatchesNewlines(true)
387+
.ignoresCase(true)
388+
.anchorsMatchLineEndings(true)
389+
}
390+
"""#)
340391
}
341392
}

0 commit comments

Comments
 (0)