diff --git a/Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift b/Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift index 6db35dc8c..b6c17f399 100644 --- a/Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift +++ b/Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift @@ -51,7 +51,13 @@ extension _ASTPrintable { public var debugDescription: String { _dump() } var _children: [AST]? { - (self as? _ASTParent)?.children + if let children = (self as? _ASTParent)?.children { + return children + } + if let children = (self as? AST)?.children { + return children + } + return nil } func _print() -> String { @@ -62,8 +68,13 @@ extension _ASTPrintable { guard let children = _children else { return _dumpBase } - let sub = children.lazy.map { - $0._dump() + let sub = children.lazy.compactMap { + // Exclude trivia for now, as we don't want it to appear when performing + // comparisons of dumped output in tests. + // TODO: We should eventually have some way of filtering out trivia for + // tests, so that it can appear in regular dumps. + if $0.isTrivia { return nil } + return $0._dump() }.joined(separator: ",") return "\(_dumpBase)(\(sub))" } diff --git a/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift b/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift index b935d0a31..8c8b356f0 100644 --- a/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift +++ b/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift @@ -53,8 +53,19 @@ extension CustomCC { extension CustomCC: _ASTNode { public var _dumpBase: String { - // FIXME: print out members... 
- "customCharacterClass" + "customCharacterClass(\(members))" } } +extension CustomCC.Member: _ASTPrintable { + public var _dumpBase: String { + switch self { + case .custom(let cc): return "\(cc)" + case .atom(let a): return "\(a)" + case .range(let lhs, let rhs): + return "range \(lhs) to \(rhs)" + case .setOperation(let lhs, let op, let rhs): + return "op \(lhs) \(op.value) \(rhs)" + } + } +} diff --git a/Sources/_MatchingEngine/Regex/AST/Group.swift b/Sources/_MatchingEngine/Regex/AST/Group.swift index bb0aa8194..c8713ca49 100644 --- a/Sources/_MatchingEngine/Regex/AST/Group.swift +++ b/Sources/_MatchingEngine/Regex/AST/Group.swift @@ -71,7 +71,7 @@ extension AST.Group.Kind: _ASTPrintable { public var _dumpBase: String { switch self { case .capture: return "capture" - case .namedCapture(let s): return "capture<\(s)>" + case .namedCapture(let s): return "capture<\(s.value)>" case .nonCapture: return "nonCapture" case .nonCaptureReset: return "nonCaptureReset" case .atomicNonCapturing: return "atomicNonCapturing" diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index c44225249..e918c5aec 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -57,6 +57,19 @@ func parseWithDelimitersTest(_ input: String, _ expecting: AST) { } } +/// Make sure the ASTs parsed from two regex strings compare as not equal. +func parseNotEqualTest(_ lhs: String, _ rhs: String, + syntax: SyntaxOptions = .traditional) { + let lhsAST = try! parse(lhs, syntax) + let rhsAST = try! 
parse(rhs, syntax) + if lhsAST == rhsAST || lhsAST._dump() == rhsAST._dump() { + XCTFail(""" + AST: \(lhsAST._dump()) + Should not be equal to: \(rhsAST._dump()) + """) + } +} + extension RegexTests { func testParse() { parseTest( @@ -194,6 +207,16 @@ extension RegexTests { parseTest("[[:word:]]", charClass(posixProp_m(.posix(.word)))) parseTest("[[:xdigit:]]", charClass(posixProp_m(.posix(.xdigit)))) + parseTest("[[:ascii:]]", charClass(posixProp_m(.ascii))) + parseTest("[[:cntrl:]]", charClass(posixProp_m(.generalCategory(.control)))) + parseTest("[[:digit:]]", charClass(posixProp_m(.generalCategory(.decimalNumber)))) + parseTest("[[:lower:]]", charClass(posixProp_m(.binary(.lowercase)))) + parseTest("[[:punct:]]", charClass(posixProp_m(.generalCategory(.punctuation)))) + parseTest("[[:space:]]", charClass(posixProp_m(.binary(.whitespace)))) + parseTest("[[:upper:]]", charClass(posixProp_m(.binary(.uppercase)))) + + parseTest("[[:UPPER:]]", charClass(posixProp_m(.binary(.uppercase)))) + parseTest("[[:isALNUM:]]", charClass(posixProp_m(.posix(.alnum)))) parseTest("[[:AL_NUM:]]", charClass(posixProp_m(.posix(.alnum)))) parseTest("[[:script=Greek:]]", charClass(posixProp_m(.script(.greek)))) @@ -436,6 +459,24 @@ extension RegexTests { parseWithDelimitersTest("'/a b/'", concat("a", " ", "b")) parseWithDelimitersTest("'|a b|'", concat("a", "b")) + // Make sure dumping output correctly reflects differences in AST. 
+ parseNotEqualTest(#"abc"#, #"abd"#) + + parseNotEqualTest(#"[abc[:space:]\d]+"#, + #"[abc[:upper:]\d]+"#) + + parseNotEqualTest(#"[abc[:space:]\d]+"#, + #"[ac[:space:]\d]+"#) + + parseNotEqualTest(#"[abc[:space:]\d]+"#, + #"[acc[:space:]\s]+"#) + + parseNotEqualTest(#"[abc[:space:]\d]+"#, + #"[acc[:space:]\d]*"#) + + parseNotEqualTest(#"([a-c&&e]*)+"#, + #"([a-d&&e]*)+"#) + // TODO: failure tests } diff --git a/Tests/RegexTests/SyntaxOptionsTests.swift b/Tests/RegexTests/SyntaxOptionsTests.swift index f771e84e6..34c8c240a 100644 --- a/Tests/RegexTests/SyntaxOptionsTests.swift +++ b/Tests/RegexTests/SyntaxOptionsTests.swift @@ -7,6 +7,8 @@ private let dplus = oneOrMore( .greedy, atom(.escaped(.decimalDigit))) private let dotAST = concat( dplus, ".", dplus, ".", dplus, ".", dplus) +private let dotASTQuoted = concat( + dplus, quote("."), dplus, quote("."), dplus, quote("."), dplus) extension RegexTests { @@ -42,7 +44,7 @@ extension RegexTests { dotAST, syntax: .modern) parseTest( #" \d+ "." \d+ "." \d+ "." \d+ "#, - dotAST, syntax: .modern) + dotASTQuoted, syntax: .modern) } func testModernRanges() {