diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift index 9cc2e9a96..1f6043d72 100644 --- a/Sources/_RegexParser/Regex/AST/Atom.swift +++ b/Sources/_RegexParser/Regex/AST/Atom.swift @@ -401,9 +401,6 @@ extension AST.Atom.CharacterProperty { /// Some special properties implemented by PCRE and Oniguruma. case pcreSpecial(PCRESpecialCategory) case onigurumaSpecial(OnigurumaSpecialProperty) - - /// Unhandled properties. - case other(key: String?, value: String) } // TODO: erm, separate out or fold into something? splat it in? diff --git a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift index e5b65a46c..911312121 100644 --- a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift +++ b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift @@ -397,8 +397,9 @@ extension Source { return .pcreSpecial(pcreSpecial) } - // Otherwise we don't know what this is. - return .other(key: nil, value: value) + // TODO: This should be versioned, and do we want a more lax behavior for + // the runtime? + throw ParseError.unknownProperty(key: nil, value: value) } static func classifyCharacterProperty( @@ -435,6 +436,8 @@ extension Source { if let match = match { return match } - return .other(key: key, value: value) + // TODO: This should be versioned, and do we want a more lax behavior for + // the runtime? + throw ParseError.unknownProperty(key: key, value: value) } } diff --git a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift index 621d6ea11..c3d74c30b 100644 --- a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift +++ b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift @@ -57,8 +57,8 @@ enum ParseError: Error, Hashable { case expectedCustomCharacterClassMembers case invalidCharacterClassRangeOperand - case invalidPOSIXSetName(String) case emptyProperty + case unknownProperty(key: String?, value: String) case expectedGroupSpecifier case unbalancedEndOfGroup @@ -142,10 +142,13 @@ extension ParseError: CustomStringConvertible { return "expected custom character class members" case .invalidCharacterClassRangeOperand: return "invalid character class range" - case let .invalidPOSIXSetName(n): - return "invalid character set name: '\(n)'" case .emptyProperty: return "empty property" + case .unknownProperty(let key, let value): + if let key = key { + return "unknown character property '\(key)=\(value)'" + } + return "unknown character property '\(value)'" case .expectedGroupSpecifier: return "expected group specifier" case .unbalancedEndOfGroup: diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index b49804ca1..3c84195aa 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -423,10 +423,6 @@ extension AST.Atom.CharacterProperty { case .onigurumaSpecial(let s): throw Unsupported("TODO: map Oniguruma special: \(s)") - - case let .other(key, value): - throw Unsupported( - "TODO: map other \(key ?? "")=\(value)") } }() diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index bdae250ba..4043e4ccb 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -501,7 +501,6 @@ extension RegexTests { parseTest(#"[[:a]]"#, charClass(charClass(":", "a"))) parseTest(#"[[:}]]"#, charClass(charClass(":", "}"))) parseTest(#"[[:{]]"#, charClass(charClass(":", "{"))) - parseTest(#"[[:{:]]"#, charClass(posixProp_m(.other(key: nil, value: "{")))) parseTest(#"[[:}:]]"#, charClass(charClass(":", "}", ":"))) parseTest( @@ -1141,14 +1140,6 @@ extension RegexTests { #"\p{C}+"#, oneOrMore(of: prop(.generalCategory(.other)))) - // TODO: Start erroring on these? - parseTest(#"\p{Lx}"#, prop(.other(key: nil, value: "Lx"))) - parseTest(#"\p{gcL}"#, prop(.other(key: nil, value: "gcL"))) - parseTest(#"\p{x=y}"#, prop(.other(key: "x", value: "y"))) - parseTest(#"\p{aaa(b)}"#, prop(.other(key: nil, value: "aaa(b)"))) - parseTest("[[:a():]]", charClass(posixProp_m(.other(key: nil, value: "a()")))) - parseTest(#"\p{aaa\p{b}}"#, concat(prop(.other(key: nil, value: #"aaa\p{b"#)), "}")) - // UAX44-LM3 means all of the below are equivalent. let lowercaseLetter = prop(.generalCategory(.lowercaseLetter)) parseTest(#"\p{ll}"#, lowercaseLetter) @@ -2231,12 +2222,12 @@ extension RegexTests { diagnosticTest(#"\x{5"#, .expected("}")) diagnosticTest(#"\N{A"#, .expected("}")) diagnosticTest(#"\N{U+A"#, .expected("}")) - diagnosticTest(#"\p{a"#, .expected("}")) + diagnosticTest(#"\p{a"#, .unknownProperty(key: nil, value: "a")) diagnosticTest(#"\p{a="#, .emptyProperty) diagnosticTest(#"\p{a=}"#, .emptyProperty) - diagnosticTest(#"\p{a=b"#, .expected("}")) - diagnosticTest(#"\p{aaa[b]}"#, .expected("}")) - diagnosticTest(#"\p{a=b=c}"#, .expected("}")) + diagnosticTest(#"\p{a=b"#, .unknownProperty(key: "a", value: "b")) + diagnosticTest(#"\p{aaa[b]}"#, .unknownProperty(key: nil, value: "aaa")) + diagnosticTest(#"\p{a=b=c}"#, .unknownProperty(key: "a", value: "b")) diagnosticTest(#"(?#"#, .expected(")")) diagnosticTest(#"(?x"#, .expected(")")) @@ -2321,6 +2312,16 @@ extension RegexTests { diagnosticTest(#"\\#u{E9}"#, .invalidEscape("é")) diagnosticTest(#"\˂"#, .invalidEscape("˂")) + // MARK: Character properties + + diagnosticTest(#"\p{Lx}"#, .unknownProperty(key: nil, value: "Lx")) + diagnosticTest(#"\p{gcL}"#, .unknownProperty(key: nil, value: "gcL")) + diagnosticTest(#"\p{x=y}"#, .unknownProperty(key: "x", value: "y")) + diagnosticTest(#"\p{aaa(b)}"#, .unknownProperty(key: nil, value: "aaa(b)")) + diagnosticTest("[[:a():]]", .unknownProperty(key: nil, value: "a()")) + diagnosticTest(#"\p{aaa\p{b}}"#, .unknownProperty(key: nil, value: #"aaa\p{b"#)) + diagnosticTest(#"[[:{:]]"#, .unknownProperty(key: nil, value: "{")) + // MARK: Matching options diagnosticTest("(?-y{g})", .cannotRemoveTextSegmentOptions)