swiftlang · hamishknight · Apr 19, 2022 · Apr 19, 2022
diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
@@ -1176,6 +1176,14 @@ extension Source {
         // character property name anyway, and it's nice not to have diverging
         // logic for these cases.
         return true
+      case "\\":
+        // An escape sequence, which may include e.g '\Q :] \E'. ICU bails here
+        // for all its known escape sequences (e.g '\a', '\e', '\f', ...). It
+        // seems character class escapes e.g '\d' are excluded, however it's not
+        // clear that is intentional. Let's apply the rule for any escape, as a
+        // backslash would never be a valid character property name, and we can
+        // diagnose any invalid escapes when parsing as a character class.
+        return true
       default:
         // We may want to handle other metacharacters here, e.g '{', '(', ')',
         // as they're not valid character property names. However for now

diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift
@@ -503,6 +503,25 @@ extension RegexTests {
     parseTest(#"[[:{]]"#, charClass(charClass(":", "{")))
     parseTest(#"[[:}:]]"#, charClass(charClass(":", "}", ":")))
 
+    parseTest(
+      #"[:[:space:]:]"#,
+      charClass(":", posixProp_m(.binary(.whitespace)), ":")
+    )
+    parseTest(
+      #"[:a[:space:]b:]"#,
+      charClass(":", "a", posixProp_m(.binary(.whitespace)), "b", ":")
+    )
+
+    // ICU parses a custom character class if it sees any of its known escape
+    // sequences in a POSIX character property (though it appears to exclude
+    // character class escapes e.g '\d'). We do so for any escape sequence as
+    // '\' is not a valid character property character.
+    parseTest(#"[:\Q:]\E]"#, charClass(":", quote_m(":]")))
+    parseTest(#"[:\a:]"#, charClass(":", atom_m(.escaped(.alarm)), ":"))
+    parseTest(#"[:\d:]"#, charClass(":", atom_m(.escaped(.decimalDigit)), ":"))
+    parseTest(#"[:\\:]"#, charClass(":", "\\", ":"))
+    parseTest(#"[:\:]"#, charClass(":", ":"))
+
     parseTest(
       #"\D\S\W"#,
       concat(
@@ -2319,7 +2338,7 @@ extension RegexTests {
     diagnosticTest(#"\p{x=y}"#, .unknownProperty(key: "x", value: "y"))
     diagnosticTest(#"\p{aaa(b)}"#, .unknownProperty(key: nil, value: "aaa(b)"))
     diagnosticTest("[[:a():]]", .unknownProperty(key: nil, value: "a()"))
-    diagnosticTest(#"\p{aaa\p{b}}"#, .unknownProperty(key: nil, value: #"aaa\p{b"#))
+    diagnosticTest(#"\p{aaa\p{b}}"#, .unknownProperty(key: nil, value: "aaa"))
     diagnosticTest(#"[[:{:]]"#, .unknownProperty(key: nil, value: "{"))
 
     // MARK: Matching options