Skip to content

Commit 8d59eb0

Browse files
committed
[Parse] Ban trailing space for /.../ regex literals
Ban space and tab as the last character of a `/.../` regex literal, unless escaped with a backslash. This matches the banning of space and tab as the first character, and helps avoid breaking source in even more cases.
1 parent 91e27ad commit 8d59eb0

8 files changed

+231
-118
lines changed

include/swift/AST/DiagnosticsParse.def

+3
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ ERROR(lex_invalid_closing_delimiter,none,
143143
ERROR(lex_regex_literal_invalid_starting_char,none,
144144
"regex literal may not start with %0; add backslash to escape",
145145
(StringRef))
146+
ERROR(lex_regex_literal_invalid_ending_char,none,
147+
"regex literal may not end with %0; use extended literal instead",
148+
(StringRef))
146149
ERROR(lex_regex_literal_unterminated,none,
147150
"unterminated regex literal", ())
148151

lib/Parse/Lexer.cpp

+83-69
Original file line numberDiff line numberDiff line change
@@ -2048,6 +2048,14 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
20482048

20492049
bool IsForwardSlash = (*TokStart == '/');
20502050

2051+
auto spaceOrTabDescription = [](char c) -> StringRef {
2052+
switch (c) {
2053+
case ' ': return "space";
2054+
case '\t': return "tab";
2055+
default: llvm_unreachable("Unhandled case");
2056+
}
2057+
};
2058+
20512059
// Check if we're able to lex a `/.../` regex.
20522060
if (IsForwardSlash) {
20532061
// For `/.../` regex literals, we need to ban space and tab at the start of
@@ -2063,33 +2071,17 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
20632071
// TODO: This heuristic should be sunk into the Swift library once we have a
20642072
// way of doing fix-its from there.
20652073
auto *RegexContentStart = TokStart + 1;
2066-
switch (*RegexContentStart) {
2067-
case ' ':
2068-
case '\t': {
2074+
if (*RegexContentStart == ' ' || *RegexContentStart == '\t') {
20692075
if (!MustBeRegex)
20702076
return nullptr;
20712077

20722078
if (Diags) {
20732079
// We must have a regex, so emit an error for space and tab.
2074-
StringRef DiagChar;
2075-
switch (*RegexContentStart) {
2076-
case ' ':
2077-
DiagChar = "space";
2078-
break;
2079-
case '\t':
2080-
DiagChar = "tab";
2081-
break;
2082-
default:
2083-
llvm_unreachable("Unhandled case");
2084-
}
20852080
Diags->diagnose(getSourceLoc(RegexContentStart),
2086-
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2081+
diag::lex_regex_literal_invalid_starting_char,
2082+
spaceOrTabDescription(*RegexContentStart))
20872083
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20882084
}
2089-
break;
2090-
}
2091-
default:
2092-
break;
20932085
}
20942086
}
20952087

@@ -2106,60 +2098,82 @@ const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
21062098
if (Ptr == TokStart)
21072099
return nullptr;
21082100

2109-
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2110-
// We prefer to lex the comment as it's more likely than not that is what
2111-
// the user is expecting.
2112-
// TODO: This should be sunk into the Swift library.
2113-
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
2114-
if (!MustBeRegex)
2115-
return nullptr;
2101+
// Perform some additional heuristics to see if we can lex `/.../`.
2102+
// TODO: These should all be sunk into the Swift library.
2103+
if (IsForwardSlash) {
2104+
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2105+
// We prefer to lex the comment as it's more likely than not that is what
2106+
// the user is expecting.
2107+
if (Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
2108+
if (!MustBeRegex)
2109+
return nullptr;
21162110

2117-
if (Diags) {
2118-
Diags->diagnose(getSourceLoc(TokStart),
2119-
diag::lex_regex_literal_unterminated);
2120-
}
2121-
// Move the pointer back to the '/' of the comment.
2122-
Ptr--;
2123-
}
2124-
2125-
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2126-
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2127-
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
2128-
// regex syntax anyways. This ensures users can surround their operator ref
2129-
// in parens `(/)` to fix the issue. This also applies to prefix operators
2130-
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2131-
// or not we're in a custom character class `[...]`, as parens are literal
2132-
// there.
2133-
// TODO: This should be sunk into the Swift library.
2134-
if (IsForwardSlash && !MustBeRegex) {
2135-
unsigned CharClassDepth = 0;
2136-
unsigned GroupDepth = 0;
2137-
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
2138-
switch (*Cursor) {
2139-
case '\\':
2140-
// Skip over the next character of an escape.
2141-
Cursor++;
2142-
break;
2143-
case '(':
2144-
if (CharClassDepth == 0)
2145-
GroupDepth += 1;
2146-
break;
2147-
case ')':
2148-
if (CharClassDepth != 0)
2111+
if (Diags) {
2112+
Diags->diagnose(getSourceLoc(TokStart),
2113+
diag::lex_regex_literal_unterminated);
2114+
}
2115+
// Move the pointer back to the '/' of the comment.
2116+
Ptr--;
2117+
}
2118+
auto *TokEnd = Ptr - 1;
2119+
auto *ContentEnd = TokEnd - 1;
2120+
2121+
// We also ban unescaped space and tab at the end of a `/.../` literal.
2122+
if (*TokEnd == '/' && (TokEnd - TokStart > 2) && ContentEnd[-1] != '\\' &&
2123+
(*ContentEnd == ' ' || *ContentEnd == '\t')) {
2124+
if (!MustBeRegex)
2125+
return nullptr;
2126+
2127+
if (Diags) {
2128+
// Diagnose and suggest using a `#/.../#` literal instead. We could
2129+
// suggest escaping, but that would be wrong if the user has written (?x).
2130+
// TODO: Should we suggest this for space-as-first character too?
2131+
Diags->diagnose(getSourceLoc(ContentEnd),
2132+
diag::lex_regex_literal_invalid_ending_char,
2133+
spaceOrTabDescription(*ContentEnd))
2134+
.fixItInsert(getSourceLoc(TokStart), "#")
2135+
.fixItInsert(getSourceLoc(Ptr), "#");
2136+
}
2137+
}
2138+
2139+
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2140+
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2141+
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
2142+
// regex syntax anyways. This ensures users can surround their operator ref
2143+
// in parens `(/)` to fix the issue. This also applies to prefix operators
2144+
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2145+
// or not we're in a custom character class `[...]`, as parens are literal
2146+
// there.
2147+
if (!MustBeRegex) {
2148+
unsigned CharClassDepth = 0;
2149+
unsigned GroupDepth = 0;
2150+
for (auto *Cursor = TokStart + 1; Cursor < TokEnd; Cursor++) {
2151+
switch (*Cursor) {
2152+
case '\\':
2153+
// Skip over the next character of an escape.
2154+
Cursor++;
2155+
break;
2156+
case '(':
2157+
if (CharClassDepth == 0)
2158+
GroupDepth += 1;
21492159
break;
2160+
case ')':
2161+
if (CharClassDepth != 0)
2162+
break;
21502163

2151-
// Invalid, so bail.
2152-
if (GroupDepth == 0)
2153-
return nullptr;
2164+
// Invalid, so bail.
2165+
if (GroupDepth == 0)
2166+
return nullptr;
21542167

2155-
GroupDepth -= 1;
2156-
break;
2157-
case '[':
2158-
CharClassDepth += 1;
2159-
break;
2160-
case ']':
2161-
if (CharClassDepth != 0)
2162-
CharClassDepth -= 1;
2168+
GroupDepth -= 1;
2169+
break;
2170+
case '[':
2171+
CharClassDepth += 1;
2172+
break;
2173+
case ']':
2174+
if (CharClassDepth != 0)
2175+
CharClassDepth -= 1;
2176+
}
21632177
}
21642178
}
21652179
}

test/StringProcessing/Parse/forward-slash-regex-skipping-allowed.swift

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func f() {
4343
(/E.e).foo(/0)
4444

4545
func foo<T, U>(_ x: T, _ y: U) {}
46+
foo(/E.e, /E.e)
4647
foo((/E.e), /E.e)
4748
foo((/)(E.e), /E.e)
4849

test/StringProcessing/Parse/forward-slash-regex-skipping-invalid.swift

+22
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,28 @@ func m() {
5858
// Unbalanced `}`, make sure we don't consider the string literal `{`.
5959
func n() { / "{"}/ } // expected-error {{regex literal may not start with space; add backslash to escape}}
6060
61+
func o() {
62+
_ = {
63+
0
64+
/x}}} /
65+
2
66+
} // expected-error {{extraneous '}' at top level}}
67+
// expected-error@-3 {{extraneous '}' at top level}}
68+
// expected-error@-4 {{consecutive statements on a line must be separated by ';'}}
69+
// expected-error@-5 {{unterminated regex literal}}
70+
// expected-warning@-6 {{regular expression literal is unused}}
71+
// expected-warning@-6 {{integer literal is unused}}
72+
} // expected-error {{extraneous '}' at top level}}
73+
74+
func p() {
75+
_ = 2
76+
/x} /
77+
.bitWidth
78+
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
79+
// expected-error@-3 {{unterminated regex literal}}
80+
// expected-error@-3 {{value of type 'Regex<Substring>' has no member 'bitWidth'}}
81+
} // expected-error {{extraneous '}' at top level}}
82+
6183
func err1() { _ = / 0xG}/ }
6284
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
6385
func err2() { _ = / 0oG}/ }

test/StringProcessing/Parse/forward-slash-regex-skipping.swift

+2-7
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func i() {
4949
func j() {
5050
_ = {
5151
0
52-
/x}}} /
52+
/x}}}/
5353
2
5454
}
5555
}
@@ -69,7 +69,7 @@ func m() {
6969
}
7070
func n() {
7171
_ = 2
72-
/x} /
72+
/x}/
7373
.bitWidth
7474
}
7575
func o() {
@@ -105,11 +105,6 @@ enum E {
105105
func foo<T>(_ x: T) {}
106106
}
107107

108-
func a6() {
109-
func foo<T, U>(_ x: T, _ y: U) {}
110-
foo(/E.e, /E.e) // expected-error {{expected ',' separator}}
111-
}
112-
113108
func a7() { _ = /\/}/ }
114109

115110
// Make sure we don't emit errors for these.

0 commit comments

Comments
 (0)