Skip to content

Commit 4ee6a84

Browse files
authored
Merge pull request #58835 from hamishknight/allow-prefix-slash
2 parents 49148e6 + 350a01a commit 4ee6a84

File tree

9 files changed

+225
-103
lines changed

9 files changed

+225
-103
lines changed

include/swift/AST/DiagnosticsParse.def

-3
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none,
9494
ERROR(regex_literal_parsing_error,none,
9595
"%0", (StringRef))
9696

97-
ERROR(prefix_slash_not_allowed,none,
98-
"prefix operator may not contain '/'", ())
99-
10097
//------------------------------------------------------------------------------
10198
// MARK: Lexer diagnostics
10299
//------------------------------------------------------------------------------

include/swift/Parse/Parser.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -1763,10 +1763,7 @@ class Parser {
17631763
/// Try re-lex a '/' operator character as a regex literal. This should be
17641764
/// called when parsing in an expression position to ensure a regex literal is
17651765
/// correctly parsed.
1766-
///
1767-
/// If \p mustBeRegex is set to true, a regex literal will always be lexed if
1768-
/// enabled. Otherwise, it will not be lexed if it may be ambiguous.
1769-
void tryLexRegexLiteral(bool mustBeRegex);
1766+
void tryLexRegexLiteral(bool forUnappliedOperator);
17701767

17711768
void validateCollectionElement(ParserResult<Expr> element);
17721769

lib/Parse/Lexer.cpp

+43-13
Original file line numberDiff line numberDiff line change
@@ -2000,23 +2000,11 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20002000
// 2
20012001
// }
20022002
//
2003-
// This takes advantage of the consistent operator spacing rule. We also
2004-
// need to ban ')' to avoid ambiguity with unapplied operator references e.g
2005-
// `reduce(1, /)`. This would be invalid regex syntax anyways. Note this
2006-
// doesn't totally save us from e.g `foo(/, 0)`, but it should at least
2007-
// help, and it ensures users can always surround their operator ref in
2008-
// parens `(/)` to fix the issue.
2003+
// This takes advantage of the consistent operator spacing rule.
20092004
// TODO: This heuristic should be sunk into the Swift library once we have a
20102005
// way of doing fix-its from there.
20112006
auto *RegexContentStart = TokStart + 1;
20122007
switch (*RegexContentStart) {
2013-
case ')': {
2014-
if (!MustBeRegex)
2015-
return false;
2016-
2017-
// ')' is invalid anyway, so we can let the parser diagnose it.
2018-
break;
2019-
}
20202008
case ' ':
20212009
case '\t': {
20222010
if (!MustBeRegex)
@@ -2072,6 +2060,48 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20722060
Ptr--;
20732061
}
20742062

2063+
// If we're tentatively lexing `/.../`, scan to make sure we don't have any
2064+
// unbalanced ')'s. This helps avoid ambiguity with unapplied operator
2065+
// references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid
2066+
// regex syntax anyways. This ensures users can surround their operator ref
2067+
// in parens `(/)` to fix the issue. This also applies to prefix operators
2068+
// that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether
2069+
// or not we're in a custom character class `[...]`, as parens are literal
2070+
// there.
2071+
// TODO: This should be sunk into the Swift library.
2072+
if (IsForwardSlash && !MustBeRegex) {
2073+
unsigned CharClassDepth = 0;
2074+
unsigned GroupDepth = 0;
2075+
for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) {
2076+
switch (*Cursor) {
2077+
case '\\':
2078+
// Skip over the next character of an escape.
2079+
Cursor++;
2080+
break;
2081+
case '(':
2082+
if (CharClassDepth == 0)
2083+
GroupDepth += 1;
2084+
break;
2085+
case ')':
2086+
if (CharClassDepth != 0)
2087+
break;
2088+
2089+
// Invalid, so bail.
2090+
if (GroupDepth == 0)
2091+
return false;
2092+
2093+
GroupDepth -= 1;
2094+
break;
2095+
case '[':
2096+
CharClassDepth += 1;
2097+
break;
2098+
case ']':
2099+
if (CharClassDepth != 0)
2100+
CharClassDepth -= 1;
2101+
}
2102+
}
2103+
}
2104+
20752105
// Update to point to where we ended regex lexing.
20762106
assert(Ptr > TokStart && Ptr <= BufferEnd);
20772107
CurPtr = Ptr;

lib/Parse/ParseDecl.cpp

-7
Original file line numberDiff line numberDiff line change
@@ -8541,13 +8541,6 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) {
85418541
Tok.getRawText().front() == '!'))
85428542
diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap);
85438543

8544-
// Prefix operators may not contain the `/` character when `/.../` regex
8545-
// literals are enabled.
8546-
if (Context.LangOpts.EnableBareSlashRegexLiterals) {
8547-
if (Attributes.hasAttribute<PrefixAttr>() && Tok.getText().contains("/"))
8548-
diagnose(Tok, diag::prefix_slash_not_allowed);
8549-
}
8550-
85518544
// A common error is to try to define an operator with something in the
85528545
// unicode plane considered to be an operator, or to try to define an
85538546
// operator like "not". Analyze and diagnose this specifically.

lib/Parse/ParseExpr.cpp

+60-46
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
513513
UnresolvedDeclRefExpr *Operator;
514514

515515
// First check to see if we have the start of a regex literal `/.../`.
516-
tryLexRegexLiteral(/*mustBeRegex*/ true);
516+
tryLexRegexLiteral(/*forUnappliedOperator*/ false);
517517

518518
switch (Tok.getKind()) {
519519
default:
@@ -880,56 +880,64 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() {
880880
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
881881
}
882882

883-
void Parser::tryLexRegexLiteral(bool mustBeRegex) {
883+
void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
884884
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
885885
return;
886886

887887
// Check to see if we have a regex literal `/.../`, optionally with a prefix
888888
// operator e.g `!/.../`.
889+
bool mustBeRegex = false;
889890
switch (Tok.getKind()) {
890891
case tok::oper_prefix:
892+
// Prefix operators may contain `/` characters, so this may not be a regex,
893+
// and as such need to make sure we have a closing `/`.
894+
break;
891895
case tok::oper_binary_spaced:
892-
case tok::oper_binary_unspaced: {
893-
// Check to see if we have an operator containing '/'.
894-
auto slashIdx = Tok.getText().find("/");
895-
if (slashIdx == StringRef::npos)
896-
break;
896+
case tok::oper_binary_unspaced:
897+
// When re-lexing for a unary expression, binary operators are always
898+
// invalid, so we can be confident in always lexing a regex literal.
899+
mustBeRegex = !forUnappliedOperator;
900+
break;
901+
default:
902+
// We only re-lex regex literals for operator tokens.
903+
return;
904+
}
897905

898-
CancellableBacktrackingScope backtrack(*this);
899-
{
900-
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
901-
regexScope.emplace(*L, mustBeRegex);
902-
903-
// Try re-lex as a `/.../` regex literal, this will split an operator if
904-
// necessary.
905-
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
906-
907-
// If we didn't split a prefix operator, reset the regex lexing scope.
908-
// Otherwise, we want to keep it in place for the next token.
909-
auto didSplit = L->peekNextToken().getLength() == slashIdx;
910-
if (!didSplit)
911-
regexScope.reset();
912-
913-
// Discard the current token, which will be replaced by the re-lexed
914-
// token, which will either be a regex literal token, a prefix operator,
915-
// or the original unchanged token.
916-
discardToken();
917-
918-
// If we split a prefix operator from the regex literal, and are not sure
919-
// whether this should be a regex, backtrack if we didn't end up lexing a
920-
// regex literal.
921-
if (didSplit && !mustBeRegex &&
922-
!L->peekNextToken().is(tok::regex_literal)) {
923-
return;
924-
}
906+
// Check to see if we have an operator containing '/'.
907+
auto slashIdx = Tok.getText().find("/");
908+
if (slashIdx == StringRef::npos)
909+
return;
910+
911+
CancellableBacktrackingScope backtrack(*this);
912+
{
913+
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
914+
regexScope.emplace(*L, mustBeRegex);
925915

926-
// Otherwise, accept the result.
927-
backtrack.cancelBacktrack();
916+
// Try re-lex as a `/.../` regex literal, this will split an operator if
917+
// necessary.
918+
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
919+
920+
// If we didn't split a prefix operator, reset the regex lexing scope.
921+
// Otherwise, we want to keep it in place for the next token.
922+
auto didSplit = L->peekNextToken().getLength() == slashIdx;
923+
if (!didSplit)
924+
regexScope.reset();
925+
926+
// Discard the current token, which will be replaced by the re-lexed
927+
// token, which will either be a regex literal token, a prefix operator,
928+
// or the original unchanged token.
929+
discardToken();
930+
931+
// If we split a prefix operator from the regex literal, and are not sure
932+
// whether this should be a regex, backtrack if we didn't end up lexing a
933+
// regex literal.
934+
if (didSplit && !mustBeRegex &&
935+
!L->peekNextToken().is(tok::regex_literal)) {
936+
return;
928937
}
929-
break;
930-
}
931-
default:
932-
break;
938+
939+
// Otherwise, accept the result.
940+
backtrack.cancelBacktrack();
933941
}
934942
}
935943

@@ -3223,17 +3231,23 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
32233231
SourceLoc FieldNameLoc;
32243232
parseOptionalArgumentLabel(FieldName, FieldNameLoc);
32253233

3226-
// First check to see if we have the start of a regex literal `/.../`. We
3227-
// need to do this before handling unapplied operator references, as e.g
3228-
// `(/, /)` might be a regex literal.
3229-
tryLexRegexLiteral(/*mustBeRegex*/ false);
3230-
32313234
// See if we have an operator decl ref '(<op>)'. The operator token in
32323235
// this case lexes as a binary operator because it neither leads nor
32333236
// follows a proper subexpression.
3237+
auto isUnappliedOperator = [&]() {
3238+
return Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma);
3239+
};
3240+
3241+
if (isUnappliedOperator()) {
3242+
// Check to see if we have the start of a regex literal `/.../`. We need
3243+
// to do this for an unapplied operator reference, as e.g `(/, /)` might
3244+
// be a regex literal.
3245+
tryLexRegexLiteral(/*forUnappliedOperator*/ true);
3246+
}
3247+
32343248
ParserStatus Status;
32353249
Expr *SubExpr = nullptr;
3236-
if (Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma)) {
3250+
if (isUnappliedOperator()) {
32373251
SyntaxParsingContext operatorContext(SyntaxContext,
32383252
SyntaxKind::IdentifierExpr);
32393253
DeclNameLoc Loc;

test/StringProcessing/Frontend/enable-flag.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
// REQUIRES: swift_in_compiler
66

7-
prefix operator / // expected-error {{prefix operator may not contain '/'}}
7+
prefix operator /
88

99
_ = /x/
1010
_ = #/x/#

0 commit comments

Comments
 (0)