Skip to content

Commit ba28b6a

Browse files
committed
[Parse] Split prefix operators from regex in the lexer
Teach the lexer not to consider `/` an operator character when attempting to re-lex a regex literal. This allows us to split off a prefix operator. Previously this was done after the fact in the parser, but that didn't cover the unapplied infix operator case, and didn't form a `tok::amp_prefix` for `foo(&/.../)`, which led to a suboptimal diagnostic. It also means we now split an operator for cases such as `foo(!/^/)` rather than treating it as an unapplied infix operator. rdar://92469917
1 parent 1a86c1e commit ba28b6a

File tree

5 files changed

+86
-75
lines changed

5 files changed

+86
-75
lines changed

include/swift/Parse/Lexer.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -565,10 +565,17 @@ class Lexer {
565565
void operator=(const SILBodyRAII&) = delete;
566566
};
567567

568-
/// Attempt to re-lex a regex literal with forward slashes `/.../` from a
569-
/// given lexing state. If \p mustBeRegex is set to true, a regex literal will
570-
/// always be lexed. Otherwise, it will not be lexed if it may be ambiguous.
571-
void tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex);
568+
/// A RAII object for switching the lexer into forward slash regex `/.../`
569+
/// lexing mode.
570+
class ForwardSlashRegexRAII final {
571+
llvm::SaveAndRestore<LexerForwardSlashRegexMode> Scope;
572+
573+
public:
574+
ForwardSlashRegexRAII(Lexer &L, bool MustBeRegex)
575+
: Scope(L.ForwardSlashRegexMode,
576+
MustBeRegex ? LexerForwardSlashRegexMode::Always
577+
: LexerForwardSlashRegexMode::Tentative) {}
578+
};
572579

573580
private:
574581
/// Nul character meaning kind.

include/swift/Parse/Parser.h

-1
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,6 @@ class Parser {
17581758
ParserResult<Expr>
17591759
parseExprPoundCodeCompletion(Optional<StmtKind> ParentKind);
17601760

1761-
UnresolvedDeclRefExpr *makeExprOperator(const Token &opToken);
17621761
UnresolvedDeclRefExpr *parseExprOperator();
17631762

17641763
/// Try to re-lex a '/' operator character as a regex literal. This should be

lib/Parse/Lexer.cpp

+7-9
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,13 @@ void Lexer::lexOperatorIdentifier() {
815815
rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
816816
break;
817817
}
818+
819+
// If we are lexing a `/.../` regex literal, we don't consider `/` to be an
820+
// operator character.
821+
if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
822+
*CurPtr == '/') {
823+
break;
824+
}
818825
} while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));
819826

820827
if (CurPtr-TokStart > 2) {
@@ -2080,15 +2087,6 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20802087
return true;
20812088
}
20822089

2083-
void Lexer::tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex) {
2084-
// Try re-lex with forward slash enabled.
2085-
llvm::SaveAndRestore<LexerForwardSlashRegexMode> RegexLexingScope(
2086-
ForwardSlashRegexMode, mustBeRegex
2087-
? LexerForwardSlashRegexMode::Always
2088-
: LexerForwardSlashRegexMode::Tentative);
2089-
restoreState(S, /*enableDiagnostics*/ true);
2090-
}
2091-
20922090
/// lexEscapedIdentifier:
20932091
/// identifier ::= '`' identifier '`'
20942092
///

lib/Parse/ParseExpr.cpp

+40-33
Original file line numberDiff line numberDiff line change
@@ -546,19 +546,6 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
546546
break;
547547
}
548548
case tok::oper_prefix: {
549-
// Check to see if we can split a prefix operator containing `/`, e.g `!/`,
550-
// which might be a prefix operator on a regex literal.
551-
if (Context.LangOpts.EnableBareSlashRegexLiterals) {
552-
auto slashIdx = Tok.getText().find("/");
553-
if (slashIdx != StringRef::npos) {
554-
auto prefix = Tok.getText().take_front(slashIdx);
555-
if (!prefix.empty()) {
556-
Operator = makeExprOperator({Tok.getKind(), prefix});
557-
consumeStartingCharacterOfCurrentToken(Tok.getKind(), prefix.size());
558-
break;
559-
}
560-
}
561-
}
562549
Operator = parseExprOperator();
563550
break;
564551
}
@@ -880,45 +867,65 @@ static DeclRefKind getDeclRefKindForOperator(tok kind) {
880867
}
881868
}
882869

883-
UnresolvedDeclRefExpr *Parser::makeExprOperator(const Token &Tok) {
870+
/// parseExprOperator - Parse an operator reference expression. These
871+
/// are not "proper" expressions; they can only appear in binary/unary
872+
/// operators.
873+
UnresolvedDeclRefExpr *Parser::parseExprOperator() {
884874
assert(Tok.isAnyOperator());
885875
DeclRefKind refKind = getDeclRefKindForOperator(Tok.getKind());
886876
SourceLoc loc = Tok.getLoc();
887877
DeclNameRef name(Context.getIdentifier(Tok.getText()));
878+
consumeToken();
888879
// Bypass local lookup.
889880
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
890881
}
891882

892-
/// parseExprOperator - Parse an operator reference expression. These
893-
/// are not "proper" expressions; they can only appear in binary/unary
894-
/// operators.
895-
UnresolvedDeclRefExpr *Parser::parseExprOperator() {
896-
auto *op = makeExprOperator(Tok);
897-
consumeToken();
898-
return op;
899-
}
900-
901883
void Parser::tryLexRegexLiteral(bool mustBeRegex) {
902884
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
903885
return;
904886

905-
// Check to see if we have the start of a regex literal `/.../`.
887+
// Check to see if we have a regex literal `/.../`, optionally with a prefix
888+
// operator, e.g. `!/.../`.
906889
switch (Tok.getKind()) {
907890
case tok::oper_prefix:
908891
case tok::oper_binary_spaced:
909892
case tok::oper_binary_unspaced: {
910-
if (!Tok.getText().startswith("/"))
893+
// Check to see if we have an operator containing '/'.
894+
auto slashIdx = Tok.getText().find("/");
895+
if (slashIdx == StringRef::npos)
911896
break;
912897

913-
// Try re-lex as a `/.../` regex literal.
914-
auto state = getParserPosition().LS;
915-
L->tryLexForwardSlashRegexLiteralFrom(state, mustBeRegex);
916-
917-
// Discard the current token, which will be replaced by the re-lexed token,
918-
// which may or may not be a regex literal token.
919-
discardToken();
898+
CancellableBacktrackingScope backtrack(*this);
899+
{
900+
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
901+
regexScope.emplace(*L, mustBeRegex);
902+
903+
// Try to re-lex as a `/.../` regex literal; this will split an operator if
904+
// necessary.
905+
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
906+
907+
// If we didn't split a prefix operator, reset the regex lexing scope.
908+
// Otherwise, we want to keep it in place for the next token.
909+
auto didSplit = L->peekNextToken().getLength() == slashIdx;
910+
if (!didSplit)
911+
regexScope.reset();
912+
913+
// Discard the current token, which will be replaced by the re-lexed
914+
// token, which will either be a regex literal token, a prefix operator,
915+
// or the original unchanged token.
916+
discardToken();
917+
918+
// If we split a prefix operator from the regex literal, and are not sure
919+
// whether this should be a regex, backtrack if we didn't end up lexing a
920+
// regex literal.
921+
if (didSplit && !mustBeRegex &&
922+
!L->peekNextToken().is(tok::regex_literal)) {
923+
return;
924+
}
920925

921-
assert(Tok.getText().startswith("/"));
926+
// Otherwise, accept the result.
927+
backtrack.cancelBacktrack();
928+
}
922929
break;
923930
}
924931
default:

test/StringProcessing/Parse/forward-slash-regex.swift

+28-28
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,23 @@ _ = /x/.self
3535
_ = /\//
3636
_ = /\\/
3737

38-
// These unfortunately become infix `=/`. We could likely improve the diagnostic
39-
// though.
40-
let z=/0/
41-
// expected-error@-1 {{type annotation missing in pattern}}
42-
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
43-
// expected-error@-3 {{expected expression after unary operator}}
44-
// expected-error@-4 {{cannot find operator '=/' in scope}}
45-
// expected-error@-5 {{'/' is not a postfix unary operator}}
46-
_=/0/
47-
// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}}
48-
// expected-error@-2 {{cannot find operator '=/' in scope}}
49-
// expected-error@-3 {{'/' is not a postfix unary operator}}
38+
// This is just here to appease typo correction.
39+
let y = 0
40+
41+
// These unfortunately become prefix `=` and infix `=/` respectively. We could
42+
// likely improve the diagnostic though.
43+
do {
44+
let z=/0/
45+
// expected-error@-1 {{type annotation missing in pattern}}
46+
// expected-error@-2 {{consecutive statements on a line must be separated by ';'}}
47+
// expected-error@-3 {{expected expression}}
48+
}
49+
do {
50+
_=/0/
51+
// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}}
52+
// expected-error@-2 {{cannot find operator '=/' in scope}}
53+
// expected-error@-3 {{'/' is not a postfix unary operator}}
54+
}
5055

5156
_ = /x
5257
// expected-error@-1 {{unterminated regex literal}}
@@ -58,9 +63,11 @@ _ = (!/x/)
5863
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}
5964

6065
_ = !/ /
61-
// expected-error@-1 {{unary operator cannot be separated from its operand}}
62-
// expected-error@-2 {{cannot find operator '!/' in scope}}
63-
// expected-error@-3 {{unterminated regex literal}}
66+
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
67+
// expected-error@-2 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}
68+
69+
_ = !!/ /
70+
// expected-error@-1 {{regex literal may not start with space; add backslash to escape}}
6471

6572
_ = !!/x/
6673
_ = (!!/x/)
@@ -75,7 +82,6 @@ _ = /x/ + /y/ // expected-error {{binary operator '+' cannot be applied to two '
7582
_ = /x/+/y/
7683
// expected-error@-1 {{cannot find operator '+/' in scope}}
7784
// expected-error@-2 {{'/' is not a postfix unary operator}}
78-
// expected-error@-3 {{cannot find 'y' in scope}}
7985

8086
_ = /x/?.blah
8187
// expected-error@-1 {{cannot use optional chaining on non-optional value of type 'Regex<Substring>'}}
@@ -102,7 +108,6 @@ _ = /x/ ... /y/ // expected-error {{referencing operator function '...' on 'Comp
102108
_ = /x/.../y/
103109
// expected-error@-1 {{missing whitespace between '...' and '/' operators}}
104110
// expected-error@-2 {{'/' is not a postfix unary operator}}
105-
// expected-error@-3 {{cannot find 'y' in scope}}
106111

107112
_ = /x /...
108113
// expected-error@-1 {{unary operator '...' cannot be applied to an operand of type 'Regex<Substring>'}}
@@ -120,12 +125,7 @@ func foo<T>(_ x: T, y: T) {}
120125
foo(/abc/, y: /abc /)
121126

122127
func bar<T>(_ x: inout T) {}
123-
124-
// TODO: We split this into a prefix '&', but inout is handled specially when
125-
// parsing an argument list. This shouldn't matter anyway, but we should at
126-
// least have a custom diagnostic.
127-
bar(&/x/)
128-
// expected-error@-1 {{'&' is not a prefix unary operator}}
128+
bar(&/x/) // expected-error {{cannot pass immutable value as inout argument: literals are not mutable}}
129129

130130
struct S {
131131
subscript(x: Regex<Substring>) -> Void { () }
@@ -259,7 +259,7 @@ _ = /x/*comment*/
259259
// expected-error@-1 {{unterminated regex literal}}
260260

261261
// These become regex literals, unless surrounded in parens.
262-
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 3{{'baz' declared here}}
262+
func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}}
263263
baz(/, /)
264264
// expected-error@-1 {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
265265
// expected-error@-2 {{missing argument for parameter #2 in call}}
@@ -278,8 +278,7 @@ do {
278278
// expected-note@-2 {{to match this opening '('}}
279279
} // expected-error {{expected ')' in expression list}}
280280

281-
// TODO: Should we do prefix operator splitting here?
282-
baz(^^/, /)
281+
baz(^^/, /) // expected-error {{missing argument for parameter #2 in call}}
283282
baz((^^/), /)
284283

285284
func bazbaz(_ x: (Int, Int) -> Int, _ y: Int) {}
@@ -305,8 +304,9 @@ quxqux(/^/) // expected-error {{cannot convert value of type 'Regex<Substring>'
305304
quxqux((/^/)) // expected-error {{cannot convert value of type 'Regex<Substring>' to expected argument type '(Int, Int) -> Int'}}
306305
quxqux({ $0 /^/ $1 })
307306

308-
// FIXME: We should be able to do operator splitting here and form `!(/^/)`.
309-
quxqux(!/^/) // expected-error {{unary operators must not be juxtaposed; parenthesize inner expression}}
307+
quxqux(!/^/)
308+
// expected-error@-1 {{cannot convert value of type 'Bool' to expected argument type '(Int, Int) -> Int'}}
309+
// expected-error@-2 {{cannot convert value of type 'Regex<Substring>' to expected argument type 'Bool'}}
310310

311311
quxqux(/^)
312312

0 commit comments

Comments
 (0)