Skip to content

Commit b888dc0

Browse files
committed
[Parser] Don't modify the current token kind when cutting off parsing
Previously, when we reached the maximum nesting level, we changed the current token’s kind to an EOF token. A lot of places in the parser are not set up to expect this token change. The intended workaround was to check whether pushing a structure marker failed (which would change the token kind) and bail out parsing if this happened. This was fragile and caused assertion failures in assert builds. Instead of changing the current token’s kind, and failing to push the structure marker, let the lexer know that it should cut off lexing, essentially making the input buffer stop at the current position. The parser will continue to consume its current token (`Parser.Tok`) and the next token that’s already lexed in the lexer (`Lexer.NextToken`) before reaching the emulated EOF token. Thus two more tokens are parsed than before, but that shouldn’t make much of a difference.
1 parent 2b41bee commit b888dc0

File tree

10 files changed

+863
-76
lines changed

10 files changed

+863
-76
lines changed

include/swift/Parse/Lexer.h

+27
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ class Lexer {
133133
/// the next token doesn't have a comment.
134134
const char *CommentStart;
135135

136+
/// If this is not \c nullptr, all tokens after this point are treated as eof.
137+
/// Used to cut off lexing early when we detect that the nesting level is too
138+
/// deep.
139+
const char *LexerCutOffPoint = nullptr;
140+
136141
Lexer(const Lexer&) = delete;
137142
void operator=(const Lexer&) = delete;
138143

@@ -222,6 +227,28 @@ class Lexer {
222227
lexImpl();
223228
}
224229

230+
/// Cut off lexing at the current position. The next token to be lexed will
231+
/// be an EOF token, even if there is still source code to be lexed.
232+
/// The current and next token (returned by \c peekNextToken ) are not
233+
/// modified. The token after \c NextToken will be the EOF token.
234+
void cutOffLexing() {
235+
// If we already have a cut off point, don't push it further towards the
236+
// back.
237+
if (LexerCutOffPoint == nullptr || LexerCutOffPoint >= CurPtr) {
238+
LexerCutOffPoint = CurPtr;
239+
}
240+
}
241+
242+
/// If a lexer cut off point has been set returns the offset in the buffer at
243+
/// which lexing is being cut off.
244+
Optional<size_t> lexingCutOffOffset() const {
245+
if (LexerCutOffPoint) {
246+
return LexerCutOffPoint - BufferStart;
247+
} else {
248+
return None;
249+
}
250+
}
251+
225252
bool isKeepingComments() const {
226253
return RetainComments == CommentRetentionMode::ReturnAsTokens;
227254
}

include/swift/Parse/Parser.h

+5-25
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,6 @@ class Parser {
221221
/// The location of the previous token.
222222
SourceLoc PreviousLoc;
223223

224-
/// Stop parsing immediately.
225-
void cutOffParsing() {
226-
// Cut off parsing by acting as if we reached the end-of-file.
227-
Tok.setKind(tok::eof);
228-
}
229-
230224
/// Use this to assert that the parser has advanced the lexing location, e.g.
231225
/// before a specific parser function has returned.
232226
class AssertParserMadeProgressBeforeLeavingScopeRAII {
@@ -329,35 +323,21 @@ class Parser {
329323

330324
/// An RAII object that notes when we have seen a structure marker.
331325
class StructureMarkerRAII {
332-
Parser *const P;
326+
Parser &P;
333327

334328
/// Max nesting level
335329
// TODO: customizable.
336330
enum { MaxDepth = 256 };
337331

338-
StructureMarkerRAII(Parser *parser) : P(parser) {}
339-
340-
/// Have the parser start the new Structure or fail if already too deep.
341-
bool pushStructureMarker(Parser &parser, SourceLoc loc,
342-
StructureMarkerKind kind);
332+
StructureMarkerRAII(Parser &parser) : P(parser) {}
343333

344334
public:
345-
StructureMarkerRAII(Parser &parser, SourceLoc loc, StructureMarkerKind kind)
346-
: StructureMarkerRAII(
347-
pushStructureMarker(parser, loc, kind) ? &parser : nullptr) {}
335+
StructureMarkerRAII(Parser &parser, SourceLoc loc,
336+
StructureMarkerKind kind);
348337

349338
StructureMarkerRAII(Parser &parser, const Token &tok);
350339

351-
/// Did we fail to push the new structure?
352-
bool isFailed() {
353-
return P == nullptr;
354-
}
355-
356-
~StructureMarkerRAII() {
357-
if (P != nullptr) {
358-
P->StructureMarkers.pop_back();
359-
}
360-
}
340+
~StructureMarkerRAII() { P.StructureMarkers.pop_back(); }
361341
};
362342
friend class StructureMarkerRAII;
363343

lib/Parse/Lexer.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -2349,7 +2349,11 @@ void Lexer::lexImpl() {
23492349

23502350
// Remember the start of the token so we can form the text range.
23512351
const char *TokStart = CurPtr;
2352-
2352+
2353+
if (LexerCutOffPoint && CurPtr >= LexerCutOffPoint) {
2354+
return formToken(tok::eof, TokStart);
2355+
}
2356+
23532357
switch (*CurPtr++) {
23542358
default: {
23552359
char const *Tmp = CurPtr-1;

lib/Parse/ParseExpr.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -3121,10 +3121,6 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
31213121
SourceLoc &rightLoc, SyntaxKind Kind) {
31223122
StructureMarkerRAII ParsingExprList(*this, Tok);
31233123

3124-
if (ParsingExprList.isFailed()) {
3125-
return makeParserError();
3126-
}
3127-
31283124
leftLoc = consumeToken(leftTok);
31293125
return parseList(rightTok, leftLoc, rightLoc, /*AllowSepAfterLast=*/false,
31303126
rightTok == tok::r_paren ? diag::expected_rparen_expr_list

lib/Parse/ParseGeneric.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ Parser::parseGenericParametersBeforeWhere(SourceLoc LAngleLoc,
5757
// Note that we're parsing a declaration.
5858
StructureMarkerRAII ParsingDecl(*this, Tok.getLoc(),
5959
StructureMarkerKind::Declaration);
60-
61-
if (ParsingDecl.isFailed()) {
62-
return makeParserError();
63-
}
6460

6561
// Parse attributes.
6662
DeclAttributes attributes;

lib/Parse/ParsePattern.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -1216,9 +1216,6 @@ ParserResult<Pattern> Parser::parsePatternTuple() {
12161216
SyntaxParsingContext TuplePatternCtxt(SyntaxContext,
12171217
SyntaxKind::TuplePattern);
12181218
StructureMarkerRAII ParsingPatternTuple(*this, Tok);
1219-
if (ParsingPatternTuple.isFailed()) {
1220-
return makeParserError();
1221-
}
12221219
SourceLoc LPLoc = consumeToken(tok::l_paren);
12231220
SourceLoc RPLoc;
12241221

lib/Parse/ParseStmt.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -1280,10 +1280,6 @@ ParserResult<PoundAvailableInfo> Parser::parseStmtConditionPoundAvailable() {
12801280

12811281
StructureMarkerRAII ParsingAvailabilitySpecList(*this, Tok);
12821282

1283-
if (ParsingAvailabilitySpecList.isFailed()) {
1284-
return makeParserError();
1285-
}
1286-
12871283
SourceLoc LParenLoc = consumeToken(tok::l_paren);
12881284

12891285
SmallVector<AvailabilitySpec *, 5> Specs;

lib/Parse/ParseType.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -1017,10 +1017,6 @@ ParserResult<TypeRepr> Parser::parseTypeTupleBody() {
10171017
TypeContext.setCreateSyntax(SyntaxKind::TupleType);
10181018
Parser::StructureMarkerRAII ParsingTypeTuple(*this, Tok);
10191019

1020-
if (ParsingTypeTuple.isFailed()) {
1021-
return makeParserError();
1022-
}
1023-
10241020
SourceLoc RPLoc, LPLoc = consumeToken(tok::l_paren);
10251021
SourceLoc EllipsisLoc;
10261022
unsigned EllipsisIdx;

lib/Parse/Parser.cpp

+34-31
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,9 @@ namespace {
405405
/// underlying corrected token stream.
406406
class TokenRecorder: public ConsumeTokenReceiver {
407407
ASTContext &Ctx;
408+
/// The lexer that is being used to lex the source file. Used to query whether
409+
/// lexing has been cut off.
410+
Lexer &BaseLexer;
408411
unsigned BufferID;
409412

410413
// Token list ordered by their appearance in the source file.
@@ -425,11 +428,19 @@ class TokenRecorder: public ConsumeTokenReceiver {
425428
void relexComment(CharSourceRange CommentRange,
426429
llvm::SmallVectorImpl<Token> &Scratch) {
427430
auto &SM = Ctx.SourceMgr;
431+
auto EndOffset = SM.getLocOffsetInBuffer(CommentRange.getEnd(), BufferID);
432+
if (auto LexerCutOffOffset = BaseLexer.lexingCutOffOffset()) {
433+
if (*LexerCutOffOffset < EndOffset) {
434+
// If lexing was cut off due to a too deep nesting level, adjust the end
435+
// offset to not point past the cut off point.
436+
EndOffset = *LexerCutOffOffset;
437+
}
438+
}
428439
Lexer L(Ctx.LangOpts, SM, BufferID, nullptr, LexerMode::Swift,
429440
HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
430441
TriviaRetentionMode::WithoutTrivia,
431442
SM.getLocOffsetInBuffer(CommentRange.getStart(), BufferID),
432-
SM.getLocOffsetInBuffer(CommentRange.getEnd(), BufferID));
443+
EndOffset);
433444
while(true) {
434445
Token Result;
435446
L.lex(Result);
@@ -441,8 +452,8 @@ class TokenRecorder: public ConsumeTokenReceiver {
441452
}
442453

443454
public:
444-
TokenRecorder(ASTContext &ctx, unsigned BufferID)
445-
: Ctx(ctx), BufferID(BufferID) {}
455+
TokenRecorder(ASTContext &ctx, Lexer &BaseLexer)
456+
: Ctx(ctx), BaseLexer(BaseLexer), BufferID(BaseLexer.getBufferID()) {}
446457

447458
Optional<std::vector<Token>> finalize() override {
448459
auto &SM = Ctx.SourceMgr;
@@ -516,19 +527,14 @@ class TokenRecorder: public ConsumeTokenReceiver {
516527
Parser::Parser(std::unique_ptr<Lexer> Lex, SourceFile &SF,
517528
SILParserStateBase *SIL, PersistentParserState *PersistentState,
518529
std::shared_ptr<SyntaxParseActions> SPActions)
519-
: SourceMgr(SF.getASTContext().SourceMgr),
520-
Diags(SF.getASTContext().Diags),
521-
SF(SF),
522-
L(Lex.release()),
523-
SIL(SIL),
524-
CurDeclContext(&SF),
525-
Context(SF.getASTContext()),
526-
TokReceiver(SF.shouldCollectTokens() ?
527-
new TokenRecorder(SF.getASTContext(), L->getBufferID()) :
528-
new ConsumeTokenReceiver()),
529-
SyntaxContext(new SyntaxParsingContext(SyntaxContext, SF,
530-
L->getBufferID(),
531-
std::move(SPActions))) {
530+
: SourceMgr(SF.getASTContext().SourceMgr), Diags(SF.getASTContext().Diags),
531+
SF(SF), L(Lex.release()), SIL(SIL), CurDeclContext(&SF),
532+
Context(SF.getASTContext()),
533+
TokReceiver(SF.shouldCollectTokens()
534+
? new TokenRecorder(SF.getASTContext(), *L)
535+
: new ConsumeTokenReceiver()),
536+
SyntaxContext(new SyntaxParsingContext(
537+
SyntaxContext, SF, L->getBufferID(), std::move(SPActions))) {
532538
State = PersistentState;
533539
if (!State) {
534540
OwnedState.reset(new PersistentParserState());
@@ -880,28 +886,25 @@ getStructureMarkerKindForToken(const Token &tok) {
880886
}
881887
}
882888

883-
Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser,
884-
const Token &tok)
885-
: StructureMarkerRAII(parser, tok.getLoc(),
886-
getStructureMarkerKindForToken(tok)) {}
887-
888-
bool Parser::StructureMarkerRAII::pushStructureMarker(
889-
Parser &parser, SourceLoc loc,
890-
StructureMarkerKind kind) {
891-
892-
if (parser.StructureMarkers.size() < MaxDepth) {
893-
parser.StructureMarkers.push_back({loc, kind, None});
894-
return true;
895-
} else {
889+
Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser, SourceLoc loc,
890+
StructureMarkerKind kind)
891+
: StructureMarkerRAII(parser) {
892+
parser.StructureMarkers.push_back({loc, kind, None});
893+
if (parser.StructureMarkers.size() > MaxDepth) {
896894
parser.diagnose(loc, diag::structure_overflow, MaxDepth);
897895
// We need to cut off parsing or we will stack-overflow.
898896
// But `cutOffParsing` changes the current token to eof, and we may be in
899897
// a place where `consumeToken()` will be expecting e.g. '[',
900898
// since we need that to get to the callsite, so this can cause an assert.
901-
parser.cutOffParsing();
902-
return false;
899+
parser.L->cutOffLexing();
903900
}
904901
}
902+
903+
Parser::StructureMarkerRAII::StructureMarkerRAII(Parser &parser,
904+
const Token &tok)
905+
: StructureMarkerRAII(parser, tok.getLoc(),
906+
getStructureMarkerKindForToken(tok)) {}
907+
905908
//===----------------------------------------------------------------------===//
906909
// Primitive Parsing
907910
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)