Skip to content

Commit f5fc6f0

Browse files
author
Harlan Haskins
committed
[Lexer] Handle SwiftInterface files as well as SIL
Previously, the Lexer kept a single flag indicating whether we’re lexing Swift or SIL. Instead, keep track of whether we’re lexing Swift, SIL, or a .swiftinterface file. .swiftinterface files allow $-prefixed identifiers anywhere.
1 parent 5b3f128 commit f5fc6f0

File tree

7 files changed

+73
-52
lines changed

7 files changed

+73
-52
lines changed

include/swift/Parse/Lexer.h

+15-8
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ enum class HashbangMode : bool {
5555
Allowed,
5656
};
5757

58+
/// The kind of source text a Lexer is operating on. This replaces the former
/// boolean `InSILMode` flag so that .swiftinterface files can be told apart
/// from both ordinary Swift and SIL.
enum class LexerMode {
59+
/// Ordinary Swift source.
Swift,
60+
/// A .swiftinterface file. Per the commit description, these allow
/// $-prefixed identifiers anywhere.
SwiftInterface,
61+
/// A .sil file. Lexing in this mode enables things like the 'sil' keyword.
SIL
62+
};
63+
5864
/// Kinds of conflict marker which the lexer might encounter.
5965
enum class ConflictMarkerKind {
6066
/// A normal or diff3 conflict marker, initiated by at least 7 "<"s,
@@ -98,9 +104,10 @@ class Lexer {
98104

99105
Token NextToken;
100106

101-
/// This is true if we're lexing a .sil file instead of a .swift
102-
/// file. This enables the 'sil' keyword.
103-
const bool InSILMode;
107+
/// The kind of source we're lexing. This either enables special behavior for
108+
/// parseable interfaces, or enables things like the 'sil' keyword if lexing
109+
/// a .sil file.
110+
const LexerMode LexMode;
104111

105112
/// True if we should skip past a `#!` line at the start of the file.
106113
const bool IsHashbangAllowed;
@@ -135,8 +142,8 @@ class Lexer {
135142
/// everything.
136143
Lexer(const PrincipalTag &, const LangOptions &LangOpts,
137144
const SourceManager &SourceMgr, unsigned BufferID,
138-
DiagnosticEngine *Diags, bool InSILMode, HashbangMode HashbangAllowed,
139-
CommentRetentionMode RetainComments,
145+
DiagnosticEngine *Diags, LexerMode LexMode,
146+
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
140147
TriviaRetentionMode TriviaRetention);
141148

142149
void initialize(unsigned Offset, unsigned EndOffset);
@@ -150,21 +157,21 @@ class Lexer {
150157
/// identifier), but not things like how many characters are
151158
/// consumed. If that changes, APIs like getLocForEndOfToken will
152159
/// need to take a LangOptions explicitly.
153-
/// \param InSILMode - whether we're parsing a SIL source file.
160+
/// \param LexMode - the kind of source file we're lexing.
154161
/// Unlike language options, this does affect primitive lexing, which
155162
/// means that APIs like getLocForEndOfToken really ought to take
156163
/// this flag; it's just that we don't care that much about fidelity
157164
/// when parsing SIL files.
158165
Lexer(
159166
const LangOptions &Options, const SourceManager &SourceMgr,
160-
unsigned BufferID, DiagnosticEngine *Diags, bool InSILMode,
167+
unsigned BufferID, DiagnosticEngine *Diags, LexerMode LexMode,
161168
HashbangMode HashbangAllowed = HashbangMode::Disallowed,
162169
CommentRetentionMode RetainComments = CommentRetentionMode::None,
163170
TriviaRetentionMode TriviaRetention = TriviaRetentionMode::WithoutTrivia);
164171

165172
/// Create a lexer that scans a subrange of the source buffer.
166173
Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
167-
unsigned BufferID, DiagnosticEngine *Diags, bool InSILMode,
174+
unsigned BufferID, DiagnosticEngine *Diags, LexerMode LexMode,
168175
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
169176
TriviaRetentionMode TriviaRetention, unsigned Offset,
170177
unsigned EndOffset);

lib/AST/RawComment.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ static RawComment toRawComment(ASTContext &Context, CharSourceRange Range) {
108108
unsigned Offset = SourceMgr.getLocOffsetInBuffer(Range.getStart(), BufferID);
109109
unsigned EndOffset = SourceMgr.getLocOffsetInBuffer(Range.getEnd(), BufferID);
110110
LangOptions FakeLangOpts;
111-
Lexer L(FakeLangOpts, SourceMgr, BufferID, nullptr, /*InSILMode=*/false,
111+
Lexer L(FakeLangOpts, SourceMgr, BufferID, nullptr, LexerMode::Swift,
112112
HashbangMode::Disallowed,
113113
CommentRetentionMode::ReturnAsTokens,
114114
TriviaRetentionMode::WithoutTrivia,

lib/Immediate/REPL.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ class REPLEnvironment {
10531053
unsigned BufferID =
10541054
CI.getSourceMgr().addMemBufferCopy(Line, "<REPL Input>");
10551055
Lexer L(CI.getASTContext().LangOpts,
1056-
CI.getSourceMgr(), BufferID, nullptr, false /*not SIL*/);
1056+
CI.getSourceMgr(), BufferID, nullptr, LexerMode::Swift);
10571057
Token Tok;
10581058
L.lex(Tok);
10591059
assert(Tok.is(tok::colon));

lib/Parse/Lexer.cpp

+13-12
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,11 @@ uint32_t swift::validateUTF8CharacterAndAdvance(const char *&Ptr,
171171

172172
Lexer::Lexer(const PrincipalTag &, const LangOptions &LangOpts,
173173
const SourceManager &SourceMgr, unsigned BufferID,
174-
DiagnosticEngine *Diags, bool InSILMode,
174+
DiagnosticEngine *Diags, LexerMode LexMode,
175175
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
176176
TriviaRetentionMode TriviaRetention)
177177
: LangOpts(LangOpts), SourceMgr(SourceMgr), BufferID(BufferID),
178-
Diags(Diags), InSILMode(InSILMode),
178+
Diags(Diags), LexMode(LexMode),
179179
IsHashbangAllowed(HashbangAllowed == HashbangMode::Allowed),
180180
RetainComments(RetainComments), TriviaRetention(TriviaRetention) {}
181181

@@ -216,28 +216,28 @@ void Lexer::initialize(unsigned Offset, unsigned EndOffset) {
216216
}
217217

218218
Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
219-
unsigned BufferID, DiagnosticEngine *Diags, bool InSILMode,
219+
unsigned BufferID, DiagnosticEngine *Diags, LexerMode LexMode,
220220
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
221221
TriviaRetentionMode TriviaRetention)
222-
: Lexer(PrincipalTag(), Options, SourceMgr, BufferID, Diags, InSILMode,
222+
: Lexer(PrincipalTag(), Options, SourceMgr, BufferID, Diags, LexMode,
223223
HashbangAllowed, RetainComments, TriviaRetention) {
224224
unsigned EndOffset = SourceMgr.getRangeForBuffer(BufferID).getByteLength();
225225
initialize(/*Offset=*/0, EndOffset);
226226
}
227227

228228
Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
229-
unsigned BufferID, DiagnosticEngine *Diags, bool InSILMode,
229+
unsigned BufferID, DiagnosticEngine *Diags, LexerMode LexMode,
230230
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
231231
TriviaRetentionMode TriviaRetention, unsigned Offset,
232232
unsigned EndOffset)
233-
: Lexer(PrincipalTag(), Options, SourceMgr, BufferID, Diags, InSILMode,
233+
: Lexer(PrincipalTag(), Options, SourceMgr, BufferID, Diags, LexMode,
234234
HashbangAllowed, RetainComments, TriviaRetention) {
235235
initialize(Offset, EndOffset);
236236
}
237237

238238
Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
239239
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
240-
Parent.Diags, Parent.InSILMode,
240+
Parent.Diags, Parent.LexMode,
241241
Parent.IsHashbangAllowed
242242
? HashbangMode::Allowed
243243
: HashbangMode::Disallowed,
@@ -264,7 +264,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) {
264264
SourceMgr.findBufferContainingLoc(Loc)) &&
265265
"location from the wrong buffer");
266266

267-
Lexer L(LangOpts, SourceMgr, BufferID, Diags, InSILMode,
267+
Lexer L(LangOpts, SourceMgr, BufferID, Diags, LexMode,
268268
HashbangMode::Allowed, CommentRetentionMode::None,
269269
TriviaRetentionMode::WithoutTrivia);
270270
L.restoreState(State(Loc));
@@ -672,7 +672,8 @@ void Lexer::lexIdentifier() {
672672
// Lex [a-zA-Z_$0-9[[:XID_Continue:]]]*
673673
while (advanceIfValidContinuationOfIdentifier(CurPtr, BufferEnd));
674674

675-
tok Kind = kindOfIdentifier(StringRef(TokStart, CurPtr-TokStart), InSILMode);
675+
tok Kind = kindOfIdentifier(StringRef(TokStart, CurPtr-TokStart),
676+
LexMode == LexerMode::SIL);
676677
return formToken(Kind, TokStart);
677678
}
678679

@@ -2515,7 +2516,7 @@ Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc) {
25152516
// comments and normally we won't be at the beginning of a comment token
25162517
// (making this option irrelevant), or the caller lexed comments and
25172518
// we need to lex just the comment token.
2518-
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
2519+
Lexer L(FakeLangOpts, SM, BufferID, nullptr, LexerMode::Swift,
25192520
HashbangMode::Allowed, CommentRetentionMode::ReturnAsTokens);
25202521
L.restoreState(State(Loc));
25212522
return L.peekNextToken();
@@ -2671,7 +2672,7 @@ static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
26712672
// and the exact token produced.
26722673
LangOptions FakeLangOptions;
26732674

2674-
Lexer L(FakeLangOptions, SM, BufferID, nullptr, /*InSILMode=*/false,
2675+
Lexer L(FakeLangOptions, SM, BufferID, nullptr, LexerMode::Swift,
26752676
HashbangMode::Allowed, CommentRetentionMode::None,
26762677
TriviaRetentionMode::WithoutTrivia, BufferStart, BufferEnd);
26772678

@@ -2799,7 +2800,7 @@ SourceLoc Lexer::getLocForEndOfLine(SourceManager &SM, SourceLoc Loc) {
27992800
// comments and normally we won't be at the beginning of a comment token
28002801
// (making this option irrelevant), or the caller lexed comments and
28012802
// we need to lex just the comment token.
2802-
Lexer L(FakeLangOpts, SM, BufferID, nullptr, /*InSILMode=*/ false,
2803+
Lexer L(FakeLangOpts, SM, BufferID, nullptr, LexerMode::Swift,
28032804
HashbangMode::Allowed, CommentRetentionMode::ReturnAsTokens);
28042805
L.restoreState(State(Loc));
28052806
L.skipToEndOfLine(/*EatNewline=*/true);

lib/Parse/Parser.cpp

+17-4
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void tokenize(const LangOptions &LangOpts, const SourceManager &SM,
5959
if (Offset == 0 && EndOffset == 0)
6060
EndOffset = SM.getRangeForBuffer(BufferID).getByteLength();
6161

62-
Lexer L(LangOpts, SM, BufferID, Diags, /*InSILMode=*/false,
62+
Lexer L(LangOpts, SM, BufferID, Diags, LexerMode::Swift,
6363
HashbangMode::Allowed, RetainComments, TriviaRetention, Offset,
6464
EndOffset);
6565

@@ -345,6 +345,19 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
345345
//===----------------------------------------------------------------------===//
346346

347347

348+
/// Maps the kind of a source file to the mode its lexer should run in.
///
/// \param kind - the SourceFileKind of the file being parsed.
/// \returns LexerMode::SwiftInterface for Interface files, LexerMode::SIL for
/// SIL files, and LexerMode::Swift for Library, Main and REPL files.
static LexerMode sourceFileKindToLexerMode(SourceFileKind kind) {
349+
// There is deliberately no `default:` label; presumably the listed cases
// cover every SourceFileKind enumerator -- TODO confirm against the enum.
switch (kind) {
350+
case swift::SourceFileKind::Interface:
351+
return LexerMode::SwiftInterface;
352+
case swift::SourceFileKind::SIL:
353+
return LexerMode::SIL;
354+
// All remaining kinds are lexed as plain Swift.
355+
case swift::SourceFileKind::Library:
356+
case swift::SourceFileKind::Main:
357+
case swift::SourceFileKind::REPL:
358+
return LexerMode::Swift;
359+
}
// NOTE(review): nothing follows the switch, so a value outside the listed
// cases would fall off the end of this non-void function. Consider adding
// an llvm_unreachable(...) here -- confirm against project convention.
360+
}
360+
348361
Parser::Parser(unsigned BufferID, SourceFile &SF, SILParserTUStateBase *SIL,
349362
PersistentParserState *PersistentState,
350363
std::shared_ptr<SyntaxParseActions> SPActions,
@@ -361,7 +374,7 @@ Parser::Parser(unsigned BufferID, SourceFile &SF, DiagnosticEngine* LexerDiags,
361374
std::unique_ptr<Lexer>(new Lexer(
362375
SF.getASTContext().LangOpts, SF.getASTContext().SourceMgr,
363376
BufferID, LexerDiags,
364-
/*InSILMode=*/SIL != nullptr,
377+
sourceFileKindToLexerMode(SF.Kind),
365378
SF.Kind == SourceFileKind::Main
366379
? HashbangMode::Allowed
367380
: HashbangMode::Disallowed,
@@ -399,7 +412,7 @@ class TokenRecorder: public ConsumeTokenReceiver {
399412

400413
void relexComment(CharSourceRange CommentRange,
401414
llvm::SmallVectorImpl<Token> &Scratch) {
402-
Lexer L(Ctx.LangOpts, Ctx.SourceMgr, BufferID, nullptr, /*InSILMode=*/false,
415+
Lexer L(Ctx.LangOpts, Ctx.SourceMgr, BufferID, nullptr, LexerMode::Swift,
403416
HashbangMode::Disallowed,
404417
CommentRetentionMode::ReturnAsTokens,
405418
TriviaRetentionMode::WithoutTrivia,
@@ -1119,7 +1132,7 @@ ParserUnit::ParserUnit(SourceManager &SM, SourceFileKind SFKind, unsigned Buffer
11191132
std::unique_ptr<Lexer> Lex;
11201133
Lex.reset(new Lexer(Impl.LangOpts, SM,
11211134
BufferID, &Impl.Diags,
1122-
/*InSILMode=*/false,
1135+
LexerMode::Swift,
11231136
HashbangMode::Allowed,
11241137
CommentRetentionMode::None,
11251138
TriviaRetentionMode::WithoutTrivia,

0 commit comments

Comments
 (0)