Skip to content

Commit 34e2aec

Browse files
authored
Parser: use parser to generate a refined token stream to help syntax coloring. (#11809)
This patch allows the Parser to generate a refined token stream to satisfy tooling needs. For syntax coloring, the token stream from the lexer is insufficient because (1) we have contextual keywords like get and set; (2) we may allow keywords to be used as argument labels and names; and (3) we need to split tokens like "==<". In this patch, these refinements are performed directly during parsing, without additional heuristics. The refined token vector is optionally saved in the SourceFile instance.
1 parent 57cc3ba commit 34e2aec

26 files changed

+373
-201
lines changed

include/swift/AST/Module.h

+21-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "swift/Basic/OptionSet.h"
2828
#include "swift/Basic/SourceLoc.h"
2929
#include "swift/Basic/STLExtras.h"
30+
#include "swift/Parse/Token.h"
3031
#include "llvm/ADT/ArrayRef.h"
3132
#include "llvm/ADT/DenseSet.h"
3233
#include "llvm/ADT/SetVector.h"
@@ -886,7 +887,7 @@ class SourceFile final : public FileUnit {
886887
ASTStage_t ASTStage = Parsing;
887888

888889
SourceFile(ModuleDecl &M, SourceFileKind K, Optional<unsigned> bufferID,
889-
ImplicitModuleImportKind ModImpKind);
890+
ImplicitModuleImportKind ModImpKind, bool KeepTokens);
890891

891892
void
892893
addImports(ArrayRef<std::pair<ModuleDecl::ImportedModule, ImportOptions>> IM);
@@ -1070,6 +1071,25 @@ class SourceFile final : public FileUnit {
10701071
getInterfaceHash(str);
10711072
out << str << '\n';
10721073
}
1074+
1075+
std::vector<Token> &getTokenVector() {
1076+
assert(EnabledAndAllCorrectedTokens.first && "Disabled");
1077+
return EnabledAndAllCorrectedTokens.second;
1078+
}
1079+
1080+
ArrayRef<Token> getAllTokens() const {
1081+
assert(EnabledAndAllCorrectedTokens.first && "Disabled");
1082+
return EnabledAndAllCorrectedTokens.second;
1083+
}
1084+
1085+
bool shouldKeepTokens() const {
1086+
return EnabledAndAllCorrectedTokens.first;
1087+
}
1088+
1089+
private:
1090+
/// Whether the SourceFile instance opts to collect underlying tokens and
1091+
/// the vector containing these tokens if so.
1092+
std::pair<bool, std::vector<Token>> EnabledAndAllCorrectedTokens;
10731093
};
10741094

10751095

include/swift/Basic/LangOptions.h

+5
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,11 @@ namespace swift {
245245
/// This is used to guard preemptive testing for the fix-it.
246246
bool FixStringToSubstringConversions = false;
247247

248+
/// Whether to keep track of a refined token stream in SourceFile while
249+
/// parsing. This is usually set to true for tooling purposes such as
250+
/// semantic coloring.
251+
bool KeepTokensInSourceFile = false;
252+
248253
/// Sets the target we are building for and updates platform conditions
249254
/// to match.
250255
///

include/swift/Parse/Lexer.h

+4
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,10 @@ class Lexer {
411411
Result.IndentToStrip = 0;
412412
return Result;
413413
}
414+
415+
SourceLoc getEndLoc() {
416+
return Loc.getAdvancedLoc(Length);
417+
}
414418
};
415419

416420
/// \brief Compute the bytes that the actual string literal should codegen to.

include/swift/Parse/Parser.h

+25-2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,22 @@ namespace swift {
7171
ActiveConditionalBlock,
7272
};
7373

74+
/// The receiver will be fed with consumed tokens while parsing. The main purpose
75+
/// is to generate a corrected token stream for tooling support like syntax
76+
/// coloring.
77+
class ConsumeTokenReceiver {
78+
public:
79+
/// This is called when a token is consumed.
80+
virtual void receive(Token Tok) {}
81+
82+
/// This is called to update the kind of a token whose start location is Loc.
83+
virtual void registerTokenKindChange(SourceLoc Loc, tok NewKind) {};
84+
85+
/// This is called when a source file is fully parsed.
86+
virtual void finalize() {};
87+
virtual ~ConsumeTokenReceiver() = default;
88+
};
89+
7490
/// The main class used for parsing a source file (.swift or .sil).
7591
///
7692
/// Rather than instantiating a Parser yourself, use one of the parsing APIs
@@ -174,6 +190,9 @@ class Parser {
174190
/// \brief This is the current token being considered by the parser.
175191
Token Tok;
176192

193+
/// \brief The receiver to collect all consumed tokens.
194+
ConsumeTokenReceiver *TokReceiver;
195+
177196
/// \brief The location of the previous token.
178197
SourceLoc PreviousLoc;
179198

@@ -356,7 +375,7 @@ class Parser {
356375
// We might be at tok::eof now, so ensure that consumeToken() does not
357376
// assert about lexing past eof.
358377
Tok.setKind(tok::unknown);
359-
consumeToken();
378+
consumeTokenWithoutFeedingReceiver();
360379

361380
PreviousLoc = PP.PreviousLoc;
362381
}
@@ -369,7 +388,7 @@ class Parser {
369388
// We might be at tok::eof now, so ensure that consumeToken() does not
370389
// assert about lexing past eof.
371390
Tok.setKind(tok::unknown);
372-
consumeToken();
391+
consumeTokenWithoutFeedingReceiver();
373392

374393
PreviousLoc = PP.PreviousLoc;
375394
}
@@ -421,6 +440,10 @@ class Parser {
421440
/// \brief Return the next token that will be installed by \c consumeToken.
422441
const Token &peekToken();
423442

443+
/// Consume a token that we created on the fly to correct the original token
444+
/// stream from lexer.
445+
void consumeExtraToken(Token K);
446+
SourceLoc consumeTokenWithoutFeedingReceiver();
424447
SourceLoc consumeToken();
425448
SourceLoc consumeToken(tok K) {
426449
assert(Tok.is(K) && "Consuming wrong token kind");

include/swift/Parse/Token.h

+9-5
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,17 @@ class Token {
6161
public:
6262
Token() : Kind(tok::NUM_TOKENS), AtStartOfLine(false), CommentLength(0),
6363
EscapedIdentifier(false) {}
64-
Token(tok Kind, StringRef Text)
65-
: Kind(Kind), AtStartOfLine(false), CommentLength(0),
66-
EscapedIdentifier(false), MultilineString(false),
67-
Text(Text) {}
68-
64+
65+
Token(tok Kind, StringRef Text, unsigned CommentLength)
66+
: Kind(Kind), AtStartOfLine(false), CommentLength(CommentLength),
67+
EscapedIdentifier(false), MultilineString(false),
68+
Text(Text) {}
69+
70+
Token(tok Kind, StringRef Text): Token(Kind, Text, 0) {};
71+
6972
tok getKind() const { return Kind; }
7073
void setKind(tok K) { Kind = K; }
74+
void clearCommentLength() { CommentLength = 0; }
7175

7276
/// is/isNot - Predicates to check if this token is a specific kind, as in
7377
/// "if (Tok.is(tok::l_brace)) {...}".

include/swift/Syntax/TokenKinds.def

+3
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,9 @@ MISC(dollarident)
287287
MISC(sil_local_name) // %42 in SIL mode.
288288
MISC(comment)
289289

290+
MISC(string_interpolation_anchor)
291+
MISC(contextual_keyword)
292+
290293
#undef KEYWORD
291294
#undef SWIFT_KEYWORD
292295
#undef DECL_KEYWORD

lib/AST/Module.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -1321,9 +1321,12 @@ static void performAutoImport(SourceFile &SF,
13211321

13221322
SourceFile::SourceFile(ModuleDecl &M, SourceFileKind K,
13231323
Optional<unsigned> bufferID,
1324-
ImplicitModuleImportKind ModImpKind)
1324+
ImplicitModuleImportKind ModImpKind,
1325+
bool KeepTokens)
13251326
: FileUnit(FileUnitKind::Source, M),
1326-
BufferID(bufferID ? *bufferID : -1), Kind(K) {
1327+
BufferID(bufferID ? *bufferID : -1),
1328+
Kind(K),
1329+
EnabledAndAllCorrectedTokens(KeepTokens, std::vector<Token>()) {
13271330
M.getASTContext().addDestructorCleanup(*this);
13281331
performAutoImport(*this, ModImpKind);
13291332

lib/Frontend/Frontend.cpp

+12-5
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ ModuleDecl *CompilerInstance::getMainModule() {
264264
void CompilerInstance::performSema() {
265265
const FrontendOptions &options = Invocation.getFrontendOptions();
266266
const InputFileKind Kind = Invocation.getInputKind();
267+
bool KeepTokens = Invocation.getLangOptions().KeepTokensInSourceFile;
267268
ModuleDecl *MainModule = getMainModule();
268269
Context->LoadedModules[MainModule->getName()] = MainModule;
269270

@@ -392,7 +393,7 @@ void CompilerInstance::performSema() {
392393
if (Kind == InputFileKind::IFK_Swift_REPL) {
393394
auto *SingleInputFile =
394395
new (*Context) SourceFile(*MainModule, Invocation.getSourceFileKind(),
395-
None, modImpKind);
396+
None, modImpKind, KeepTokens);
396397
MainModule->addFile(*SingleInputFile);
397398
addAdditionalInitialImports(SingleInputFile);
398399
return;
@@ -420,7 +421,8 @@ void CompilerInstance::performSema() {
420421

421422
auto *MainFile = new (*Context) SourceFile(*MainModule,
422423
Invocation.getSourceFileKind(),
423-
MainBufferID, modImpKind);
424+
MainBufferID, modImpKind,
425+
KeepTokens);
424426
MainModule->addFile(*MainFile);
425427
addAdditionalInitialImports(MainFile);
426428

@@ -446,7 +448,8 @@ void CompilerInstance::performSema() {
446448
auto *NextInput = new (*Context) SourceFile(*MainModule,
447449
SourceFileKind::Library,
448450
BufferID,
449-
modImpKind);
451+
modImpKind,
452+
KeepTokens);
450453
MainModule->addFile(*NextInput);
451454
addAdditionalInitialImports(NextInput);
452455

@@ -585,6 +588,7 @@ void CompilerInstance::performParseOnly(bool EvaluateConditionals) {
585588
const InputFileKind Kind = Invocation.getInputKind();
586589
ModuleDecl *MainModule = getMainModule();
587590
Context->LoadedModules[MainModule->getName()] = MainModule;
591+
bool KeepTokens = Invocation.getLangOptions().KeepTokensInSourceFile;
588592

589593
assert((Kind == InputFileKind::IFK_Swift ||
590594
Kind == InputFileKind::IFK_Swift_Library) &&
@@ -600,7 +604,8 @@ void CompilerInstance::performParseOnly(bool EvaluateConditionals) {
600604
SourceMgr.setHashbangBufferID(MainBufferID);
601605

602606
auto *MainFile = new (*Context) SourceFile(
603-
*MainModule, Invocation.getSourceFileKind(), MainBufferID, modImpKind);
607+
*MainModule, Invocation.getSourceFileKind(), MainBufferID, modImpKind,
608+
KeepTokens);
604609
MainModule->addFile(*MainFile);
605610

606611
if (MainBufferID == PrimaryBufferID)
@@ -615,7 +620,9 @@ void CompilerInstance::performParseOnly(bool EvaluateConditionals) {
615620
continue;
616621

617622
auto *NextInput = new (*Context)
618-
SourceFile(*MainModule, SourceFileKind::Library, BufferID, modImpKind);
623+
SourceFile(*MainModule, SourceFileKind::Library, BufferID, modImpKind,
624+
KeepTokens);
625+
619626
MainModule->addFile(*NextInput);
620627
if (BufferID == PrimaryBufferID)
621628
setPrimarySourceFile(NextInput);

lib/IDE/Formatting.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -485,11 +485,11 @@ class FormatContext {
485485
};
486486

487487
class FormatWalker : public SourceEntityWalker {
488-
typedef std::vector<Token>::iterator TokenIt;
488+
typedef ArrayRef<Token>::iterator TokenIt;
489489
class SiblingCollector {
490490
SourceLoc FoundSibling;
491491
SourceManager &SM;
492-
std::vector<Token> &Tokens;
492+
ArrayRef<Token> Tokens;
493493
SourceLoc &TargetLoc;
494494
TokenIt TI;
495495
bool NeedExtraIndentation;
@@ -552,7 +552,7 @@ class FormatWalker : public SourceEntityWalker {
552552
}
553553

554554
public:
555-
SiblingCollector(SourceManager &SM, std::vector<Token> &Tokens,
555+
SiblingCollector(SourceManager &SM, ArrayRef<Token> Tokens,
556556
SourceLoc &TargetLoc) : SM(SM), Tokens(Tokens),
557557
TargetLoc(TargetLoc), TI(Tokens.begin()),
558558
NeedExtraIndentation(false) {}
@@ -660,7 +660,7 @@ class FormatWalker : public SourceEntityWalker {
660660
bool InDocCommentBlock = false;
661661
bool InCommentLine = false;
662662
bool InStringLiteral = false;
663-
std::vector<Token> Tokens;
663+
ArrayRef<Token> Tokens;
664664
LangOptions Options;
665665
TokenIt CurrentTokIt;
666666
unsigned TargetLine;
@@ -739,7 +739,7 @@ class FormatWalker : public SourceEntityWalker {
739739
public:
740740
explicit FormatWalker(SourceFile &SF, SourceManager &SM)
741741
:SF(SF), SM(SM),
742-
Tokens(tokenize(Options, SM, SF.getBufferID().getValue())),
742+
Tokens(SF.getAllTokens()),
743743
CurrentTokIt(Tokens.begin()),
744744
SCollector(SM, Tokens, TargetLocation) {}
745745

0 commit comments

Comments
 (0)