Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit c4bf2b9

Browse files
committed
Introduced raw_identifier token kind.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@122394 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent eee1df1 commit c4bf2b9

14 files changed

+115
-89
lines changed

include/clang/Basic/TokenKinds.def

+1
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ TOK(comment) // Comment (only in -E -C[C] mode)
103103

104104
// C99 6.4.2: Identifiers.
105105
TOK(identifier) // abcde123
106+
TOK(raw_identifier) // Used only in raw lexing mode.
106107

107108
// C99 6.4.4.1: Integer Constants
108109
// C99 6.4.4.2: Floating Constants

include/clang/Lex/Preprocessor.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -751,10 +751,10 @@ class Preprocessor {
751751
// Preprocessor callback methods. These are invoked by a lexer as various
752752
// directives and events are found.
753753

754-
/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
755-
/// identifier information for the token and install it into the token.
756-
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier,
757-
const char *BufPtr = 0) const;
754+
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
755+
/// identifier information for the token and install it into the token,
756+
/// updating the token kind accordingly.
757+
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
758758

759759
/// HandleIdentifier - This callback is invoked when the lexer reads an
760760
/// identifier and has filled in the token's IdentifierInfo member. This

include/clang/Lex/Token.h

+22-1
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,12 @@ class Token {
8888
bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
8989
bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
9090

91+
/// isAnyIdentifier - Return true if this is a raw identifier (when lexing
92+
/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
93+
bool isAnyIdentifier() const {
94+
return is(tok::identifier) || is(tok::raw_identifier);
95+
}
96+
9197
/// isLiteral - Return true if this is a "literal", like a numeric
9298
/// constant, string, etc.
9399
bool isLiteral() const {
@@ -154,14 +160,29 @@ class Token {
154160
}
155161

156162
IdentifierInfo *getIdentifierInfo() const {
157-
assert(!isAnnotation() && "Used IdentInfo on annotation token!");
163+
assert(isNot(tok::raw_identifier) &&
164+
"getIdentifierInfo() on a tok::raw_identifier token!");
165+
assert(!isAnnotation() &&
166+
"getIdentifierInfo() on an annotation token!");
158167
if (isLiteral()) return 0;
159168
return (IdentifierInfo*) PtrData;
160169
}
161170
void setIdentifierInfo(IdentifierInfo *II) {
162171
PtrData = (void*) II;
163172
}
164173

174+
/// getRawIdentifierData - For a raw identifier token (i.e., an identifier
175+
/// lexed in raw mode), returns a pointer to the start of it in the text
176+
/// buffer if known, null otherwise.
177+
const char *getRawIdentifierData() const {
178+
assert(is(tok::raw_identifier));
179+
return reinterpret_cast<const char*>(PtrData);
180+
}
181+
void setRawIdentifierData(const char *Ptr) {
182+
assert(is(tok::raw_identifier));
183+
PtrData = const_cast<char*>(Ptr);
184+
}
185+
165186
/// getLiteralData - For a literal token (numeric constant, string, etc), this
166187
/// returns a pointer to the start of it in the text buffer if known, null
167188
/// otherwise.

lib/Frontend/CacheTokens.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -300,7 +300,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
300300
ParsingPreprocessorDirective = false;
301301
}
302302

303-
if (Tok.is(tok::identifier)) {
303+
if (Tok.is(tok::raw_identifier)) {
304304
PP.LookUpIdentifierInfo(Tok);
305305
EmitToken(Tok);
306306
continue;
@@ -320,13 +320,13 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
320320
// this case, discard both tokens.
321321
if (NextTok.isAtStartOfLine())
322322
goto NextToken;
323-
323+
324324
// The token is the start of a directive. Emit it.
325325
EmitToken(Tok);
326326
Tok = NextTok;
327327

328328
// Did we see 'include'/'import'/'include_next'?
329-
if (Tok.isNot(tok::identifier)) {
329+
if (Tok.isNot(tok::raw_identifier)) {
330330
EmitToken(Tok);
331331
continue;
332332
}
@@ -353,7 +353,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
353353
L.LexIncludeFilename(Tok);
354354
L.setParsingPreprocessorDirective(false);
355355
assert(!Tok.isAtStartOfLine());
356-
if (Tok.is(tok::identifier))
356+
if (Tok.is(tok::raw_identifier))
357357
PP.LookUpIdentifierInfo(Tok);
358358

359359
break;

lib/Lex/Lexer.cpp

+24-25
Original file line number | Diff line number | Diff line change
@@ -266,21 +266,23 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
266266
const SourceManager &SourceMgr,
267267
const LangOptions &Features, bool *Invalid) {
268268
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
269-
270-
// If this token is an identifier, just return the string from the identifier
271-
// table, which is very quick.
272-
if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
269+
270+
const char *TokStart = 0;
271+
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
272+
if (Tok.is(tok::raw_identifier))
273+
TokStart = Tok.getRawIdentifierData();
274+
else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
275+
// Just return the string from the identifier table, which is very quick.
273276
Buffer = II->getNameStart();
274277
return II->getLength();
275278
}
276-
277-
// Otherwise, compute the start of the token in the input lexer buffer.
278-
const char *TokStart = 0;
279-
279+
280+
// NOTE: this can be checked even after testing for an IdentifierInfo.
280281
if (Tok.isLiteral())
281282
TokStart = Tok.getLiteralData();
282-
283+
283284
if (TokStart == 0) {
285+
// Compute the start of the token in the input lexer buffer.
284286
bool CharDataInvalid = false;
285287
TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
286288
if (Invalid)
@@ -290,13 +292,13 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
290292
return 0;
291293
}
292294
}
293-
295+
294296
// If this token contains nothing interesting, return it directly.
295297
if (!Tok.needsCleaning()) {
296298
Buffer = TokStart;
297299
return Tok.getLength();
298300
}
299-
301+
300302
// Otherwise, hard case, relex the characters into the string.
301303
char *OutBuf = const_cast<char*>(Buffer);
302304
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@@ -307,7 +309,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
307309
}
308310
assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
309311
"NeedsCleaning flag set on something that didn't need cleaning!");
310-
312+
311313
return OutBuf-Buffer;
312314
}
313315

@@ -473,10 +475,9 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
473475
// we don't have an identifier table available. Instead, just look at
474476
// the raw identifier to recognize and categorize preprocessor directives.
475477
TheLexer.LexFromRawLexer(TheTok);
476-
if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
477-
const char *IdStart = Buffer->getBufferStart()
478-
+ TheTok.getLocation().getRawEncoding() - 1;
479-
llvm::StringRef Keyword(IdStart, TheTok.getLength());
478+
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
479+
llvm::StringRef Keyword(TheTok.getRawIdentifierData(),
480+
TheTok.getLength());
480481
PreambleDirectiveKind PDK
481482
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
482483
.Case("include", PDK_Skipped)
@@ -1046,19 +1047,17 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
10461047
if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
10471048
FinishIdentifier:
10481049
const char *IdStart = BufferPtr;
1049-
FormTokenWithChars(Result, CurPtr, tok::identifier);
1050+
FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1051+
Result.setRawIdentifierData(IdStart);
10501052

10511053
// If we are in raw mode, return this identifier raw. There is no need to
10521054
// look up identifier information or attempt to macro expand it.
1053-
if (LexingRawMode) return;
1054-
1055-
// Fill in Result.IdentifierInfo, looking up the identifier in the
1056-
// identifier table.
1057-
IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
1055+
if (LexingRawMode)
1056+
return;
10581057

1059-
// Change the kind of this identifier to the appropriate token kind, e.g.
1060-
// turning "for" into a keyword.
1061-
Result.setKind(II->getTokenID());
1058+
// Fill in Result.IdentifierInfo and update the token kind,
1059+
// looking up the identifier in the identifier table.
1060+
IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
10621061

10631062
// Finally, now that we know we have an identifier, pass this off to the
10641063
// preprocessor, which may macro expand it or something.

lib/Lex/PPDirectives.cpp

+3-7
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
245245

246246
// If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
247247
// something bogus), skip it.
248-
if (Tok.isNot(tok::identifier)) {
248+
if (Tok.isNot(tok::raw_identifier)) {
249249
CurPPLexer->ParsingPreprocessorDirective = false;
250250
// Restore comment saving mode.
251251
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
@@ -257,12 +257,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
257257
// to spell an i/e in a strange way that is another letter. Skipping this
258258
// allows us to avoid looking up the identifier info for #define/#undef and
259259
// other common directives.
260-
bool Invalid = false;
261-
const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation(),
262-
&Invalid);
263-
if (Invalid)
264-
return;
265-
260+
const char *RawCharData = Tok.getRawIdentifierData();
261+
266262
char FirstChar = RawCharData[0];
267263
if (FirstChar >= 'a' && FirstChar <= 'z' &&
268264
FirstChar != 'i' && FirstChar != 'e') {

lib/Lex/Pragma.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
292292
if (Tok.is(tok::eom)) return;
293293

294294
// Can only poison identifiers.
295-
if (Tok.isNot(tok::identifier)) {
295+
if (Tok.isNot(tok::raw_identifier)) {
296296
Diag(Tok, diag::err_pp_invalid_poison);
297297
return;
298298
}
@@ -599,7 +599,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
599599
// Create a Token from the string.
600600
Token MacroTok;
601601
MacroTok.startToken();
602-
MacroTok.setKind(tok::identifier);
602+
MacroTok.setKind(tok::raw_identifier);
603603
CreateString(&StrVal[1], StrVal.size() - 2, MacroTok);
604604

605605
// Get the IdentifierInfo of MacroTok.

lib/Lex/Preprocessor.cpp

+22-13
Original file line number | Diff line number | Diff line change
@@ -285,9 +285,12 @@ void Preprocessor::CodeCompleteNaturalLanguage() {
285285
llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
286286
llvm::SmallVectorImpl<char> &Buffer,
287287
bool *Invalid) const {
288-
// Try the fast path.
289-
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
290-
return II->getName();
288+
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
289+
if (Tok.isNot(tok::raw_identifier)) {
290+
// Try the fast path.
291+
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
292+
return II->getName();
293+
}
291294

292295
// Resize the buffer if we need to copy into it.
293296
if (Tok.needsCleaning())
@@ -313,8 +316,10 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
313316
InstantiationLoc, Len);
314317
Tok.setLocation(Loc);
315318

316-
// If this is a literal token, set the pointer data.
317-
if (Tok.isLiteral())
319+
// If this is a raw identifier or a literal token, set the pointer data.
320+
if (Tok.is(tok::raw_identifier))
321+
Tok.setRawIdentifierData(DestPtr);
322+
else if (Tok.isLiteral())
318323
Tok.setLiteralData(DestPtr);
319324
}
320325

@@ -369,25 +374,29 @@ void Preprocessor::EndSourceFile() {
369374
// Lexer Event Handling.
370375
//===----------------------------------------------------------------------===//
371376

372-
/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
373-
/// identifier information for the token and install it into the token.
374-
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
375-
const char *BufPtr) const {
376-
assert(Identifier.is(tok::identifier) && "Not an identifier!");
377-
assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
377+
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
378+
/// identifier information for the token and install it into the token,
379+
/// updating the token kind accordingly.
380+
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
381+
assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!");
378382

379383
// Look up this token, see if it is a macro, or if it is a language keyword.
380384
IdentifierInfo *II;
381-
if (BufPtr && !Identifier.needsCleaning()) {
385+
if (!Identifier.needsCleaning()) {
382386
// No cleaning needed, just use the characters from the lexed buffer.
383-
II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
387+
II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),
388+
Identifier.getLength()));
384389
} else {
385390
// Cleaning needed, allocate a buffer, clean into it, then use the buffer.
386391
llvm::SmallString<64> IdentifierBuffer;
387392
llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
388393
II = getIdentifierInfo(CleanedStr);
389394
}
395+
396+
// Update the token info (identifier info and appropriate token kind).
390397
Identifier.setIdentifierInfo(II);
398+
Identifier.setKind(II->getTokenID());
399+
391400
return II;
392401
}
393402

lib/Lex/TokenConcatenation.cpp

+9-1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
#include "clang/Lex/TokenConcatenation.h"
1515
#include "clang/Lex/Preprocessor.h"
16+
#include "llvm/Support/ErrorHandling.h"
1617
using namespace clang;
1718

1819

@@ -165,7 +166,14 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
165166
}
166167

167168
switch (PrevKind) {
168-
default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
169+
default:
170+
llvm_unreachable("InitAvoidConcatTokenInfo built wrong");
171+
return true;
172+
173+
case tok::raw_identifier:
174+
llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
175+
return true;
176+
169177
case tok::identifier: // id+id or id+number or id+L"foo".
170178
// id+'.'... will not append.
171179
if (Tok.is(tok::numeric_constant))

lib/Lex/TokenLexer.cpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -435,12 +435,13 @@ bool TokenLexer::PasteTokens(Token &Tok) {
435435
// Lex the resultant pasted token into Result.
436436
Token Result;
437437

438-
if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
438+
if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
439439
// Common paste case: identifier+identifier = identifier. Avoid creating
440440
// a lexer and other overhead.
441441
PP.IncrementPasteCounter(true);
442442
Result.startToken();
443-
Result.setKind(tok::identifier);
443+
Result.setKind(tok::raw_identifier);
444+
Result.setRawIdentifierData(ResultTokStrPtr);
444445
Result.setLocation(ResultTokLoc);
445446
Result.setLength(LHSLen+RHSLen);
446447
} else {
@@ -524,10 +525,10 @@ bool TokenLexer::PasteTokens(Token &Tok) {
524525
// Now that we got the result token, it will be subject to expansion. Since
525526
// token pasting re-lexes the result token in raw mode, identifier information
526527
// isn't looked up. As such, if the result is an identifier, look up id info.
527-
if (Tok.is(tok::identifier)) {
528+
if (Tok.is(tok::raw_identifier)) {
528529
// Look up the identifier info for the token. We disabled identifier lookup
529530
// by saying we're skipping contents, so we need to do this manually.
530-
PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
531+
PP.LookUpIdentifierInfo(Tok);
531532
}
532533
return false;
533534
}

0 commit comments

Comments
 (0)