Skip to content

Commit d94bd80

Browse files
committed
Add support for raw identifiers.
Raw identifiers are backtick-delimited identifiers that can contain any non-identifier character other than the backtick itself, CR, LF, or other non-printable ASCII code units, and which are also not composed entirely of operator characters.
1 parent 7f1792a commit d94bd80

34 files changed

+522
-103
lines changed

include/swift/AST/ASTDemangler.h

+4
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ class ASTBuilder {
306306

307307
static GenericTypeDecl *getAcceptableTypeDeclCandidate(ValueDecl *decl,
308308
Demangle::Node::Kind kind);
309+
310+
/// Returns an identifier with the given name, automatically removing any
311+
/// surrounding backticks that are present for raw identifiers.
312+
Identifier getIdentifier(StringRef name);
309313
};
310314

311315
SWIFT_END_INLINE_NAMESPACE

include/swift/AST/ASTPrinter.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,9 @@ void printWithCompatibilityFeatureChecks(ASTPrinter &printer,
444444
Decl *decl,
445445
llvm::function_ref<void()> printBody);
446446

447-
/// Determine whether we need to escape the given keyword within the
448-
/// given context, by wrapping it in backticks.
449-
bool escapeKeywordInContext(StringRef keyword, PrintNameContext context);
447+
/// Determine whether we need to escape the given name within the given
448+
/// context, by wrapping it in backticks.
449+
bool escapeIdentifierInContext(Identifier name, PrintNameContext context);
450450

451451
} // namespace swift
452452

include/swift/AST/Identifier.h

+18-4
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,22 @@ class Identifier {
101101

102102
/// isOperator - Return true if this identifier is an operator, false if it is
103103
/// a normal identifier.
104-
/// FIXME: We should maybe cache this.
105104
bool isOperator() const {
106105
if (empty())
107106
return false;
108107
if (isEditorPlaceholder())
109108
return false;
110-
if ((unsigned char)Pointer[0] < 0x80)
111-
return isOperatorStartCodePoint((unsigned char)Pointer[0]);
112109

113110
// Perform the full operator check out of line.
114111
return isOperatorSlow();
115112
}
116113

114+
/// Returns true if this identifier contains non-identifier characters and
115+
/// must always be escaped with backticks, even in contexts where other
116+
/// escaped identifiers could omit backticks (like keywords as argument
117+
/// labels).
118+
bool mustAlwaysBeEscaped() const;
119+
117120
bool isArithmeticOperator() const {
118121
return is("+") || is("-") || is("*") || is("/") || is("%");
119122
}
@@ -350,6 +353,10 @@ class DeclBaseName {
350353
return !isSpecial() && getIdentifier().isOperator();
351354
}
352355

356+
bool mustAlwaysBeEscaped() const {
357+
return !isSpecial() && getIdentifier().mustAlwaysBeEscaped();
358+
}
359+
353360
bool isEditorPlaceholder() const {
354361
return !isSpecial() && getIdentifier().isEditorPlaceholder();
355362
}
@@ -571,7 +578,12 @@ class DeclName {
571578
bool isOperator() const {
572579
return getBaseName().isOperator();
573580
}
574-
581+
582+
/// True if this name must always be escaped with backticks.
583+
bool mustAlwaysBeEscaped() const {
584+
return getBaseName().mustAlwaysBeEscaped();
585+
}
586+
575587
/// True if this name should be found by a decl ref or member ref under the
576588
/// name specified by 'refName'.
577589
///
@@ -728,6 +740,8 @@ class DeclNameRef {
728740
return FullName.isOperator();
729741
}
730742

743+
bool mustAlwaysBeEscaped() const { return FullName.mustAlwaysBeEscaped(); }
744+
731745
bool isCompoundName() const {
732746
return FullName.isCompoundName();
733747
}

include/swift/Basic/Mangler.h

+8-1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ class Mangler {
114114
print(llvm::dbgs());
115115
}
116116

117+
/// Appends the given raw identifier to the buffer in the form required to
118+
/// mangle it. This handles the transformations needed for such identifiers
119+
/// to retain compatibility with older runtimes.
120+
static void
121+
appendRawIdentifierForRuntime(StringRef ident,
122+
llvm::SmallVectorImpl<char> &buffer);
123+
117124
protected:
118125
/// Removes the last characters of the buffer by setting its size to a
119126
/// smaller value.
@@ -143,7 +150,7 @@ class Mangler {
143150
SWIFT_DEBUG_DUMP;
144151

145152
/// Appends a mangled identifier string.
146-
void appendIdentifier(StringRef ident);
153+
void appendIdentifier(StringRef ident, bool allowRawIdentifiers = true);
147154

148155
// NOTE: the addSubstitution functions perform the value computation before
149156
// the assignment because there is no sequence point synchronising the

include/swift/Demangling/ManglingUtils.h

+8-2
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,16 @@ inline bool isWordEnd(char ch, char prevCh) {
5858
return false;
5959
}
6060

61+
/// Returns true if \p ch is a valid character which may appear at the start
62+
/// of a symbol mangling.
63+
inline bool isValidSymbolStart(char ch) {
64+
return isLetter(ch) || ch == '_' || ch == '$';
65+
}
66+
6167
/// Returns true if \p ch is a valid character which may appear in a symbol
62-
/// mangling.
68+
/// mangling anywhere other than the first character.
6369
inline bool isValidSymbolChar(char ch) {
64-
return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$';
70+
return isValidSymbolStart(ch) || isDigit(ch);
6571
}
6672

6773
/// Returns true if \p str contains any character which may not appear in a

include/swift/IDE/CompletionLookup.h

+2
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,8 @@ class CompletionLookup final : public swift::VisibleDeclConsumer {
340340
void addValueBaseName(CodeCompletionResultBuilder &Builder,
341341
DeclBaseName Name);
342342

343+
void addIdentifier(CodeCompletionResultBuilder &Builder, Identifier Name);
344+
343345
void addLeadingDot(CodeCompletionResultBuilder &Builder);
344346

345347
void addTypeAnnotation(CodeCompletionResultBuilder &Builder, Type T,

include/swift/Parse/Lexer.h

+5
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,11 @@ class Lexer {
395395
/// identifier, without escaping characters.
396396
static bool isIdentifier(StringRef identifier);
397397

398+
/// Returns true if the given string is a raw identifier that must always
399+
/// be escaped by backticks when printing it back in source form or writing
400+
/// its name into runtime metadata.
401+
static bool identifierMustAlwaysBeEscaped(StringRef str);
402+
398403
/// Determine the token kind of the string, given that it is a valid
399404
/// non-operator identifier. Return tok::identifier if the string is not a
400405
/// reserved word.

lib/AST/ASTDemangler.cpp

+38-14
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ TypeDecl *ASTBuilder::createTypeDecl(NodePointer node) {
9494
if (proto == nullptr)
9595
return nullptr;
9696

97-
auto name = Ctx.getIdentifier(node->getChild(1)->getText());
97+
auto name = getIdentifier(node->getChild(1)->getText());
9898
return proto->getAssociatedType(name);
9999
}
100100

@@ -110,10 +110,9 @@ ASTBuilder::createBuiltinType(StringRef builtinName,
110110

111111
StringRef strippedName =
112112
builtinName.drop_front(BUILTIN_TYPE_NAME_PREFIX.size());
113-
Ctx.TheBuiltinModule->lookupValue(Ctx.getIdentifier(strippedName),
114-
NLKind::QualifiedLookup,
115-
decls);
116-
113+
Ctx.TheBuiltinModule->lookupValue(getIdentifier(strippedName),
114+
NLKind::QualifiedLookup, decls);
115+
117116
if (decls.size() == 1 && isa<TypeDecl>(decls[0]))
118117
return cast<TypeDecl>(decls[0])->getDeclaredInterfaceType();
119118
}
@@ -348,7 +347,7 @@ Type ASTBuilder::createTupleType(ArrayRef<Type> eltTypes, ArrayRef<StringRef> la
348347
for (unsigned i : indices(eltTypes)) {
349348
Identifier label;
350349
if (!labels[i].empty())
351-
label = Ctx.getIdentifier(labels[i]);
350+
label = getIdentifier(labels[i]);
352351
elements.emplace_back(eltTypes[i], label);
353352
}
354353

@@ -408,7 +407,7 @@ Type ASTBuilder::createFunctionType(
408407
if (!type->isMaterializable())
409408
return Type();
410409

411-
auto label = Ctx.getIdentifier(param.getLabel());
410+
auto label = getIdentifier(param.getLabel());
412411
auto flags = param.getFlags();
413412
auto ownership =
414413
ParamDecl::getParameterSpecifierForValueOwnership(asValueOwnership(flags.getOwnership()));
@@ -884,7 +883,7 @@ Type ASTBuilder::createGenericTypeParameterType(unsigned depth,
884883

885884
Type ASTBuilder::createDependentMemberType(StringRef member,
886885
Type base) {
887-
auto identifier = Ctx.getIdentifier(member);
886+
auto identifier = getIdentifier(member);
888887

889888
if (auto *archetype = base->getAs<ArchetypeType>()) {
890889
if (Type memberType = archetype->getNestedTypeByName(identifier))
@@ -901,7 +900,7 @@ Type ASTBuilder::createDependentMemberType(StringRef member,
901900
Type ASTBuilder::createDependentMemberType(StringRef member,
902901
Type base,
903902
ProtocolDecl *protocol) {
904-
auto identifier = Ctx.getIdentifier(member);
903+
auto identifier = getIdentifier(member);
905904

906905
if (auto *archetype = base->getAs<ArchetypeType>()) {
907906
if (auto assocType = protocol->getAssociatedType(identifier))
@@ -1141,7 +1140,7 @@ ASTBuilder::getAcceptableTypeDeclCandidate(ValueDecl *decl,
11411140

11421141
DeclContext *ASTBuilder::getNotionalDC() {
11431142
if (!NotionalDC) {
1144-
NotionalDC = ModuleDecl::createEmpty(Ctx.getIdentifier(".RemoteAST"), Ctx);
1143+
NotionalDC = ModuleDecl::createEmpty(getIdentifier(".RemoteAST"), Ctx);
11451144
NotionalDC = new (Ctx) TopLevelCodeDecl(NotionalDC);
11461145
}
11471146
return NotionalDC;
@@ -1314,7 +1313,7 @@ ASTBuilder::findDeclContext(NodePointer node) {
13141313
Demangle::Node::Kind::PrivateDeclName) {
13151314
name = declNameNode->getChild(1)->getText();
13161315
privateDiscriminator =
1317-
Ctx.getIdentifier(declNameNode->getChild(0)->getText());
1316+
getIdentifier(declNameNode->getChild(0)->getText());
13181317

13191318
} else if (declNameNode->getKind() ==
13201319
Demangle::Node::Kind::RelatedEntityDeclName) {
@@ -1342,14 +1341,14 @@ ASTBuilder::findDeclContext(NodePointer node) {
13421341
return nullptr;
13431342

13441343
for (auto *module : potentialModules)
1345-
if (auto typeDecl = findTypeDecl(module, Ctx.getIdentifier(name),
1344+
if (auto typeDecl = findTypeDecl(module, getIdentifier(name),
13461345
privateDiscriminator, node->getKind()))
13471346
return typeDecl;
13481347
return nullptr;
13491348
}
13501349

13511350
if (auto *dc = findDeclContext(child))
1352-
if (auto typeDecl = findTypeDecl(dc, Ctx.getIdentifier(name),
1351+
if (auto typeDecl = findTypeDecl(dc, getIdentifier(name),
13531352
privateDiscriminator, node->getKind()))
13541353
return typeDecl;
13551354

@@ -1548,7 +1547,7 @@ GenericTypeDecl *ASTBuilder::findForeignTypeDecl(StringRef name,
15481547
found);
15491548
break;
15501549
}
1551-
importer->lookupValue(Ctx.getIdentifier(name), consumer);
1550+
importer->lookupValue(getIdentifier(name), consumer);
15521551
if (consumer.Result)
15531552
consumer.Result = getAcceptableTypeDeclCandidate(consumer.Result, kind);
15541553
break;
@@ -1558,3 +1557,28 @@ GenericTypeDecl *ASTBuilder::findForeignTypeDecl(StringRef name,
15581557

15591558
return consumer.Result;
15601559
}
1560+
1561+
Identifier ASTBuilder::getIdentifier(StringRef name) {
1562+
if (name.size() > 1 && name.front() == '`' && name.back() == '`') {
1563+
// Raw identifiers have backticks affixed before mangling. We need to
1564+
// remove those before creating the Identifier for the AST, which doesn't
1565+
// encode the backticks.
1566+
std::string fixedName;
1567+
for (size_t i = 1; i < name.size() - 1; ++i) {
1568+
unsigned char ch = name[i];
1569+
// Raw identifiers have the space (U+0020) replaced with a non-breaking
1570+
// space (U+00A0, UTF-8: 0xC2 0xA0) in their mangling so that parts of
1571+
// the runtime that use space as a delimiter remain compatible with
1572+
// these identifiers. Flip it back.
1573+
if (ch == 0xc2 && i < name.size() - 2 &&
1574+
(unsigned char)name[i + 1] == 0xa0) {
1575+
fixedName.push_back(' ');
1576+
++i;
1577+
} else {
1578+
fixedName.push_back(ch);
1579+
}
1580+
}
1581+
return Ctx.getIdentifier(fixedName);
1582+
}
1583+
return Ctx.getIdentifier(name);
1584+
}

lib/AST/ASTMangler.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,7 @@ void ASTMangler::appendDeclName(const ValueDecl *decl, DeclBaseName name) {
11521152
"synthesized type's original name must be a valid Swift identifier");
11531153
appendIdentifier(synthesizedTypeAttr->originalTypeName);
11541154
} else if (name.isOperator()) {
1155-
appendIdentifier(translateOperator(name.getIdentifier().str()));
1155+
appendIdentifier(translateOperator(name.getIdentifier().str()), /*allowRawIdentifiers=*/ false);
11561156
switch (decl->getAttrs().getUnaryOperatorKind()) {
11571157
case UnaryOperatorKind::Prefix:
11581158
appendOperator("op");
@@ -4782,7 +4782,7 @@ void ASTMangler::appendMacroExpansionContext(
47824782
appendIdentifier(origDC->getParentModule()->getName().str());
47834783

47844784
auto *SF = origDC->getParentSourceFile();
4785-
appendIdentifier(llvm::sys::path::filename(SF->getFilename()));
4785+
appendIdentifier(llvm::sys::path::filename(SF->getFilename()), /*allowRawIdentifiers=*/false);
47864786

47874787
auto lineColumn = sourceMgr.getLineAndColumnInBuffer(loc);
47884788
appendOperator("fMX", Index(lineColumn.first), Index(lineColumn.second));

lib/AST/ASTPrinter.cpp

+9-10
Original file line numberDiff line numberDiff line change
@@ -576,10 +576,9 @@ ASTPrinter &operator<<(ASTPrinter &printer, tok keyword) {
576576
}
577577

578578
/// Determine whether to escape the given name in the given context.
579-
bool swift::escapeKeywordInContext(
580-
StringRef keyword,
581-
PrintNameContext context
582-
) {
579+
bool swift::escapeIdentifierInContext(Identifier name,
580+
PrintNameContext context) {
581+
StringRef keyword = name.str();
583582
bool isKeyword = llvm::StringSwitch<bool>(keyword)
584583
#define KEYWORD(KW) \
585584
.Case(#KW, true)
@@ -589,7 +588,7 @@ bool swift::escapeKeywordInContext(
589588
switch (context) {
590589
case PrintNameContext::Normal:
591590
case PrintNameContext::Attribute:
592-
return isKeyword;
591+
return isKeyword || name.mustAlwaysBeEscaped();
593592
case PrintNameContext::Keyword:
594593
case PrintNameContext::IntroducerKeyword:
595594
return false;
@@ -599,12 +598,12 @@ bool swift::escapeKeywordInContext(
599598
return isKeyword && keyword != "Self";
600599

601600
case PrintNameContext::TypeMember:
602-
return isKeyword || !canBeMemberName(keyword);
601+
return isKeyword || !canBeMemberName(keyword) || name.mustAlwaysBeEscaped();
603602

604603
case PrintNameContext::FunctionParameterExternal:
605604
case PrintNameContext::FunctionParameterLocal:
606605
case PrintNameContext::TupleElement:
607-
return !canBeArgumentLabel(keyword);
606+
return !canBeArgumentLabel(keyword) || name.mustAlwaysBeEscaped();
608607
}
609608

610609
llvm_unreachable("Unhandled PrintNameContext in switch.");
@@ -619,12 +618,12 @@ void ASTPrinter::printName(Identifier Name, PrintNameContext Context) {
619618
return;
620619
}
621620

622-
bool shouldEscapeKeyword = escapeKeywordInContext(Name.str(), Context);
621+
bool shouldEscapeIdentifier = escapeIdentifierInContext(Name, Context);
623622

624-
if (shouldEscapeKeyword)
623+
if (shouldEscapeIdentifier)
625624
*this << "`";
626625
*this << Name.str();
627-
if (shouldEscapeKeyword)
626+
if (shouldEscapeIdentifier)
628627
*this << "`";
629628

630629
printNamePost(Context);

lib/AST/Attr.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1553,7 +1553,8 @@ bool DeclAttribute::printImpl(ASTPrinter &Printer, const PrintOptions &Options,
15531553
StringRef nameText = name.getName().getString(buffer);
15541554
bool shouldEscape =
15551555
!name.getName().isSpecial() &&
1556-
(escapeKeywordInContext(nameText, PrintNameContext::Normal) ||
1556+
(escapeIdentifierInContext(name.getName().getBaseIdentifier(),
1557+
PrintNameContext::Normal) ||
15571558
nameText == "$");
15581559
Printer << "(";
15591560
if (shouldEscape)

lib/AST/Identifier.cpp

+4-10
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, swift::ObjCSelector S) {
7878
return OS;
7979
}
8080

81-
bool Identifier::isOperatorSlow() const {
82-
StringRef data = str();
83-
auto *s = reinterpret_cast<llvm::UTF8 const *>(data.begin()),
84-
*end = reinterpret_cast<llvm::UTF8 const *>(data.end());
85-
llvm::UTF32 codePoint;
86-
llvm::ConversionResult res =
87-
llvm::convertUTF8Sequence(&s, end, &codePoint, llvm::strictConversion);
88-
assert(res == llvm::conversionOK && "invalid UTF-8 in identifier?!");
89-
(void)res;
90-
return !empty() && isOperatorStartCodePoint(codePoint);
81+
bool Identifier::isOperatorSlow() const { return Lexer::isOperator(str()); }
82+
83+
bool Identifier::mustAlwaysBeEscaped() const {
84+
return Lexer::identifierMustAlwaysBeEscaped(str());
9185
}
9286

9387
int Identifier::compare(Identifier other) const {

0 commit comments

Comments
 (0)