-
Notifications
You must be signed in to change notification settings - Fork 10.5k
/
Copy pathInlinableText.cpp
318 lines (266 loc) · 11 KB
/
InlinableText.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
//===---- InlinableText.cpp - Extract inlinable source text -----*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "InlinableText.h"
#include "swift/AST/ASTContext.h"
#include "swift/AST/ASTNode.h"
#include "swift/AST/ASTVisitor.h"
#include "swift/AST/ASTWalker.h"
#include "swift/AST/Decl.h"
#include "swift/AST/Expr.h"
#include "swift/Parse/Lexer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallString.h"
using namespace swift;
/// Gets the last token that exists inside this IfConfigClause, ignoring
/// hoisted elements.
///
/// If the clause is the last element, this returns the beginning of the line
/// before the parent IfConfigDecl's #endif token. Otherwise, it's the beginning
/// of the line before the next clause's #else or #elseif token.
static SourceLoc
getEffectiveEndLoc(SourceManager &sourceMgr, const IfConfigClause *clause,
const IfConfigDecl *decl) {
auto clauses = decl->getClauses();
if (clause == &clauses.back())
return Lexer::getLocForStartOfLine(sourceMgr, decl->getEndLoc());
assert(clause >= clauses.begin() && clause < clauses.end() &&
"clauses must be contiguous");
auto *nextClause = clause + 1;
return Lexer::getLocForStartOfLine(sourceMgr, nextClause->Loc);
}
namespace {
class IsFeatureCheck : public ASTWalker {
public:
bool foundFeature = false;
/// Walk everything that's available.
MacroWalking getMacroWalkingBehavior() const override {
return MacroWalking::ArgumentsAndExpansion;
}
PreWalkResult<Expr *> walkToExprPre(Expr *expr) override {
if (auto unresolved = dyn_cast<UnresolvedDeclRefExpr>(expr)) {
if (unresolved->getName().getBaseName().userFacingName().startswith("$"))
foundFeature = true;
}
if (auto call = dyn_cast<CallExpr>(expr)) {
if (auto unresolved = dyn_cast<UnresolvedDeclRefExpr>(call->getFn())) {
StringRef userFacing = unresolved->getName().getBaseName()
.userFacingName();
if (userFacing == "compiler" || userFacing == "_compiler_version")
foundFeature = true;
}
}
return Action::SkipNodeIf(foundFeature, expr);
}
};
bool clauseIsFeatureCheck(Expr *cond) {
IsFeatureCheck checker;
cond->walk(checker);
return checker.foundFeature;
}
/// Whether any of the clauses here involves a feature check
/// (e.g., $AsyncAwait).
bool anyClauseIsFeatureCheck(ArrayRef<IfConfigClause> clauses) {
for (const auto &clause : clauses) {
if (Expr *cond = clause.Cond) {
if (clauseIsFeatureCheck(cond))
return true;
}
}
return false;
}
/// A walker that searches through #if declarations, finding all text that does
/// not contribute to the final evaluated AST.
///
/// For example, in the following code:
/// ```
/// #if true
/// print("true")
/// #else
/// print("false")
/// #endif
/// ```
/// ExtractInactiveRanges will return the ranges (with leading newlines) of:
/// ```
/// #if true
/// #else
/// print("false")
/// #endif
/// ```
/// Leaving behind just 'print("true")'s range.
struct ExtractInactiveRanges : public ASTWalker {
SmallVector<CharSourceRange, 4> ranges;
SourceManager &sourceMgr;
/// Walk everything that's available.
MacroWalking getMacroWalkingBehavior() const override {
return MacroWalking::ArgumentsAndExpansion;
}
explicit ExtractInactiveRanges(SourceManager &sourceMgr)
: sourceMgr(sourceMgr) {}
/// Adds the two SourceLocs as a CharSourceRange to the set of ignored
/// ranges.
/// \note: This assumes each of these locs is a character location, not a
/// token location.
void addRange(SourceLoc start, SourceLoc end) {
auto charRange = CharSourceRange(sourceMgr, start, end);
ranges.push_back(charRange);
}
PreWalkAction walkToDeclPre(Decl *d) override {
auto icd = dyn_cast<IfConfigDecl>(d);
if (!icd)
return Action::Continue();
auto start = Lexer::getLocForStartOfLine(sourceMgr, icd->getStartLoc());
auto end = Lexer::getLocForEndOfLine(sourceMgr, icd->getEndLoc());
auto clause = icd->getActiveClause();
// If there's no active clause, add the entire #if...#endif block.
if (!clause) {
addRange(start, end);
return Action::SkipNode();
}
// If the clause is checking for a particular feature with $ or a compiler
// version, keep the whole thing.
if (anyClauseIsFeatureCheck(icd->getClauses())) {
return Action::SkipNode();
}
// Ignore range from beginning of '#if', '#elseif', or '#else' to the
// beginning of the elements of this clause.
auto elementsBegin = clause->Loc;
// If there's a condition (e.g. this isn't a '#else' block), then ignore
// everything up to the end of the condition.
if (auto cond = clause->Cond) {
elementsBegin = cond->getEndLoc();
}
addRange(start, Lexer::getLocForEndOfLine(sourceMgr, elementsBegin));
// Ignore range from effective end of the elements of this clause to the
// end of the '#endif'
addRange(getEffectiveEndLoc(sourceMgr, clause, icd), end);
// Walk into direct children of this node that are IfConfigDecls, because
// the standard walker won't walk into them.
for (auto &elt : clause->Elements)
if (elt.isDecl(DeclKind::IfConfig))
elt.get<Decl *>()->walk(*this);
return Action::SkipNode();
}
/// Gets the ignored ranges in source order.
ArrayRef<CharSourceRange> getSortedRanges() {
std::sort(ranges.begin(), ranges.end(),
[&](CharSourceRange r1, CharSourceRange r2) {
assert(!r1.overlaps(r2) && "no overlapping ranges");
return sourceMgr.isBeforeInBuffer(r1.getStart(), r2.getStart());
});
return ranges;
}
};
} // end anonymous namespace
/// Appends the textual contents of the provided source range, stripping
/// the contents of comments that appear in the source.
///
/// Given that comments are treated as whitespace, this also appends a
/// space or newline (depending if the comment was multi-line and itself
/// had newlines in the body) in place of the comment, to avoid fusing tokens
/// together.
static void appendRange(
SourceManager &sourceMgr, SourceLoc start, SourceLoc end,
SmallVectorImpl<char> &scratch) {
unsigned bufferID = sourceMgr.findBufferContainingLoc(start);
unsigned offset = sourceMgr.getLocOffsetInBuffer(start, bufferID);
unsigned endOffset = sourceMgr.getLocOffsetInBuffer(end, bufferID);
// Strip comments from the chunk before adding it by re-lexing the range.
LangOptions FakeLangOpts;
Lexer lexer(FakeLangOpts, sourceMgr, bufferID, nullptr, LexerMode::Swift,
HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
offset, endOffset);
SourceLoc nonCommentStart = start;
Token token;
// Re-lex the range, and skip the full text of `tok::comment` tokens.
while (!token.is(tok::eof)) {
lexer.lex(token);
// Skip over #sourceLocation's in the file.
if (token.is(tok::pound_sourceLocation)) {
// Append the text leading up to the #sourceLocation
auto charRange = CharSourceRange(
sourceMgr, nonCommentStart, token.getLoc());
StringRef text = sourceMgr.extractText(charRange);
scratch.append(text.begin(), text.end());
// Skip to the right paren. We know the AST is already valid, so there's
// definitely a right paren.
while (!token.is(tok::r_paren)) {
lexer.lex(token);
}
nonCommentStart = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());
}
if (token.is(tok::comment)) {
// Grab the start of the full comment token (with leading trivia as well)
SourceLoc commentLoc = token.getLoc();
// Find the end of the token (with trailing trivia)
SourceLoc endLoc = Lexer::getLocForEndOfToken(sourceMgr, token.getLoc());
// The comment token's range includes leading/trailing whitespace, so trim
// whitespace and only strip the portions of the comment that are not
// whitespace.
CharSourceRange range = CharSourceRange(sourceMgr, commentLoc, endLoc);
StringRef fullTokenText = sourceMgr.extractText(range);
unsigned leadingWhitespace = fullTokenText.size() -
fullTokenText.ltrim().size();
if (leadingWhitespace > 0) {
commentLoc = commentLoc.getAdvancedLoc(leadingWhitespace);
}
unsigned trailingWhitespace = fullTokenText.size() -
fullTokenText.rtrim().size();
if (trailingWhitespace > 0) {
endLoc = endLoc.getAdvancedLoc(-trailingWhitespace);
}
// First, extract the text up to the start of the comment, including the
// whitespace.
auto charRange = CharSourceRange(sourceMgr, nonCommentStart, commentLoc);
StringRef text = sourceMgr.extractText(charRange);
scratch.append(text.begin(), text.end());
// Next, search through the comment text to see if it's a block comment
// with a newline. If so we need to re-insert a newline to avoid fusing
// multi-line tokens together.
auto commentTextRange = CharSourceRange(sourceMgr, commentLoc, endLoc);
StringRef commentText = sourceMgr.extractText(commentTextRange);
bool hasNewline = commentText.find_first_of("\n\r") != StringRef::npos;
// Use a newline as a filler character if the comment itself had a newline
// in it.
char filler = hasNewline ? '\n' : ' ';
// Append a single whitespace filler character, to avoid fusing tokens.
scratch.push_back(filler);
// Start the next region after the contents of the comment.
nonCommentStart = endLoc;
}
}
if (nonCommentStart.isValid() && nonCommentStart != end) {
auto charRange = CharSourceRange(sourceMgr, nonCommentStart, end);
StringRef text = sourceMgr.extractText(charRange);
scratch.append(text.begin(), text.end());
}
}
StringRef swift::extractInlinableText(SourceManager &sourceMgr, ASTNode node,
SmallVectorImpl<char> &scratch) {
// Extract inactive ranges from the text of the node.
ExtractInactiveRanges extractor(sourceMgr);
node.walk(extractor);
// Begin piecing together active code ranges.
// Get the full start and end of the provided node, as character locations.
SourceLoc start = node.getStartLoc();
SourceLoc end = Lexer::getLocForEndOfToken(sourceMgr, node.getEndLoc());
for (auto &range : extractor.getSortedRanges()) {
// Add the text from the current 'start' to this ignored range's start.
appendRange(sourceMgr, start, range.getStart(), scratch);
// Set 'start' to the end of this range, effectively skipping it.
start = range.getEnd();
}
// If there's leftover unignored text, add it.
if (start != end) {
appendRange(sourceMgr, start, end, scratch);
}
return { scratch.data(), scratch.size() };
}