Skip to content

Commit 3186e3c

Browse files
committed
[clangd] Lib to compute and represent selection under cursor.
Summary: The primary problem this solves is to expose the codeAction selection to AST-based refactorings in a way that makes it easy and efficient for them to bind to the right parts of the AST. It should also allow us to make XRefs based features (textDocument/definition) more robust, more easily implement textDocument/typeDefinition etc. As an example, template parameter references can be identified without special handling. There should be slight speedup too: we can prune most of the AST traversal in most cases. Elephant in the room: this is similar-but-different to Tooling/Refactoring/ASTSelection. That captures a smaller set of AST nodes, has a slightly different way of representing selections, and generally has more features and does more work. The overall shape is pretty similar, and yet I can't quite get it to behave as I expect. Reviewers: ilya-biryukov, kadircet Subscribers: mgorny, ioeric, MaskRay, jkorous, mgrang, arphaman Tags: #clang Differential Revision: https://reviews.llvm.org/D57562 llvm-svn: 352874
1 parent fbcbac7 commit 3186e3c

File tree

5 files changed

+670
-0
lines changed

5 files changed

+670
-0
lines changed

Diff for: clang-tools-extra/clangd/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ add_clang_library(clangDaemon
4646
Protocol.cpp
4747
Quality.cpp
4848
RIFF.cpp
49+
Selection.cpp
4950
SourceCode.cpp
5051
Threading.cpp
5152
Trace.cpp

Diff for: clang-tools-extra/clangd/Selection.cpp

+301
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
//===--- Selection.cpp ----------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "Selection.h"
10+
#include "ClangdUnit.h"
11+
#include "clang/AST/RecursiveASTVisitor.h"
12+
13+
namespace clang {
14+
namespace clangd {
15+
namespace {
16+
using Node = SelectionTree::Node;
17+
using ast_type_traits::DynTypedNode;
18+
19+
// We find the selection by visiting written nodes in the AST, looking for nodes
20+
// that intersect with the selected character range.
21+
//
22+
// While traversing, we maintain a parent stack. As nodes pop off the stack,
23+
// we decide whether to keep them or not. To be kept, they must either be
24+
// selected or contain some nodes that are.
25+
//
26+
// For simple cases (not inside macros) we prune subtrees that don't intersect.
27+
class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
public:
  // Runs the visitor to gather selected nodes and their ancestors.
  // [Begin, End) is a half-open range of offsets into File.
  // If there is any selection, the root (TUDecl) is the first node of the
  // result; otherwise the result is empty.
  static std::deque<Node> collect(ASTContext &AST, unsigned Begin,
                                  unsigned End, FileID File) {
    SelectionVisitor V(AST, Begin, End, File);
    V.TraverseAST(AST);
    assert(V.Stack.size() == 1 && "Unpaired push/pop?");
    assert(V.Stack.top() == &V.Nodes.front());
    if (V.Nodes.size() == 1) // TUDecl, but no nodes under it.
      V.Nodes.clear();
    // V.Nodes is a member of a local object, so it is not implicitly moved.
    return std::move(V.Nodes);
  }

  // We traverse all "well-behaved" nodes the same way:
  //  - push the node onto the stack
  //  - traverse its children recursively
  //  - pop it from the stack
  //    - hit testing: is intersection(node, selection) - union(children) empty?
  //    - attach it to the tree if it or any children hit the selection
  //
  // Two categories of nodes are not "well-behaved":
  //  - those without source range information, we don't record those
  //  - those that can't be stored in DynTypedNode.
  // We're missing some interesting things like Attr due to the latter.
  bool TraverseDecl(Decl *X) {
    if (isa<TranslationUnitDecl>(X))
      return Base::TraverseDecl(X); // Already pushed by constructor.
    return traverseNode(X, [&] { return Base::TraverseDecl(X); });
  }
  bool TraverseTypeLoc(TypeLoc X) {
    return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
  }
  // The name must match the RecursiveASTVisitor hook exactly: a misspelt name
  // is never called by the base class, and NestedNameSpecifierLoc nodes would
  // then silently be left out of the tree.
  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
    return traverseNode(
        &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
  }
  bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
    return traverseNode(
        X, [&] { return Base::TraverseConstructorInitializer(X); });
  }
  // Stmt is the same, but this form allows the data recursion optimization.
  // Returning false from the "pre" hook skips the statement (and the matching
  // "post" hook), so push() and pop() stay paired.
  bool dataTraverseStmtPre(Stmt *X) {
    if (!X || canSafelySkipNode(X->getSourceRange()))
      return false;
    push(DynTypedNode::create(*X));
    return true;
  }
  bool dataTraverseStmtPost(Stmt *X) {
    pop();
    return true;
  }
  // Uninteresting parts of the AST that don't have locations within them.
  bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
  bool TraverseType(QualType) { return true; }

private:
  using Base = RecursiveASTVisitor<SelectionVisitor>;

  // Sets up the visitor for the half-open selection [SelBegin, SelEnd) in
  // SelFile and seeds the node list and ancestor stack with the TU decl.
  SelectionVisitor(ASTContext &AST, unsigned SelBegin, unsigned SelEnd,
                   FileID SelFile)
      : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
        SelBegin(SelBegin), SelEnd(SelEnd), SelFile(SelFile),
        SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken(
            SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))) {
    // Ensure we have a node for the TU decl, regardless of traversal scope.
    Nodes.emplace_back();
    Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
    Nodes.back().Parent = nullptr;
    Nodes.back().Selected = SelectionTree::Unselected;
    Stack.push(&Nodes.back());
  }

  // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
  // Node is always a pointer so the generic code can handle any null checks.
  // Returns true (keep traversing) when the node is skipped, otherwise
  // whatever Body returns.
  template <typename T, typename Func>
  bool traverseNode(T *Node, const Func &Body) {
    if (Node == nullptr || canSafelySkipNode(Node->getSourceRange()))
      return true;
    push(DynTypedNode::create(*Node));
    bool Ret = Body();
    pop();
    return Ret;
  }

  // An optimization for a common case: nodes outside macro expansions that
  // don't intersect the selection may be recursively skipped.
  // Ranges whose endpoints do not both decompose into SelFile are never
  // skipped here; they are left to the precise test in computeSelection().
  bool canSafelySkipNode(SourceRange S) {
    auto B = SM.getDecomposedLoc(S.getBegin());
    auto E = SM.getDecomposedLoc(S.getEnd());
    if (B.first != SelFile || E.first != SelFile)
      return false;
    return B.second >= SelEnd || E.second < SelBeginTokenStart;
  }

  // Pushes a node onto the ancestor stack. Pairs with pop().
  // The new node's parent is the current top of the stack.
  void push(DynTypedNode Node) {
    Nodes.emplace_back();
    Nodes.back().ASTNode = std::move(Node);
    Nodes.back().Parent = Stack.top();
    Nodes.back().Selected = SelectionTree::Unselected;
    Stack.push(&Nodes.back());
  }

  // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
  void pop() {
    Node &N = *Stack.top();
    N.Selected = computeSelection(N);
    if (N.Selected || !N.Children.empty()) {
      // Attach to the tree.
      N.Parent->Children.push_back(&N);
    } else {
      // Neither N nor any children are selected, it doesn't belong in the tree.
      assert(&N == &Nodes.back());
      Nodes.pop_back();
    }
    Stack.pop();
  }

  // Perform hit-testing of a complete Node against the selection.
  // This runs for every node in the AST, and must be fast in common cases.
  // This is called from pop(), so we can take children into account.
  SelectionTree::Selection computeSelection(const Node &N) {
    SourceRange S = N.ASTNode.getSourceRange();
    if (!S.isValid())
      return SelectionTree::Unselected;
    // getTopMacroCallerLoc() allows selection of constructs in macro args. e.g:
    //   #define LOOP_FOREVER(Body) for(;;) { Body }
    //   void IncrementLots(int &x) {
    //     LOOP_FOREVER( ++x; )
    //   }
    // Selecting "++x" or "x" will do the right thing.
    SourceLocation BeginLoc = SM.getTopMacroCallerLoc(S.getBegin());
    SourceLocation EndLoc = SM.getTopMacroCallerLoc(S.getEnd());
    auto B = SM.getDecomposedLoc(BeginLoc);
    auto E = SM.getDecomposedLoc(EndLoc);
    // Otherwise, nodes in macro expansions can't be selected.
    if (B.first != SelFile || E.first != SelFile)
      return SelectionTree::Unselected;
    // Cheap test: is there any overlap at all between the selection and range?
    // Note that E.second is the *start* of the last token, which is why we
    // compare against the "rounded-down" SelBegin.
    if (B.second >= SelEnd || E.second < SelBeginTokenStart)
      return SelectionTree::Unselected;

    // We hit something, need some more precise checks.
    // Adjust [B, E) to be a half-open character range.
    // Measure the token at the same location E was derived from: measuring at
    // the raw S.getEnd() would, for a macro-argument token, measure the token
    // at the macro expansion location instead of the argument as written.
    E.second += Lexer::MeasureTokenLength(EndLoc, SM, LangOpts);
    // This node's own selected text is (this range ^ selection) - child ranges.
    // If that's empty, then we've only collided with children.
    if (nodesCoverRange(N.Children, std::max(SelBegin, B.second),
                        std::min(SelEnd, E.second)))
      return SelectionTree::Unselected; // Hit children only.
    // Some of our own characters are covered, this is a true hit.
    return (B.second >= SelBegin && E.second <= SelEnd)
               ? SelectionTree::Complete
               : SelectionTree::Partial;
  }

  // Is the range [Begin, End) entirely covered by the union of the Nodes?
  // (The range is a parent node's extent, and the covering nodes are children).
  // An empty range counts as covered.
  bool nodesCoverRange(llvm::ArrayRef<const Node *> Nodes, unsigned Begin,
                       unsigned End) {
    if (Begin >= End)
      return true;
    if (Nodes.empty())
      return false;

    // Collect all the expansion ranges, as offsets.
    SmallVector<std::pair<unsigned, unsigned>, 8> ChildRanges;
    for (const Node *N : Nodes) {
      CharSourceRange R = SM.getExpansionRange(N->ASTNode.getSourceRange());
      auto B = SM.getDecomposedLoc(R.getBegin());
      auto E = SM.getDecomposedLoc(R.getEnd());
      if (B.first != SelFile || E.first != SelFile)
        continue;
      assert(R.isTokenRange());
      // Try to cover up to the next token, spaces between children don't count.
      if (auto Tok = Lexer::findNextToken(R.getEnd(), SM, LangOpts))
        E.second = SM.getFileOffset(Tok->getLocation());
      else
        E.second += Lexer::MeasureTokenLength(R.getEnd(), SM, LangOpts);
      ChildRanges.push_back({B.second, E.second});
    }
    llvm::sort(ChildRanges);

    // Scan through the child ranges, removing as we go.
    for (const auto &R : ChildRanges) {
      if (R.first > Begin)
        return false; // [Begin, R.first) is not covered.
      // Eliminate [R.first, R.second). A range nested inside an earlier one
      // ends before the current Begin and must not move it backwards.
      Begin = std::max(Begin, R.second);
      if (Begin >= End)
        return true; // Remaining range is empty.
    }
    return false; // Went through all children, trailing characters remain.
  }

  SourceManager &SM;
  const LangOptions &LangOpts;
  std::stack<Node *> Stack; // Ancestors of the node currently being visited.
  std::deque<Node> Nodes; // Stable pointers as we add more nodes.
  // Half-open selection range.
  unsigned SelBegin;
  unsigned SelEnd;
  FileID SelFile;
  // If the selection start slices a token in half, the beginning of that token.
  // This is useful for checking whether the end of a token range overlaps
  // the selection: range.end < SelBeginTokenStart is equivalent to
  // range.end + measureToken(range.end) < SelBegin (assuming range.end points
  // to a token), and it saves a lex every time.
  unsigned SelBeginTokenStart;
};
237+
238+
} // namespace
239+
240+
// Writes N and its descendants to OS, one node per line, with each level of
// children indented two columns further than its parent. Selected nodes are
// flagged in the column just before their text: '*' for a complete selection,
// '.' for a partial one.
void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
                          int Indent) const {
  if (!N.Selected) {
    OS.indent(Indent);
  } else {
    // The marker takes the last indentation column.
    const char Marker = N.Selected == SelectionTree::Complete ? '*' : '.';
    OS.indent(Indent - 1) << Marker;
  }
  OS << N.ASTNode.getNodeKind().asStringRef() << " ";
  N.ASTNode.print(OS, PrintPolicy);
  OS << "\n";
  for (const Node *Child : N.Children) {
    print(OS, *Child, Indent + 2);
  }
}
253+
254+
// Decide which selection emulates a "point" query in between characters.
255+
static std::pair<unsigned, unsigned> pointBounds(unsigned Offset, FileID FID,
256+
ASTContext &AST) {
257+
StringRef Buf = AST.getSourceManager().getBufferData(FID);
258+
// Edge-cases where the choice is forced.
259+
if (Buf.size() == 0)
260+
return {0, 0};
261+
if (Offset == 0)
262+
return {0, 1};
263+
if (Offset == Buf.size())
264+
return {Offset - 1, Offset};
265+
// We could choose either this byte or the previous. Usually we prefer the
266+
// character on the right of the cursor (or under a block cursor).
267+
// But if that's whitespace, we likely want the token on the left.
268+
if (isWhitespace(Buf[Offset]) && !isWhitespace(Buf[Offset - 1]))
269+
return {Offset - 1, Offset};
270+
return {Offset, Offset + 1};
271+
}
272+
273+
// Builds the selection tree for the half-open offset range [Begin, End) in
// the main file. An empty range is treated as a point query and widened to a
// single character by pointBounds().
SelectionTree::SelectionTree(ASTContext &AST, unsigned Begin, unsigned End)
    : PrintPolicy(AST.getLangOpts()) {
  PrintPolicy.TerseOutput = true;

  // No fundamental reason the selection needs to be in the main file,
  // but that's all clangd has needed so far.
  const FileID MainFile = AST.getSourceManager().getMainFileID();
  if (Begin == End) {
    const std::pair<unsigned, unsigned> Bounds =
        pointBounds(Begin, MainFile, AST);
    Begin = Bounds.first;
    End = Bounds.second;
  }

  Nodes = SelectionVisitor::collect(AST, Begin, End, MainFile);
  // An empty node list means nothing was selected, so there is no root.
  if (Nodes.empty())
    Root = nullptr;
  else
    Root = &Nodes.front();
}
285+
286+
// Point query: delegates to the range constructor with Begin == End == Offset,
// which that constructor widens to a one-character range via pointBounds().
SelectionTree::SelectionTree(ASTContext &AST, unsigned Offset)
287+
: SelectionTree(AST, Offset, Offset) {}
288+
289+
// Returns the deepest node that contains everything selected: walking down
// from the root, the first node that is itself selected or that has more than
// one child. Returns nullptr when the tree is empty.
const Node *SelectionTree::commonAncestor() const {
  const Node *Current = Root;
  if (Current == nullptr)
    return nullptr;
  // Descend while the node is unselected and has at most one child.
  while (!Current->Selected && Current->Children.size() <= 1) {
    // The tree only contains ancestors of the interesting nodes.
    assert(!Current->Children.empty() && "bad node in selection tree");
    Current = Current->Children.front();
  }
  return Current;
}
299+
300+
} // namespace clangd
301+
} // namespace clang

0 commit comments

Comments
 (0)