Skip to content

Commit 91e8eac

Browse files
committed
[clangd] Support extraction of binary "subexpressions" like a + [[b + c]].
Summary: These aren't formally subexpressions in C++, in this case + is left-associative. However informally +, *, etc are usually (mathematically) associative and users consider these subexpressions. We detect these and in simple cases support extracting the partial expression. As well as builtin associative operators, we assume that overloads of them are associative and support those too. Reviewers: SureYeaah Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D65139 llvm-svn: 367121
1 parent 736e8aa commit 91e8eac

File tree

5 files changed

+254
-17
lines changed

5 files changed

+254
-17
lines changed

clang-tools-extra/clangd/Selection.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -452,5 +452,12 @@ const DeclContext& SelectionTree::Node::getDeclContext() const {
452452
llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
453453
}
454454

455+
const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
456+
if (Children.size() == 1 &&
457+
Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
458+
return Children.front()->ignoreImplicit();
459+
return *this;
460+
}
461+
455462
} // namespace clangd
456463
} // namespace clang

clang-tools-extra/clangd/Selection.h

+3
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ class SelectionTree {
103103
const DeclContext& getDeclContext() const;
104104
// Printable node kind, like "CXXRecordDecl" or "AutoTypeLoc".
105105
std::string kind() const;
106+
// If this node is a wrapper with no syntax (e.g. implicit cast), return
107+
// its contents. (If multiple wrappers are present, unwraps all of them).
108+
const Node& ignoreImplicit() const;
106109
};
107110
// The most specific common ancestor of all the selected nodes.
108111
// Returns nullptr if the common ancestor is the root.

clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp

+157-16
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/ADT/StringRef.h"
2828
#include "llvm/Support/Casting.h"
2929
#include "llvm/Support/Error.h"
30+
#include "llvm/Support/raw_ostream.h"
3031

3132
namespace clang {
3233
namespace clangd {
@@ -39,10 +40,14 @@ class ExtractionContext {
3940
const clang::Expr *getExpr() const { return Expr; }
4041
const SelectionTree::Node *getExprNode() const { return ExprNode; }
4142
bool isExtractable() const { return Extractable; }
43+
// The half-open range for the expression to be extracted.
44+
SourceRange getExtractionChars() const;
4245
// Generate Replacement for replacing selected expression with given VarName
43-
tooling::Replacement replaceWithVar(llvm::StringRef VarName) const;
46+
tooling::Replacement replaceWithVar(SourceRange Chars,
47+
llvm::StringRef VarName) const;
4448
// Generate Replacement for declaring the selected Expr as a new variable
45-
tooling::Replacement insertDeclaration(llvm::StringRef VarName) const;
49+
tooling::Replacement insertDeclaration(llvm::StringRef VarName,
50+
SourceRange InitChars) const;
4651

4752
private:
4853
bool Extractable = false;
@@ -152,23 +157,20 @@ const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
152157
}
153158
return nullptr;
154159
}
160+
155161
// returns the replacement for substituting the extraction with VarName
156162
tooling::Replacement
157-
ExtractionContext::replaceWithVar(llvm::StringRef VarName) const {
158-
const llvm::Optional<SourceRange> ExtractionRng =
159-
toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
160-
unsigned ExtractionLength = SM.getFileOffset(ExtractionRng->getEnd()) -
161-
SM.getFileOffset(ExtractionRng->getBegin());
162-
return tooling::Replacement(SM, ExtractionRng->getBegin(), ExtractionLength,
163-
VarName);
163+
ExtractionContext::replaceWithVar(SourceRange Chars,
164+
llvm::StringRef VarName) const {
165+
unsigned ExtractionLength =
166+
SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
167+
return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
164168
}
165169
// returns the Replacement for declaring a new variable storing the extraction
166170
tooling::Replacement
167-
ExtractionContext::insertDeclaration(llvm::StringRef VarName) const {
168-
const llvm::Optional<SourceRange> ExtractionRng =
169-
toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
170-
assert(ExtractionRng && "ExtractionRng should not be null");
171-
llvm::StringRef ExtractionCode = toSourceCode(SM, *ExtractionRng);
171+
ExtractionContext::insertDeclaration(llvm::StringRef VarName,
172+
SourceRange InitializerChars) const {
173+
llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
172174
const SourceLocation InsertionLoc =
173175
toHalfOpenFileRange(SM, Ctx.getLangOpts(),
174176
InsertionPoint->getSourceRange())
@@ -179,6 +181,144 @@ ExtractionContext::insertDeclaration(llvm::StringRef VarName) const {
179181
return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
180182
}
181183

184+
// Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
185+
//
186+
// These are special, because the formal AST doesn't match what users expect:
187+
// - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
188+
// - but extracting `b + c` is reasonable, as + is (mathematically) associative.
189+
//
190+
// So we try to support these cases with some restrictions:
191+
// - the operator must be associative
192+
// - no mixing of operators is allowed
193+
// - we don't look inside macro expansions in the subexpressions
194+
// - we only adjust the extracted range, so references in the unselected parts
195+
// of the AST expression (e.g. `a`) are still considered referenced for
196+
// the purposes of calculating the insertion point.
197+
// FIXME: it would be nice to exclude these references, by micromanaging
198+
// the computeReferencedDecls() calls around the binary operator tree.
199+
200+
// Information extracted about a binary operator encounted in a SelectionTree.
201+
// It can represent either an overloaded or built-in operator.
202+
struct ParsedBinaryOperator {
203+
BinaryOperatorKind Kind;
204+
SourceLocation ExprLoc;
205+
llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands;
206+
207+
// If N is a binary operator, populate this and return true.
208+
bool parse(const SelectionTree::Node &N) {
209+
SelectedOperands.clear();
210+
211+
if (const BinaryOperator *Op =
212+
llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
213+
Kind = Op->getOpcode();
214+
ExprLoc = Op->getExprLoc();
215+
SelectedOperands = N.Children;
216+
return true;
217+
}
218+
if (const CXXOperatorCallExpr *Op =
219+
llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
220+
N.ASTNode.get<Expr>())) {
221+
if (!Op->isInfixBinaryOp())
222+
return false;
223+
224+
Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
225+
ExprLoc = Op->getExprLoc();
226+
// Not all children are args, there's also the callee (operator).
227+
for (const auto* Child : N.Children) {
228+
const Expr *E = Child->ASTNode.get<Expr>();
229+
assert(E && "callee and args should be Exprs!");
230+
if (E == Op->getArg(0) || E == Op->getArg(1))
231+
SelectedOperands.push_back(Child);
232+
}
233+
return true;
234+
}
235+
return false;
236+
}
237+
238+
bool associative() const {
239+
// Must also be left-associative, or update getBinaryOperatorRange()!
240+
switch (Kind) {
241+
case BO_Add:
242+
case BO_Mul:
243+
case BO_And:
244+
case BO_Or:
245+
case BO_Xor:
246+
case BO_LAnd:
247+
case BO_LOr:
248+
return true;
249+
default:
250+
return false;
251+
}
252+
}
253+
254+
bool crossesMacroBoundary(const SourceManager &SM) {
255+
FileID F = SM.getFileID(ExprLoc);
256+
for (const SelectionTree::Node *Child : SelectedOperands)
257+
if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
258+
return true;
259+
return false;
260+
}
261+
};
262+
263+
// If have an associative operator at the top level, then we must find
264+
// the start point (rightmost in LHS) and end point (leftmost in RHS).
265+
// We can only descend into subtrees where the operator matches.
266+
//
267+
// e.g. for a + [[b + c]] + d
268+
// +
269+
// / \
270+
// N-> + d
271+
// / \
272+
// + c <- End
273+
// / \
274+
// a b <- Start
275+
const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
276+
const SourceManager &SM,
277+
const LangOptions &LangOpts) {
278+
// If N is not a suitable binary operator, bail out.
279+
ParsedBinaryOperator Op;
280+
if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
281+
Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
282+
return SourceRange();
283+
BinaryOperatorKind OuterOp = Op.Kind;
284+
285+
// Because the tree we're interested in contains only one operator type, and
286+
// all eligible operators are left-associative, the shape of the tree is
287+
// very restricted: it's a linked list along the left edges.
288+
// This simplifies our implementation.
289+
const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
290+
const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS
291+
// End is already correct: it can't be an OuterOp (as it's left-associative).
292+
// Start needs to be pushed down int the subtree to the right spot.
293+
while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
294+
!Op.crossesMacroBoundary(SM)) {
295+
assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
296+
if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
297+
Start = Op.SelectedOperands.back();
298+
break;
299+
}
300+
// Op.LHS is (at least partially) selected, so descend into it.
301+
Start = Op.SelectedOperands.front();
302+
}
303+
304+
return SourceRange(
305+
toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
306+
->getBegin(),
307+
toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
308+
->getEnd());
309+
}
310+
311+
SourceRange ExtractionContext::getExtractionChars() const {
312+
// Special case: we're extracting an associative binary subexpression.
313+
SourceRange BinaryOperatorRange =
314+
getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
315+
if (BinaryOperatorRange.isValid())
316+
return BinaryOperatorRange;
317+
318+
// Usual case: we're extracting the whole expression.
319+
return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
320+
}
321+
182322
/// Extracts an expression to the variable dummy
183323
/// Before:
184324
/// int x = 5 + 4 * 3;
@@ -218,11 +358,12 @@ Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
218358
tooling::Replacements Result;
219359
// FIXME: get variable name from user or suggest based on type
220360
std::string VarName = "dummy";
361+
SourceRange Range = Target->getExtractionChars();
221362
// insert new variable declaration
222-
if (auto Err = Result.add(Target->insertDeclaration(VarName)))
363+
if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
223364
return std::move(Err);
224365
// replace expression with variable name
225-
if (auto Err = Result.add(Target->replaceWithVar(VarName)))
366+
if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
226367
return std::move(Err);
227368
return Effect::applyEdit(Result);
228369
}

clang-tools-extra/clangd/unittests/SelectionTests.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,23 @@ TEST(SelectionTest, Selected) {
343343
}
344344
}
345345

346+
TEST(SelectionTest, Implicit) {
347+
const char* Test = R"cpp(
348+
struct S { S(const char*); };
349+
int f(S);
350+
int x = f("^");
351+
)cpp";
352+
auto AST = TestTU::withCode(Annotations(Test).code()).build();
353+
auto T = makeSelectionTree(Test, AST);
354+
355+
const SelectionTree::Node *Str = T.commonAncestor();
356+
EXPECT_EQ("StringLiteral", nodeKind(Str)) << "Implicit selected?";
357+
EXPECT_EQ("ImplicitCastExpr", nodeKind(Str->Parent));
358+
EXPECT_EQ("CXXConstructExpr", nodeKind(Str->Parent->Parent));
359+
EXPECT_EQ(Str, &Str->Parent->Parent->ignoreImplicit())
360+
<< "Didn't unwrap " << nodeKind(&Str->Parent->Parent->ignoreImplicit());
361+
}
362+
346363
} // namespace
347364
} // namespace clangd
348365
} // namespace clang

clang-tools-extra/clangd/unittests/TweakTests.cpp

+70-1
Original file line numberDiff line numberDiff line change
@@ -476,10 +476,79 @@ TEST(TweakTest, ExtractVariable) {
476476
R"cpp(int f() {
477477
auto dummy = f(); return dummy;
478478
})cpp"},
479-
480479
// FIXME: Wrong result for \[\[clang::uninitialized\]\] int b = [[1]];
481480
// since the attr is inside the DeclStmt and the bounds of
482481
// DeclStmt don't cover the attribute.
482+
483+
// Binary subexpressions
484+
{R"cpp(void f() {
485+
int x = 1 + [[2 + 3 + 4]] + 5;
486+
})cpp",
487+
R"cpp(void f() {
488+
auto dummy = 2 + 3 + 4; int x = 1 + dummy + 5;
489+
})cpp"},
490+
{R"cpp(void f() {
491+
int x = [[1 + 2 + 3]] + 4 + 5;
492+
})cpp",
493+
R"cpp(void f() {
494+
auto dummy = 1 + 2 + 3; int x = dummy + 4 + 5;
495+
})cpp"},
496+
{R"cpp(void f() {
497+
int x = 1 + 2 + [[3 + 4 + 5]];
498+
})cpp",
499+
R"cpp(void f() {
500+
auto dummy = 3 + 4 + 5; int x = 1 + 2 + dummy;
501+
})cpp"},
502+
// Non-associative operations have no special support
503+
{R"cpp(void f() {
504+
int x = 1 - [[2 - 3 - 4]] - 5;
505+
})cpp",
506+
R"cpp(void f() {
507+
auto dummy = 1 - 2 - 3 - 4; int x = dummy - 5;
508+
})cpp"},
509+
// A mix of associative operators isn't associative.
510+
{R"cpp(void f() {
511+
int x = 0 + 1 * [[2 + 3]] * 4 + 5;
512+
})cpp",
513+
R"cpp(void f() {
514+
auto dummy = 1 * 2 + 3 * 4; int x = 0 + dummy + 5;
515+
})cpp"},
516+
// Overloaded operators are supported, we assume associativity
517+
// as if they were built-in.
518+
{R"cpp(struct S {
519+
S(int);
520+
};
521+
S operator+(S, S);
522+
523+
void f() {
524+
S x = S(1) + [[S(2) + S(3) + S(4)]] + S(5);
525+
})cpp",
526+
R"cpp(struct S {
527+
S(int);
528+
};
529+
S operator+(S, S);
530+
531+
void f() {
532+
auto dummy = S(2) + S(3) + S(4); S x = S(1) + dummy + S(5);
533+
})cpp"},
534+
// Don't try to analyze across macro boundaries
535+
// FIXME: it'd be nice to do this someday (in a safe way)
536+
{R"cpp(#define ECHO(X) X
537+
void f() {
538+
int x = 1 + [[ECHO(2 + 3) + 4]] + 5;
539+
})cpp",
540+
R"cpp(#define ECHO(X) X
541+
void f() {
542+
auto dummy = 1 + ECHO(2 + 3) + 4; int x = dummy + 5;
543+
})cpp"},
544+
{R"cpp(#define ECHO(X) X
545+
void f() {
546+
int x = 1 + [[ECHO(2) + ECHO(3) + 4]] + 5;
547+
})cpp",
548+
R"cpp(#define ECHO(X) X
549+
void f() {
550+
auto dummy = 1 + ECHO(2) + ECHO(3) + 4; int x = dummy + 5;
551+
})cpp"},
483552
};
484553
for (const auto &IO : InputOutputs) {
485554
checkTransform(ID, IO.first, IO.second);

0 commit comments

Comments
 (0)