[clangd] Rethink how SelectionTree deals with macros and #includes.

sam-mccall · sam-mccall · commit 19daa21f841a · 2019-11-29T15:21:13.000+01:00
Summary: The exclusive-claim model is successful at resolving conflicts over tokens between parent/child or siblings. However claims at the spelled-token level do the wrong thing for macro expansions, where siblings can be equally associated with the macro invocation. Moreover, any model that only uses the endpoints in a range can fail when a macro invocation occurs inside the node. To address this, we use the existing TokenBuffer in more depth. Claims are expressed in terms of expanded tokens, so there is no need to worry about macros, includes etc. Once we know which expanded tokens were claimed, they are mapped onto spelled tokens for hit-testing. This mapping is fairly flexible, currently the handling of macros is pretty simple (map macro args onto spellings, other macro expansions onto the macro name token). This mapping is in principle token-by-token for correctness (though there's some batching for performance). The aggregation of the selection enum is now more principled as we need to be able to aggregate several hit-test results together. For simplicity i removed the ability to determine selectedness of TUDecl. (That was originally implemented in 90a5bf9, but doesn't seem to be very important or worth the complexity any longer). The expandedTokens(SourceLocation) helper could be added locally, but seems to make sense on TokenBuffer. Fixes clangd/clangd#202 Fixes clangd/clangd#126 Reviewers: hokein Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits, ilya-biryukov Tags: #clang Differential Revision: https://reviews.llvm.org/D70512
diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp
diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h
@@ -76,7 +76,7 @@ class SelectionTree {
                 unsigned Start, unsigned End);
 
   // Describes to what extent an AST node is covered by the selection.
-  enum Selection {
+  enum Selection : unsigned char {
     // The AST node owns no characters covered by the selection.
     // Note that characters owned by children don't count:
     //   if (x == 0) scream();
diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -134,6 +134,15 @@ TEST(SelectionTest, CommonAncestor) {
           )cpp",
           "IfStmt",
       },
+      {
+          R"cpp(
+            int x(int);
+            #define M(foo) x(foo)
+            int a = 42;
+            int b = M([[^a]]);
+          )cpp",
+          "DeclRefExpr",
+      },
       {
           R"cpp(
             void foo();
@@ -378,6 +387,7 @@ TEST(SelectionTest, Selected) {
             $C[[return]];
           }]] else [[{^
           }]]]]
+          char z;
         }
       )cpp",
       R"cpp(
@@ -386,10 +396,10 @@ TEST(SelectionTest, Selected) {
           void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {}
       )cpp",
       R"cpp(int a = [[5 >^> 1]];)cpp",
-      R"cpp([[
+      R"cpp(
         #define ECHO(X) X
-        ECHO(EC^HO([[$C[[int]]) EC^HO(a]]));
-      ]])cpp",
+        ECHO(EC^HO($C[[int]]) EC^HO(a));
+      )cpp",
       R"cpp( $C[[^$C[[int]] a^]]; )cpp",
       R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp",
   };
@@ -428,6 +438,56 @@ TEST(SelectionTest, PathologicalPreprocessor) {
   EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind());
 }
 
+TEST(SelectionTest, IncludedFile) {
+  const char *Case = R"cpp(
+    void test() {
+#include "Exp^and.inc"
+        break;
+    }
+  )cpp";
+  Annotations Test(Case);
+  auto TU = TestTU::withCode(Test.code());
+  TU.AdditionalFiles["Expand.inc"] = "while(1)\n";
+  auto AST = TU.build();
+  auto T = makeSelectionTree(Case, AST);
+
+  EXPECT_EQ("WhileStmt", T.commonAncestor()->kind());
+}
+
+TEST(SelectionTest, MacroArgExpansion) {
+  // If a macro arg is expanded several times, we consider them all selected.
+  const char *Case = R"cpp(
+    int mul(int, int);
+    #define SQUARE(X) mul(X, X);
+    int nine = SQUARE(^3);
+  )cpp";
+  Annotations Test(Case);
+  auto AST = TestTU::withCode(Test.code()).build();
+  auto T = makeSelectionTree(Case, AST);
+  // Unfortunately, this makes the common ancestor the CallExpr...
+  // FIXME: hack around this by picking one?
+  EXPECT_EQ("CallExpr", T.commonAncestor()->kind());
+  EXPECT_FALSE(T.commonAncestor()->Selected);
+  EXPECT_EQ(2u, T.commonAncestor()->Children.size());
+  for (const auto* N : T.commonAncestor()->Children) {
+    EXPECT_EQ("IntegerLiteral", N->kind());
+    EXPECT_TRUE(N->Selected);
+  }
+
+  // Verify that the common assert() macro doesn't suffer from this.
+  // (This is because we don't associate the stringified token with the arg).
+  Case = R"cpp(
+    void die(const char*);
+    #define assert(x) (x ? (void)0 : die(#x)
+    void foo() { assert(^42); }
+  )cpp";
+  Test = Annotations(Case);
+  AST = TestTU::withCode(Test.code()).build();
+  T = makeSelectionTree(Case, AST);
+
+  EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind());
+}
+
 TEST(SelectionTest, Implicit) {
   const char* Test = R"cpp(
     struct S { S(const char*); };
diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -269,7 +269,7 @@ TEST_F(ExtractVariableTest, Test) {
   EXPECT_UNAVAILABLE(UnavailableCases);
 
   // vector of pairs of input and output strings
-  const std::vector<std::pair<llvm::StringLiteral, llvm::StringLiteral>>
+  const std::vector<std::pair<std::string, std::string>>
       InputOutputs = {
           // extraction from variable declaration/assignment
           {R"cpp(void varDecl() {
@@ -321,17 +321,10 @@ TEST_F(ExtractVariableTest, Test) {
                    if(1)
                     LOOP(5 + [[3]])
                  })cpp",
-           /*FIXME: It should be extracted like this. SelectionTree needs to be
-             * fixed for macros.
             R"cpp(#define LOOP(x) while (1) {a = x;}
-                void f(int a) {
-                  auto dummy = 3; if(1)
-                   LOOP(5 + dummy)
-                })cpp"},*/
-           R"cpp(#define LOOP(x) while (1) {a = x;}
                  void f(int a) {
-                   auto dummy = LOOP(5 + 3); if(1)
-                    dummy
+                   auto dummy = 3; if(1)
+                    LOOP(5 + dummy)
                  })cpp"},
           {R"cpp(#define LOOP(x) do {x;} while(1);
                  void f(int a) {
@@ -644,13 +637,18 @@ void f(const int c) {
   )cpp";
   EXPECT_EQ(apply(TemplateFailInput), "unavailable");
 
-  // FIXME: This should be extractable after selectionTree works correctly for
-  // macros (currently it doesn't select anything for the following case)
-  std::string MacroFailInput = R"cpp(
+  std::string MacroInput = R"cpp(
     #define F(BODY) void f() { BODY }
     F ([[int x = 0;]])
   )cpp";
-  EXPECT_EQ(apply(MacroFailInput), "unavailable");
+  std::string MacroOutput = R"cpp(
+    #define F(BODY) void f() { BODY }
+    void extracted() {
+int x = 0;
+}
+F (extracted();)
+  )cpp";
+  EXPECT_EQ(apply(MacroInput), MacroOutput);
 
   // Shouldn't crash.
   EXPECT_EQ(apply("void f([[int a]]);"), "unavailable");
diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h
@@ -175,13 +175,18 @@ class TokenBuffer {
   /// All tokens produced by the preprocessor after all macro replacements,
   /// directives, etc. Source locations found in the clang AST will always
   /// point to one of these tokens.
+  /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()).
   /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split
   ///        into two '>' tokens by the parser. However, TokenBuffer currently
   ///        keeps it as a single '>>' token.
   llvm::ArrayRef<syntax::Token> expandedTokens() const {
     return ExpandedTokens;
   }
 
+  /// Returns the subrange of expandedTokens() corresponding to the closed
+  /// token range R.
+  llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
+
   /// Find the subrange of spelled tokens that produced the corresponding \p
   /// Expanded tokens.
   ///
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -119,6 +119,22 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const {
   return Text.substr(Begin, length());
 }
 
+llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
+  if (R.isInvalid())
+    return {};
+  const Token *Begin =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
+      });
+  const Token *End =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
+      });
+  if (Begin > End)
+    return {};
+  return {Begin, End};
+}
+
 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
 TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   assert(Expanded);
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Annotations.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
+#include "gmock/gmock.h"
 #include <cassert>
 #include <cstdlib>
 #include <gmock/gmock.h>
@@ -663,6 +664,20 @@ TEST_F(TokenBufferTest, SpelledByExpanded) {
               ValueIs(SameRange(findSpelled("not_mapped"))));
 }
 
+TEST_F(TokenBufferTest, ExpandedTokensForRange) {
+  recordTokens(R"cpp(
+    #define SIGN(X) X##_washere
+    A SIGN(B) C SIGN(D) E SIGN(F) G
+  )cpp");
+
+  SourceRange R(findExpanded("C").front().location(),
+                findExpanded("F_washere").front().location());
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(Buffer.expandedTokens(R),
+              SameRange(findExpanded("C D_washere E F_washere")));
+  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
+}
+
 TEST_F(TokenBufferTest, ExpansionStartingAt) {
   // Object-like macro expansions.
   recordTokens(R"cpp(