Skip to content

Commit 34cc210

Browse files
committedNov 26, 2021
[clangd] IncludeCleaner: Attribute symbols from non self-contained headers to their parents
When a symbol comes from the non self-contained header, we recursively uplift the file we consider used to the first includer that has a header guard. We need to do this while we still have FileIDs because every time a non self-contained header is included, it gets a new FileID but is later deduplicated by HeaderID and it's not possible to understand where it was included from. Based on D114370. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D114623
1 parent fc0aacf commit 34cc210

File tree

3 files changed

+117
-11
lines changed

3 files changed

+117
-11
lines changed
 

Diff for: ‎clang-tools-extra/clangd/IncludeCleaner.cpp

+40-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "IncludeCleaner.h"
1010
#include "Config.h"
11+
#include "Headers.h"
1112
#include "ParsedAST.h"
1213
#include "Protocol.h"
1314
#include "SourceCode.h"
@@ -16,6 +17,7 @@
1617
#include "clang/AST/ExprCXX.h"
1718
#include "clang/AST/RecursiveASTVisitor.h"
1819
#include "clang/Basic/SourceLocation.h"
20+
#include "clang/Basic/SourceManager.h"
1921
#include "clang/Lex/HeaderSearch.h"
2022
#include "clang/Lex/Preprocessor.h"
2123
#include "clang/Tooling/Syntax/Tokens.h"
@@ -221,6 +223,31 @@ bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
221223
return true;
222224
}
223225

226+
// In case symbols are coming from non self-contained header, we need to find
227+
// its first includer that is self-contained. This is the header users can
228+
// include, so it will be responsible for bringing the symbols from given
229+
// header into the scope.
230+
FileID headerResponsible(FileID ID, const SourceManager &SM,
231+
const IncludeStructure &Includes) {
232+
// Unroll the chain of non self-contained headers until we find the one that
233+
// can be included.
234+
for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
235+
FE = SM.getFileEntryForID(ID)) {
236+
// If FE is nullptr, we consider it to be the responsible header.
237+
if (!FE)
238+
break;
239+
auto HID = Includes.getID(FE);
240+
assert(HID && "We're iterating over headers already existing in "
241+
"IncludeStructure");
242+
if (Includes.isSelfContained(*HID))
243+
break;
244+
// The header is not self-contained: put the responsibility for its symbols
245+
// on its includer.
246+
ID = SM.getFileID(SM.getIncludeLoc(ID));
247+
}
248+
return ID;
249+
}
250+
224251
} // namespace
225252

226253
ReferencedLocations findReferencedLocations(ParsedAST &AST) {
@@ -234,20 +261,27 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) {
234261

235262
llvm::DenseSet<FileID>
236263
findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
237-
const SourceManager &SM) {
264+
const IncludeStructure &Includes, const SourceManager &SM) {
238265
std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
239266
llvm::sort(Sorted); // Group by FileID.
240-
ReferencedFiles Result(SM);
267+
ReferencedFiles Files(SM);
241268
for (auto It = Sorted.begin(); It < Sorted.end();) {
242269
FileID FID = SM.getFileID(*It);
243-
Result.add(FID, *It);
270+
Files.add(FID, *It);
244271
// Cheaply skip over all the other locations from the same FileID.
245272
// This avoids lots of redundant Loc->File lookups for the same file.
246273
do
247274
++It;
248275
while (It != Sorted.end() && SM.isInFileID(*It, FID));
249276
}
250-
return std::move(Result.Files);
277+
// If a header is not self-contained, we consider its symbols a logical part
278+
// of the including file. Therefore, mark the parents of all used
279+
// non-self-contained FileIDs as used. Perform this on FileIDs rather than
280+
// HeaderIDs, as each inclusion of a non-self-contained file is distinct.
281+
llvm::DenseSet<FileID> Result;
282+
for (FileID ID : Files.Files)
283+
Result.insert(headerResponsible(ID, SM, Includes));
284+
return Result;
251285
}
252286

253287
std::vector<const Inclusion *>
@@ -304,12 +338,8 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
304338
const auto &SM = AST.getSourceManager();
305339

306340
auto Refs = findReferencedLocations(AST);
307-
// FIXME(kirillbobyrev): Attribute the symbols from non self-contained
308-
// headers to their parents while the information about respective
309-
// SourceLocations and FileIDs is not lost. It is not possible to do that
310-
// later when non-guarded headers included multiple times will get same
311-
// HeaderID.
312-
auto ReferencedFileIDs = findReferencedFiles(Refs, SM);
341+
auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
342+
AST.getSourceManager());
313343
auto ReferencedHeaders =
314344
translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
315345
return getUnused(AST, ReferencedHeaders);

Diff for: ‎clang-tools-extra/clangd/IncludeCleaner.h

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST);
5252
/// The output only includes things SourceManager sees as files (not macro IDs).
5353
/// This can include <built-in>, <scratch space> etc that are not true files.
5454
llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
55+
const IncludeStructure &Includes,
5556
const SourceManager &SM);
5657

5758
/// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).

Diff for: ‎clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

+76-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ TEST(IncludeCleaner, VirtualBuffers) {
281281
auto &SM = AST.getSourceManager();
282282
auto &Includes = AST.getIncludeStructure();
283283

284-
auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM);
284+
auto ReferencedFiles =
285+
findReferencedFiles(findReferencedLocations(AST), Includes, SM);
285286
llvm::StringSet<> ReferencedFileNames;
286287
for (FileID FID : ReferencedFiles)
287288
ReferencedFileNames.insert(
@@ -303,6 +304,80 @@ TEST(IncludeCleaner, VirtualBuffers) {
303304
EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty());
304305
}
305306

307+
TEST(IncludeCleaner, DistinctUnguardedInclusions) {
308+
TestTU TU;
309+
TU.Code = R"cpp(
310+
#include "bar.h"
311+
#include "foo.h"
312+
313+
int LocalFoo = foo::Variable;
314+
)cpp";
315+
TU.AdditionalFiles["foo.h"] = R"cpp(
316+
#pragma once
317+
namespace foo {
318+
#include "unguarded.h"
319+
}
320+
)cpp";
321+
TU.AdditionalFiles["bar.h"] = R"cpp(
322+
#pragma once
323+
namespace bar {
324+
#include "unguarded.h"
325+
}
326+
)cpp";
327+
TU.AdditionalFiles["unguarded.h"] = R"cpp(
328+
constexpr int Variable = 42;
329+
)cpp";
330+
331+
ParsedAST AST = TU.build();
332+
333+
auto ReferencedFiles =
334+
findReferencedFiles(findReferencedLocations(AST),
335+
AST.getIncludeStructure(), AST.getSourceManager());
336+
llvm::StringSet<> ReferencedFileNames;
337+
auto &SM = AST.getSourceManager();
338+
for (FileID FID : ReferencedFiles)
339+
ReferencedFileNames.insert(
340+
SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
341+
// Note that we have uplifted the referenced files from non self-contained
342+
// headers to header-guarded ones.
343+
EXPECT_THAT(ReferencedFileNames.keys(),
344+
UnorderedElementsAre(testPath("foo.h")));
345+
}
346+
347+
TEST(IncludeCleaner, NonSelfContainedHeaders) {
348+
TestTU TU;
349+
TU.Code = R"cpp(
350+
#include "foo.h"
351+
352+
int LocalFoo = Variable;
353+
)cpp";
354+
TU.AdditionalFiles["foo.h"] = R"cpp(
355+
#pragma once
356+
#include "indirection.h"
357+
)cpp";
358+
TU.AdditionalFiles["indirection.h"] = R"cpp(
359+
#include "unguarded.h"
360+
)cpp";
361+
TU.AdditionalFiles["unguarded.h"] = R"cpp(
362+
constexpr int Variable = 42;
363+
)cpp";
364+
365+
ParsedAST AST = TU.build();
366+
367+
auto ReferencedFiles =
368+
findReferencedFiles(findReferencedLocations(AST),
369+
AST.getIncludeStructure(), AST.getSourceManager());
370+
llvm::StringSet<> ReferencedFileNames;
371+
auto &SM = AST.getSourceManager();
372+
for (FileID FID : ReferencedFiles)
373+
ReferencedFileNames.insert(
374+
SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
375+
// Note that we have uplifted the referenced files from non self-contained
376+
// headers to header-guarded ones.
377+
EXPECT_THAT(ReferencedFileNames.keys(),
378+
UnorderedElementsAre(testPath("foo.h")));
379+
}
380+
306381
} // namespace
307382
} // namespace clangd
308383
} // namespace clang

0 commit comments

Comments
 (0)
Please sign in to comment.