Skip to content

Commit e2c9836

Browse files
committed
[CursorInfo] Add Clang documentation to SymbolGraph output
This currently doesn't check for inherited docs, i.e. either the imported declaration has docs or it doesn't. There are also a few odd cases with mixed doc types and when each line is prefixed with '*', but it's good enough for an initial implementation. Moves UTF-8 sanitisation out of ASTPrinter.h and into Unicode.h so that it can be used here as well. Resolves rdar://91388603.
1 parent 66b08a8 commit e2c9836

File tree

9 files changed

+347
-125
lines changed

9 files changed

+347
-125
lines changed

include/swift/AST/ASTPrinter.h

-3
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,6 @@ class ASTPrinter {
327327
printStructurePre(Kind, D);
328328
}
329329

330-
/// To sanitize a malformed utf8 string to a well-formed one.
331-
static std::string sanitizeUtf8(StringRef Text);
332-
333330
private:
334331
virtual void anchor();
335332
};

include/swift/Basic/Unicode.h

+3
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ unsigned extractFirstUnicodeScalar(StringRef S);
7474
/// unit (Unicode scalar).
7575
bool isWellFormedUTF8(StringRef S);
7676

77+
/// Replaces any ill-formed subsequences with `u8"\ufffd"`.
78+
std::string sanitizeUTF8(StringRef Text);
79+
7780
} // end namespace unicode
7881
} // end namespace swift
7982

include/swift/SymbolGraphGen/SymbolGraphOptions.h

+3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ struct SymbolGraphOptions {
4545

4646
/// Whether to emit symbols with SPI information.
4747
bool IncludeSPISymbols;
48+
49+
/// Whether to include documentation for clang nodes or not.
50+
bool IncludeClangDocs;
4851
};
4952

5053
} // end namespace symbolgraphgen

lib/AST/ASTPrinter.cpp

+3-27
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "swift/Basic/QuotedString.h"
4747
#include "swift/Basic/STLExtras.h"
4848
#include "swift/Basic/StringExtras.h"
49+
#include "swift/Basic/Unicode.h"
4950
#include "swift/ClangImporter/ClangImporterRequests.h"
5051
#include "swift/Config.h"
5152
#include "swift/Parse/Lexer.h"
@@ -302,31 +303,6 @@ bool TypeTransformContext::isPrintingSynthesizedExtension() const {
302303
return !Decl.isNull();
303304
}
304305

305-
std::string ASTPrinter::sanitizeUtf8(StringRef Text) {
306-
llvm::SmallString<256> Builder;
307-
Builder.reserve(Text.size());
308-
const llvm::UTF8* Data = reinterpret_cast<const llvm::UTF8*>(Text.begin());
309-
const llvm::UTF8* End = reinterpret_cast<const llvm::UTF8*>(Text.end());
310-
StringRef Replacement = u8"\ufffd";
311-
while (Data < End) {
312-
auto Step = llvm::getNumBytesForUTF8(*Data);
313-
if (Data + Step > End) {
314-
Builder.append(Replacement);
315-
break;
316-
}
317-
318-
if (llvm::isLegalUTF8Sequence(Data, Data + Step)) {
319-
Builder.append(Data, Data + Step);
320-
} else {
321-
322-
// If malformed, add replacement characters.
323-
Builder.append(Replacement);
324-
}
325-
Data += Step;
326-
}
327-
return std::string(Builder.str());
328-
}
329-
330306
void ASTPrinter::anchor() {}
331307

332308
void ASTPrinter::printIndent() {
@@ -633,9 +609,9 @@ class PrintAST : public ASTVisitor<PrintAST> {
633609
bool FirstLine = true;
634610
for (auto Line : Lines) {
635611
if (FirstLine)
636-
Printer << sanitizeClangDocCommentStyle(ASTPrinter::sanitizeUtf8(Line));
612+
Printer << sanitizeClangDocCommentStyle(unicode::sanitizeUTF8(Line));
637613
else
638-
Printer << ASTPrinter::sanitizeUtf8(Line);
614+
Printer << unicode::sanitizeUTF8(Line);
639615
Printer.printNewline();
640616
FirstLine = false;
641617
}

lib/Basic/Unicode.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "swift/Basic/Unicode.h"
14+
#include "llvm/ADT/SmallString.h"
1415
#include "llvm/ADT/SmallVector.h"
1516
#include "llvm/Support/ConvertUTF.h"
1617

@@ -128,3 +129,28 @@ bool swift::unicode::isWellFormedUTF8(StringRef S) {
128129
const llvm::UTF8 *begin = S.bytes_begin();
129130
return llvm::isLegalUTF8String(&begin, S.bytes_end());
130131
}
132+
133+
/// Returns a copy of \p Text in which every ill-formed UTF-8 subsequence is
/// replaced with U+FFFD (REPLACEMENT CHARACTER).
///
/// Only bytes that belong to the ill-formed subsequence are replaced: a bad
/// or truncated lead byte consumes itself plus any continuation bytes
/// (0b10xxxxxx) that directly follow it, up to the length the lead byte
/// announced. Bytes after that point are examined again, so well-formed text
/// following a malformed byte is preserved rather than swallowed.
std::string swift::unicode::sanitizeUTF8(StringRef Text) {
  llvm::SmallString<256> Builder;
  Builder.reserve(Text.size());
  const llvm::UTF8 *Data = Text.bytes_begin();
  const llvm::UTF8 *End = Text.bytes_end();
  const StringRef Replacement = u8"\ufffd";
  while (Data < End) {
    // Sequence length announced by the lead byte.
    const unsigned Step = llvm::getNumBytesForUTF8(*Data);
    // Compare sizes instead of forming `Data + Step`, which could point past
    // one-past-the-end of the buffer.
    const bool Fits = Step <= static_cast<size_t>(End - Data);
    if (Fits && llvm::isLegalUTF8Sequence(Data, Data + Step)) {
      Builder.append(Data, Data + Step);
      Data += Step;
      continue;
    }

    // Ill-formed or truncated: emit a single replacement for the lead byte
    // and the continuation bytes attached to it. Stop at the first byte that
    // is not a continuation byte so that a following ASCII character or lead
    // byte is not dropped.
    Builder.append(Replacement);
    const llvm::UTF8 *Limit = Fits ? Data + Step : End;
    ++Data;
    while (Data < Limit && (*Data & 0xC0) == 0x80)
      ++Data;
  }
  return std::string(Builder.str());
}

lib/IDE/ModuleInterfacePrinting.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "swift/AST/PrintOptions.h"
2323
#include "swift/AST/SourceFile.h"
2424
#include "swift/Basic/PrimitiveParsing.h"
25+
#include "swift/Basic/Unicode.h"
2526
#include "swift/ClangImporter/ClangImporter.h"
2627
#include "swift/ClangImporter/ClangModule.h"
2728
#include "swift/Parse/Token.h"
@@ -977,7 +978,7 @@ void ClangCommentPrinter::printDeclPost(const Decl *D,
977978
return;
978979

979980
for (auto CommentText : PendingComments) {
980-
*this << " " << ASTPrinter::sanitizeUtf8(CommentText);
981+
*this << " " << unicode::sanitizeUTF8(CommentText);
981982
}
982983
PendingComments.clear();
983984
if (auto ClangN = swift::ide::getEffectiveClangNode(D))
@@ -1068,7 +1069,7 @@ void ClangCommentPrinter::printComment(StringRef RawText, unsigned StartCol) {
10681069
trimLeadingWhitespaceFromLines(RawText, WhitespaceToTrim, Lines);
10691070

10701071
for (auto Line : Lines) {
1071-
*this << ASTPrinter::sanitizeUtf8(Line) << "\n";
1072+
*this << unicode::sanitizeUTF8(Line) << "\n";
10721073
printIndent();
10731074
}
10741075
}

lib/SymbolGraphGen/Symbol.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
#include "swift/AST/ParameterList.h"
1717
#include "swift/AST/RawComment.h"
1818
#include "swift/AST/USRGeneration.h"
19+
#include "swift/Basic/PrimitiveParsing.h"
1920
#include "swift/Basic/SourceManager.h"
21+
#include "swift/Basic/Unicode.h"
22+
#include "clang/AST/ASTContext.h"
23+
#include "clang/AST/Decl.h"
2024
#include "AvailabilityMixin.h"
2125
#include "JSON.h"
2226
#include "Symbol.h"
@@ -193,6 +197,41 @@ const ValueDecl *Symbol::getDeclInheritingDocs() const {
193197
}
194198

195199
void Symbol::serializeDocComment(llvm::json::OStream &OS) const {
200+
if (ClangNode ClangN = VD->getClangNode()) {
201+
if (!Graph->Walker.Options.IncludeClangDocs)
202+
return;
203+
204+
if (auto *ClangD = ClangN.getAsDecl()) {
205+
const clang::ASTContext &ClangContext = ClangD->getASTContext();
206+
const clang::RawComment *RC =
207+
ClangContext.getRawCommentForAnyRedecl(ClangD);
208+
if (!RC || !RC->isDocumentation())
209+
return;
210+
211+
// TODO: Replace this with `getFormattedLines` when it's in and add the
212+
// line and column ranges. Also consider handling cross-language
213+
// hierarchies, ie. if there's no comment on the ObjC decl we should
214+
// look up the hierarchy (and vice versa).
215+
std::string Text = RC->getFormattedText(ClangContext.getSourceManager(),
216+
ClangContext.getDiagnostics());
217+
Text = unicode::sanitizeUTF8(Text);
218+
219+
SmallVector<StringRef, 8> Lines;
220+
splitIntoLines(Text, Lines);
221+
222+
OS.attributeObject("docComment", [&]() {
223+
OS.attributeArray("lines", [&]() {
224+
for (StringRef Line : Lines) {
225+
OS.object([&](){
226+
OS.attribute("text", Line);
227+
});
228+
}
229+
});
230+
});
231+
}
232+
return;
233+
}
234+
196235
const auto *DocCommentProvidingDecl = VD;
197236
if (!Graph->Walker.Options.SkipInheritedDocs) {
198237
DocCommentProvidingDecl = dyn_cast_or_null<ValueDecl>(

0 commit comments

Comments
 (0)