-
Notifications
You must be signed in to change notification settings - Fork 13.3k
/
Copy pathClangCommentHTMLNamedCharacterReferenceEmitter.cpp
77 lines (67 loc) · 2.65 KB
/
ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
//===-- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits an efficient function to translate HTML named
// character references to UTF-8 sequences.
//
//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <vector>
using namespace llvm;
/// Convert a code point to the corresponding UTF-8 sequence represented
/// as a C string literal.
///
/// \returns true on success.
static bool translateCodePointToUTF8(unsigned CodePoint,
SmallVectorImpl<char> &CLiteral) {
char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
char *TranslatedPtr = Translated;
if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
return false;
StringRef UTF8(Translated, TranslatedPtr - Translated);
raw_svector_ostream OS(CLiteral);
OS << "\"";
for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
OS << "\\x";
OS.write_hex(static_cast<unsigned char>(UTF8[i]));
}
OS << "\"";
return true;
}
/// Emit the definition of translateHTMLNamedCharacterReferenceToUTF8(), a
/// function that maps the name of an HTML named character reference to its
/// UTF-8 encoding.
///
/// One matcher case is generated for every record derived from "NCR": the
/// record's "Spelling" field is the name to match and its "CodePoint" field is
/// the character returned. A record whose code point is out of range or cannot
/// be encoded is reported as an error and skipped. Names that match no case
/// fall through to an empty StringRef in the generated function.
///
/// \param Records the parsed TableGen records to read "NCR" definitions from.
/// \param OS the stream the generated source is written to.
void clang::EmitClangCommentHTMLNamedCharacterReferences(
    const RecordKeeper &Records, raw_ostream &OS) {
  // Largest Unicode code point. Values above it must be rejected *before* the
  // 64-bit field value is narrowed to `unsigned`; otherwise a value such as
  // 0x100000041 (or a negative one) would be truncated and silently accepted.
  constexpr uint64_t MaxCodePoint = 0x10FFFF;

  std::vector<StringMatcher::StringPair> NameToUTF8;
  SmallString<32> CLiteral;
  for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) {
    const uint64_t CodePoint = Tag->getValueAsInt("CodePoint");

    // Build the matcher action: `return "<escaped UTF-8 bytes>";`.
    CLiteral.clear();
    CLiteral.append("return ");
    if (CodePoint > MaxCodePoint ||
        !translateCodePointToUTF8(static_cast<unsigned>(CodePoint),
                                  CLiteral)) {
      SrcMgr.PrintMessage(Tag->getLoc().front(), SourceMgr::DK_Error,
                          Twine("invalid code point"));
      continue;
    }
    CLiteral.append(";");

    NameToUTF8.emplace_back(Tag->getValueAsString("Spelling").str(),
                            std::string(CLiteral));
  }

  emitSourceFileHeader("HTML named character reference to UTF-8 translation",
                       OS, Records);

  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
        "                                             StringRef Name) {\n";
  StringMatcher("Name", NameToUTF8, OS).Emit();
  OS << "  return StringRef();\n"
     << "}\n\n";
}