Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit 65ad22d

Browse files
committed
[YAML] Add support for non-printable characters
LLVM IR function names which disable mangling start with '\01' (https://www.llvm.org/docs/LangRef.html#identifiers). When an identifier like "\01@abc@" gets dumped to MIR, it is quoted, but only with single quotes. http://www.yaml.org/spec/1.2/spec.html#id2770814: "The allowed character range explicitly excludes the C0 control block #x0-#x1F (except for TAB #x9, LF #xA, and CR #xD which are allowed), DEL #x7F, the C1 control block #x80-#x9F (except for NEL #x85 which is allowed), the surrogate block #xD800-#xDFFF, #xFFFE, and #xFFFF." http://www.yaml.org/spec/1.2/spec.html#id2776092: "All non-printable characters must be escaped. [...] Note that escape sequences are only interpreted in double-quoted scalars." This patch adds support for printing escaped non-printable characters between double quotes if needed. Should also fix PR31743. Differential Revision: https://reviews.llvm.org/D41290 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320996 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d6be214 commit 65ad22d

17 files changed

+245
-76
lines changed

docs/YamlIO.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ looks like:
466466
return StringRef();
467467
}
468468
// Determine if this scalar needs quotes.
469-
static bool mustQuote(StringRef) { return true; }
469+
static QuotingType mustQuote(StringRef) { return QuotingType::Single; }
470470
};
471471

472472
Block Scalars

include/llvm/CodeGen/MIRYamlMapping.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ template <> struct ScalarTraits<StringValue> {
5656
return "";
5757
}
5858

59-
static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); }
59+
static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
6060
};
6161

6262
struct FlowStringValue : StringValue {
@@ -73,7 +73,7 @@ template <> struct ScalarTraits<FlowStringValue> {
7373
return ScalarTraits<StringValue>::input(Scalar, Ctx, S);
7474
}
7575

76-
static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); }
76+
static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
7777
};
7878

7979
struct BlockStringValue {
@@ -120,7 +120,7 @@ template <> struct ScalarTraits<UnsignedValue> {
120120
return ScalarTraits<unsigned>::input(Scalar, Ctx, Value.Value);
121121
}
122122

123-
static bool mustQuote(StringRef Scalar) {
123+
static QuotingType mustQuote(StringRef Scalar) {
124124
return ScalarTraits<unsigned>::mustQuote(Scalar);
125125
}
126126
};

include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ ArrayRef<uint8_t> toDebugH(const DebugHSection &DebugH,
5656
} // end namespace llvm
5757

5858
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::DebugHSection)
59-
LLVM_YAML_DECLARE_SCALAR_TRAITS(CodeViewYAML::GlobalHash, false)
59+
LLVM_YAML_DECLARE_SCALAR_TRAITS(CodeViewYAML::GlobalHash, QuotingType::None)
6060
LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::GlobalHash)
6161

6262
#endif // LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H

include/llvm/ObjectYAML/CodeViewYAMLTypes.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ ArrayRef<uint8_t> toDebugT(ArrayRef<LeafRecord>, BumpPtrAllocator &Alloc);
5858

5959
} // end namespace llvm
6060

61-
LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, true)
61+
LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, QuotingType::Single)
6262

6363
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord)
6464
LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord)

include/llvm/ObjectYAML/MachOYAML.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ using char_16 = char[16];
261261
template <> struct ScalarTraits<char_16> {
262262
static void output(const char_16 &Val, void *, raw_ostream &Out);
263263
static StringRef input(StringRef Scalar, void *, char_16 &Val);
264-
static bool mustQuote(StringRef S);
264+
static QuotingType mustQuote(StringRef S);
265265
};
266266

267267
// This trait is used for UUIDs. It reads and writes them matching otool's
@@ -271,7 +271,7 @@ using uuid_t = raw_ostream::uuid_t;
271271
template <> struct ScalarTraits<uuid_t> {
272272
static void output(const uuid_t &Val, void *, raw_ostream &Out);
273273
static StringRef input(StringRef Scalar, void *, uuid_t &Val);
274-
static bool mustQuote(StringRef S);
274+
static QuotingType mustQuote(StringRef S);
275275
};
276276

277277
// Load Command struct mapping traits

include/llvm/ObjectYAML/YAML.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ inline bool operator==(const BinaryRef &LHS, const BinaryRef &RHS) {
107107
template <> struct ScalarTraits<BinaryRef> {
108108
static void output(const BinaryRef &, void *, raw_ostream &);
109109
static StringRef input(StringRef, void *, BinaryRef &);
110-
static bool mustQuote(StringRef S) { return needsQuotes(S); }
110+
static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
111111
};
112112

113113
} // end namespace yaml

include/llvm/Support/YAMLTraits.h

+84-40
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "llvm/ADT/Optional.h"
1414
#include "llvm/ADT/SmallVector.h"
15+
#include "llvm/ADT/StringExtras.h"
1516
#include "llvm/ADT/StringMap.h"
1617
#include "llvm/ADT/StringRef.h"
1718
#include "llvm/ADT/Twine.h"
@@ -117,6 +118,11 @@ struct ScalarBitSetTraits {
117118
// static void bitset(IO &io, T &value);
118119
};
119120

121+
/// Describe which type of quotes should be used when quoting is necessary.
122+
/// Some non-printable characters need to be double-quoted, while some others
123+
/// are fine with single-quoting, and some don't need any quoting.
124+
enum class QuotingType { None, Single, Double };
125+
120126
/// This class should be specialized by type that requires custom conversion
121127
/// to/from a yaml scalar. For example:
122128
///
@@ -131,7 +137,7 @@ struct ScalarBitSetTraits {
131137
/// // return empty string on success, or error string
132138
/// return StringRef();
133139
/// }
134-
/// static bool mustQuote(StringRef) { return true; }
140+
/// static QuotingType mustQuote(StringRef) { return QuotingType::Single; }
135141
/// };
136142
template<typename T>
137143
struct ScalarTraits {
@@ -145,7 +151,7 @@ struct ScalarTraits {
145151
//static StringRef input(StringRef scalar, void *ctxt, T &value);
146152
//
147153
// Function to determine if the value should be quoted.
148-
//static bool mustQuote(StringRef);
154+
//static QuotingType mustQuote(StringRef);
149155
};
150156

151157
/// This class should be specialized by type that requires custom conversion
@@ -270,7 +276,7 @@ struct has_ScalarTraits
270276
{
271277
using Signature_input = StringRef (*)(StringRef, void*, T&);
272278
using Signature_output = void (*)(const T&, void*, raw_ostream&);
273-
using Signature_mustQuote = bool (*)(StringRef);
279+
using Signature_mustQuote = QuotingType (*)(StringRef);
274280

275281
template <typename U>
276282
static char test(SameType<Signature_input, &U::input> *,
@@ -495,28 +501,66 @@ inline bool isBool(StringRef S) {
495501
S.equals("false") || S.equals("False") || S.equals("FALSE");
496502
}
497503

498-
inline bool needsQuotes(StringRef S) {
504+
// 5.1. Character Set
505+
// The allowed character range explicitly excludes the C0 control block #x0-#x1F
506+
// (except for TAB #x9, LF #xA, and CR #xD which are allowed), DEL #x7F, the C1
507+
// control block #x80-#x9F (except for NEL #x85 which is allowed), the surrogate
508+
// block #xD800-#xDFFF, #xFFFE, and #xFFFF.
509+
inline QuotingType needsQuotes(StringRef S) {
499510
if (S.empty())
500-
return true;
511+
return QuotingType::Single;
501512
if (isspace(S.front()) || isspace(S.back()))
502-
return true;
513+
return QuotingType::Single;
503514
if (S.front() == ',')
504-
return true;
505-
506-
static const char ScalarSafeChars[] =
507-
"abcdefghijklmnopqrstuvwxyz"
508-
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t";
509-
if (S.find_first_not_of(ScalarSafeChars) != StringRef::npos)
510-
return true;
511-
515+
return QuotingType::Single;
512516
if (isNull(S))
513-
return true;
517+
return QuotingType::Single;
514518
if (isBool(S))
515-
return true;
519+
return QuotingType::Single;
516520
if (isNumeric(S))
517-
return true;
521+
return QuotingType::Single;
522+
523+
QuotingType MaxQuotingNeeded = QuotingType::None;
524+
for (unsigned char C : S) {
525+
// Alphanum is safe.
526+
if (isAlnum(C))
527+
continue;
528+
529+
switch (C) {
530+
// Safe scalar characters.
531+
case '_':
532+
case '-':
533+
case '/':
534+
case '^':
535+
case '.':
536+
case ',':
537+
case ' ':
538+
// TAB (0x9), LF (0xA), CR (0xD) and NEL (0x85) are allowed.
539+
case 0x9:
540+
case 0xA:
541+
case 0xD:
542+
case 0x85:
543+
continue;
544+
// DEL (0x7F) is excluded from the allowed character range.
545+
case 0x7F:
546+
return QuotingType::Double;
547+
default: {
548+
// C0 control block (0x0 - 0x1F) is excluded from the allowed character
549+
// range.
550+
if (C <= 0x1F)
551+
return QuotingType::Double;
552+
// C1 control block (0x80 - 0x9F) is excluded from the allowed character
553+
// range.
554+
if (C >= 0x80 && C <= 0x9F)
555+
return QuotingType::Double;
556+
557+
// The character is not safe; at least single quoting is needed.
558+
MaxQuotingNeeded = QuotingType::Single;
559+
}
560+
}
561+
}
518562

519-
return false;
563+
return MaxQuotingNeeded;
520564
}
521565

522566
template <typename T, typename Context>
@@ -581,7 +625,7 @@ class IO {
581625
virtual bool bitSetMatch(const char*, bool) = 0;
582626
virtual void endBitSetScalar() = 0;
583627

584-
virtual void scalarString(StringRef &, bool) = 0;
628+
virtual void scalarString(StringRef &, QuotingType) = 0;
585629
virtual void blockScalarString(StringRef &) = 0;
586630

587631
virtual void setError(const Twine &) = 0;
@@ -911,91 +955,91 @@ template<>
911955
struct ScalarTraits<bool> {
912956
static void output(const bool &, void* , raw_ostream &);
913957
static StringRef input(StringRef, void *, bool &);
914-
static bool mustQuote(StringRef) { return false; }
958+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
915959
};
916960

917961
template<>
918962
struct ScalarTraits<StringRef> {
919963
static void output(const StringRef &, void *, raw_ostream &);
920964
static StringRef input(StringRef, void *, StringRef &);
921-
static bool mustQuote(StringRef S) { return needsQuotes(S); }
965+
static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
922966
};
923967

924968
template<>
925969
struct ScalarTraits<std::string> {
926970
static void output(const std::string &, void *, raw_ostream &);
927971
static StringRef input(StringRef, void *, std::string &);
928-
static bool mustQuote(StringRef S) { return needsQuotes(S); }
972+
static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
929973
};
930974

931975
template<>
932976
struct ScalarTraits<uint8_t> {
933977
static void output(const uint8_t &, void *, raw_ostream &);
934978
static StringRef input(StringRef, void *, uint8_t &);
935-
static bool mustQuote(StringRef) { return false; }
979+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
936980
};
937981

938982
template<>
939983
struct ScalarTraits<uint16_t> {
940984
static void output(const uint16_t &, void *, raw_ostream &);
941985
static StringRef input(StringRef, void *, uint16_t &);
942-
static bool mustQuote(StringRef) { return false; }
986+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
943987
};
944988

945989
template<>
946990
struct ScalarTraits<uint32_t> {
947991
static void output(const uint32_t &, void *, raw_ostream &);
948992
static StringRef input(StringRef, void *, uint32_t &);
949-
static bool mustQuote(StringRef) { return false; }
993+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
950994
};
951995

952996
template<>
953997
struct ScalarTraits<uint64_t> {
954998
static void output(const uint64_t &, void *, raw_ostream &);
955999
static StringRef input(StringRef, void *, uint64_t &);
956-
static bool mustQuote(StringRef) { return false; }
1000+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9571001
};
9581002

9591003
template<>
9601004
struct ScalarTraits<int8_t> {
9611005
static void output(const int8_t &, void *, raw_ostream &);
9621006
static StringRef input(StringRef, void *, int8_t &);
963-
static bool mustQuote(StringRef) { return false; }
1007+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9641008
};
9651009

9661010
template<>
9671011
struct ScalarTraits<int16_t> {
9681012
static void output(const int16_t &, void *, raw_ostream &);
9691013
static StringRef input(StringRef, void *, int16_t &);
970-
static bool mustQuote(StringRef) { return false; }
1014+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9711015
};
9721016

9731017
template<>
9741018
struct ScalarTraits<int32_t> {
9751019
static void output(const int32_t &, void *, raw_ostream &);
9761020
static StringRef input(StringRef, void *, int32_t &);
977-
static bool mustQuote(StringRef) { return false; }
1021+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9781022
};
9791023

9801024
template<>
9811025
struct ScalarTraits<int64_t> {
9821026
static void output(const int64_t &, void *, raw_ostream &);
9831027
static StringRef input(StringRef, void *, int64_t &);
984-
static bool mustQuote(StringRef) { return false; }
1028+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9851029
};
9861030

9871031
template<>
9881032
struct ScalarTraits<float> {
9891033
static void output(const float &, void *, raw_ostream &);
9901034
static StringRef input(StringRef, void *, float &);
991-
static bool mustQuote(StringRef) { return false; }
1035+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9921036
};
9931037

9941038
template<>
9951039
struct ScalarTraits<double> {
9961040
static void output(const double &, void *, raw_ostream &);
9971041
static StringRef input(StringRef, void *, double &);
998-
static bool mustQuote(StringRef) { return false; }
1042+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
9991043
};
10001044

10011045
// For endian types, we just use the existing ScalarTraits for the underlying
@@ -1019,7 +1063,7 @@ struct ScalarTraits<support::detail::packed_endian_specific_integral<
10191063
return R;
10201064
}
10211065

1022-
static bool mustQuote(StringRef Str) {
1066+
static QuotingType mustQuote(StringRef Str) {
10231067
return ScalarTraits<value_type>::mustQuote(Str);
10241068
}
10251069
};
@@ -1148,7 +1192,7 @@ class Input : public IO {
11481192
bool beginBitSetScalar(bool &) override;
11491193
bool bitSetMatch(const char *, bool ) override;
11501194
void endBitSetScalar() override;
1151-
void scalarString(StringRef &, bool) override;
1195+
void scalarString(StringRef &, QuotingType) override;
11521196
void blockScalarString(StringRef &) override;
11531197
void setError(const Twine &message) override;
11541198
bool canElideEmptySequence() override;
@@ -1293,7 +1337,7 @@ class Output : public IO {
12931337
bool beginBitSetScalar(bool &) override;
12941338
bool bitSetMatch(const char *, bool ) override;
12951339
void endBitSetScalar() override;
1296-
void scalarString(StringRef &, bool) override;
1340+
void scalarString(StringRef &, QuotingType) override;
12971341
void blockScalarString(StringRef &) override;
12981342
void setError(const Twine &message) override;
12991343
bool canElideEmptySequence() override;
@@ -1371,28 +1415,28 @@ template<>
13711415
struct ScalarTraits<Hex8> {
13721416
static void output(const Hex8 &, void *, raw_ostream &);
13731417
static StringRef input(StringRef, void *, Hex8 &);
1374-
static bool mustQuote(StringRef) { return false; }
1418+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
13751419
};
13761420

13771421
template<>
13781422
struct ScalarTraits<Hex16> {
13791423
static void output(const Hex16 &, void *, raw_ostream &);
13801424
static StringRef input(StringRef, void *, Hex16 &);
1381-
static bool mustQuote(StringRef) { return false; }
1425+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
13821426
};
13831427

13841428
template<>
13851429
struct ScalarTraits<Hex32> {
13861430
static void output(const Hex32 &, void *, raw_ostream &);
13871431
static StringRef input(StringRef, void *, Hex32 &);
1388-
static bool mustQuote(StringRef) { return false; }
1432+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
13891433
};
13901434

13911435
template<>
13921436
struct ScalarTraits<Hex64> {
13931437
static void output(const Hex64 &, void *, raw_ostream &);
13941438
static StringRef input(StringRef, void *, Hex64 &);
1395-
static bool mustQuote(StringRef) { return false; }
1439+
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
13961440
};
13971441

13981442
// Define non-member operator>> so that Input can stream in a document list.
@@ -1681,7 +1725,7 @@ template <typename T> struct StdMapStringCustomMappingTraitsImpl {
16811725
template <> struct ScalarTraits<Type> { \
16821726
static void output(const Type &Value, void *ctx, raw_ostream &Out); \
16831727
static StringRef input(StringRef Scalar, void *ctxt, Type &Value); \
1684-
static bool mustQuote(StringRef) { return MustQuote; } \
1728+
static QuotingType mustQuote(StringRef) { return MustQuote; } \
16851729
}; \
16861730
} \
16871731
}

0 commit comments

Comments
 (0)