Skip to content

Commit 0780c52

Browse files
committed
[Syntax] Unify RawSyntax and RawTokenSyntax using union and TrailingObjects
It better matches the SwiftSyntax model. Using TrailingObjects reduces the number of heap allocations, which yields an 18% performance improvement.
1 parent 4c17203 commit 0780c52

28 files changed

+543
-613
lines changed

include/swift/Parse/Parser.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ namespace swift {
5858

5959
namespace syntax {
6060
class AbsolutePosition;
61-
struct RawTokenSyntax;
61+
class RawSyntax;
6262
enum class SyntaxKind;
6363
class TypeSyntax;
6464
}// end of syntax namespace
@@ -1435,7 +1435,7 @@ bool isKeywordPossibleDeclStart(const Token &Tok);
14351435

14361436
/// \brief Lex and return a vector of `TokenSyntax` tokens, which include
14371437
/// leading and trailing trivia.
1438-
std::vector<std::pair<RC<syntax::RawTokenSyntax>,
1438+
std::vector<std::pair<RC<syntax::RawSyntax>,
14391439
syntax::AbsolutePosition>>
14401440
tokenizeWithTrivia(const LangOptions &LangOpts,
14411441
const SourceManager &SM,

include/swift/Syntax/RawSyntax.h

+193-79
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,15 @@
2929
#ifndef SWIFT_SYNTAX_RAWSYNTAX_H
3030
#define SWIFT_SYNTAX_RAWSYNTAX_H
3131

32+
#include "swift/Basic/InlineBitfield.h"
3233
#include "swift/Syntax/References.h"
3334
#include "swift/Syntax/SyntaxKind.h"
35+
#include "swift/Syntax/TokenKinds.h"
3436
#include "swift/Syntax/Trivia.h"
3537
#include "llvm/ADT/IntrusiveRefCntPtr.h"
3638
#include "llvm/ADT/PointerUnion.h"
3739
#include "llvm/Support/Casting.h"
40+
#include "llvm/Support/TrailingObjects.h"
3841
#include "llvm/Support/raw_ostream.h"
3942

4043
#include <vector>
@@ -53,16 +56,17 @@ using llvm::StringRef;
5356
#define syntax_assert_child_token(Raw, CursorName, ...) \
5457
({ \
5558
bool __Found = false; \
56-
auto __Token = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
59+
auto __Token = Raw->getChild(Cursor::CursorName); \
60+
assert(__Token->isToken()); \
5761
if (__Token->isPresent()) { \
5862
for (auto Token : {__VA_ARGS__}) { \
5963
if (__Token->getTokenKind() == Token) { \
6064
__Found = true; \
6165
break; \
6266
} \
6367
} \
64-
assert(__Found && "invalid token supplied for " \
65-
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
68+
assert(__Found && "invalid token supplied for " #CursorName \
69+
", expected one of {" #__VA_ARGS__ "}"); \
6670
} \
6771
})
6872
#else
@@ -72,18 +76,19 @@ using llvm::StringRef;
7276
#ifndef NDEBUG
7377
#define syntax_assert_child_token_text(Raw, CursorName, TokenKind, ...) \
7478
({ \
75-
bool __Found = false; \
76-
auto __Child = cast<RawTokenSyntax>(Raw->getChild(Cursor::CursorName)); \
77-
if (__Child->isPresent()) { \
79+
bool __Found = false; \
80+
auto __Child = Raw->getChild(Cursor::CursorName); \
81+
assert(__Child->isToken()); \
82+
if (__Child->isPresent()) { \
7883
assert(__Child->getTokenKind() == TokenKind); \
7984
for (auto __Text : {__VA_ARGS__}) { \
80-
if (__Child->getText() == __Text) { \
85+
if (__Child->getTokenText() == __Text) { \
8186
__Found = true; \
8287
break; \
8388
} \
8489
} \
85-
assert(__Found && "invalid text supplied for " \
86-
#CursorName ", expected one of {" #__VA_ARGS__ "}"); \
90+
assert(__Found && "invalid text supplied for " #CursorName \
91+
", expected one of {" #__VA_ARGS__ "}"); \
8792
} \
8893
})
8994
#else
@@ -158,22 +163,6 @@ class AbsolutePosition {
158163
}
159164
}
160165

161-
/// Use some character as a reference for adding to the absolute position,
162-
/// taking note of newlines, etc.
163-
/// Take care that consecutive call of this function with '\r' and '\n'
164-
/// causes increase of 2 Line but desirable result may be 1 Line.
165-
void addCharacter(char C) {
166-
switch (C) {
167-
case '\n':
168-
case '\r':
169-
addNewlines(1, 1);
170-
break;
171-
default:
172-
addColumns(1);
173-
break;
174-
}
175-
}
176-
177166
/// Get the line number of this position.
178167
uint32_t getLine() const { return Line; }
179168

@@ -218,95 +207,220 @@ struct SyntaxPrintOptions {
218207
/// RawSyntax - the strictly immutable, shared backing nodes for all syntax.
219208
///
220209
/// This is implementation detail - do not expose it in public API.
221-
struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
222-
223-
using LayoutList = std::vector<RC<RawSyntax>>;
224-
225-
/// The kind of syntax this node represents.
226-
const SyntaxKind Kind;
227-
228-
/// The "layout" of the node - representing the children, or the terms
229-
/// in the production of the grammar.
230-
const LayoutList Layout;
231-
232-
/// Whether this piece of syntax was actually present in the source.
233-
const SourcePresence Presence;
210+
class RawSyntax final
211+
: public llvm::ThreadSafeRefCountedBase<RawSyntax>,
212+
private llvm::TrailingObjects<RawSyntax, RC<RawSyntax>, OwnedString,
213+
TriviaPiece> {
214+
friend TrailingObjects;
215+
216+
union {
217+
uint64_t Clear;
218+
struct {
219+
/// The kind of syntax this node represents.
220+
unsigned Kind : bitmax(NumSyntaxKindBits, 8);
221+
/// Whether this piece of syntax was actually present in the source.
222+
unsigned Presence : 1;
223+
};
224+
enum { NumRawSyntaxBits = bitmax(NumSyntaxKindBits, 8) + 1 };
225+
226+
// For "layout" nodes.
227+
struct {
228+
uint64_t : bitmax(NumRawSyntaxBits, 32);
229+
/// Number of children this "layout" node has.
230+
unsigned NumChildren : 32;
231+
};
232+
233+
// For "token" nodes.
234+
struct {
235+
uint64_t : bitmax(NumRawSyntaxBits, 16);
236+
/// The kind of token this "token" node represents.
237+
unsigned TokenKind : 16;
238+
/// Number of leading trivia pieces.
239+
unsigned NumLeadingTrivia : 16;
240+
/// Number of trailing trivia pieces.
241+
unsigned NumTrailingTrivia : 16;
242+
};
243+
} Bits;
244+
245+
size_t numTrailingObjects(OverloadToken<RC<RawSyntax>>) const {
246+
return isToken() ? 0 : Bits.NumChildren;
247+
}
248+
size_t numTrailingObjects(OverloadToken<OwnedString>) const {
249+
return isToken() ? 1 : 0;
250+
}
251+
size_t numTrailingObjects(OverloadToken<TriviaPiece>) const {
252+
return isToken() ? Bits.NumLeadingTrivia + Bits.NumTrailingTrivia : 0;
253+
}
234254

235-
/// Create a piece of raw syntax.
236-
RawSyntax(const SyntaxKind Kind, const std::vector<RC<RawSyntax>> Layout,
237-
const SourcePresence Presence)
238-
: Kind(Kind), Layout(Layout), Presence(Presence) {}
255+
RawSyntax(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
256+
SourcePresence Presence);
257+
RawSyntax(tok TokKind, OwnedString Text, SourcePresence Presence,
258+
ArrayRef<TriviaPiece> LeadingTrivia,
259+
ArrayRef<TriviaPiece> TrailingTrivia);
239260

240-
virtual ~RawSyntax() = default;
261+
public:
262+
~RawSyntax();
241263

242-
/// Returns a raw syntax node of the given Kind, specified Layout,
243-
/// and source presence.
244-
static RC<RawSyntax> make(const SyntaxKind Kind, const LayoutList Layout,
245-
const SourcePresence Presence) {
246-
return RC<RawSyntax>{new RawSyntax{Kind, Layout, Presence}};
264+
/// \name Factory methods.
265+
/// @{
266+
267+
/// Make a raw "layout" syntax node.
268+
static RC<RawSyntax> make(SyntaxKind Kind, ArrayRef<RC<RawSyntax>> Layout,
269+
SourcePresence Presence);
270+
271+
/// Make a raw "token" syntax node.
272+
static RC<RawSyntax> make(tok TokKind, OwnedString Text,
273+
SourcePresence Presence,
274+
ArrayRef<TriviaPiece> LeadingTrivia,
275+
ArrayRef<TriviaPiece> TrailingTrivia);
276+
277+
/// Make a missing raw "layout" syntax node.
278+
static RC<RawSyntax> missing(SyntaxKind Kind) {
279+
return make(Kind, {}, SourcePresence::Missing);
247280
}
248281

249-
/// Returns a raw syntax node of the given Kind, marked as missing.
250-
static RC<RawSyntax> missing(const SyntaxKind Kind) {
251-
return make(Kind, {}, SourcePresence::Missing);
282+
/// Make a missing raw "token" syntax node.
283+
static RC<RawSyntax> missing(tok TokKind, OwnedString Text) {
284+
return make(TokKind, Text, SourcePresence::Missing,
285+
ArrayRef<TriviaPiece>{}, ArrayRef<TriviaPiece>{});
252286
}
253287

254-
/// Get a child based on a particular node's "Cursor", indicating
255-
/// the position of the terms in the production of the Swift grammar.
256-
template <typename CursorType> RC<RawSyntax> getChild(CursorType C) const {
257-
return Layout[cursorIndex(C)];
288+
/// @}
289+
290+
SourcePresence getPresence() const {
291+
return static_cast<SourcePresence>(Bits.Presence);
258292
}
259293

294+
SyntaxKind getKind() const { return static_cast<SyntaxKind>(Bits.Kind); }
295+
260296
/// Returns true if the node is "missing" in the source (i.e. it was
261297
/// expected (or optional) but not written).
262-
bool isMissing() const { return Presence == SourcePresence::Missing; }
298+
bool isMissing() const { return getPresence() == SourcePresence::Missing; }
263299

264300
/// Returns true if the node is "present" in the source.
265-
bool isPresent() const {
266-
return Presence == SourcePresence::Present;
267-
}
301+
bool isPresent() const { return getPresence() == SourcePresence::Present; }
268302

269303
/// Returns true if this raw syntax node is some kind of declaration.
270-
bool isDecl() const { return isDeclKind(Kind); }
304+
bool isDecl() const { return isDeclKind(getKind()); }
271305

272306
/// Returns true if this raw syntax node is some kind of type syntax.
273-
bool isType() const { return isTypeKind(Kind); }
307+
bool isType() const { return isTypeKind(getKind()); }
274308

275309
/// Returns true if this raw syntax node is some kind of statement.
276-
bool isStmt() const { return isStmtKind(Kind); }
310+
bool isStmt() const { return isStmtKind(getKind()); }
277311

278312
/// Returns true if this raw syntax node is some kind of expression.
279-
bool isExpr() const { return isExprKind(Kind); }
313+
bool isExpr() const { return isExprKind(getKind()); }
280314

281315
/// Returns true if this raw syntax node is some kind of pattern.
282-
bool isPattern() const { return isPatternKind(Kind); }
316+
bool isPattern() const { return isPatternKind(getKind()); }
317+
318+
/// Return true if this raw syntax node is an unknown node.
319+
bool isUnknown() const { return isUnknownKind(getKind()); }
283320

284321
/// Return true if this raw syntax node is a token.
285-
bool isToken() const { return isTokenKind(Kind); }
322+
bool isToken() const { return isTokenKind(getKind()); }
323+
324+
/// \name Getter routines for SyntaxKind::Token.
325+
/// @{
286326

287-
bool isUnknown() const { return isUnknownKind(Kind); }
327+
/// Get the kind of the token.
328+
tok getTokenKind() const {
329+
assert(isToken());
330+
return static_cast<tok>(Bits.TokenKind);
331+
}
332+
333+
/// Return the text of the token.
334+
StringRef getTokenText() const {
335+
assert(isToken());
336+
return getTrailingObjects<OwnedString>()->str();
337+
}
338+
339+
/// Return the leading trivia list of the token.
340+
ArrayRef<TriviaPiece> getLeadingTrivia() const {
341+
assert(isToken());
342+
return {getTrailingObjects<TriviaPiece>(), Bits.NumLeadingTrivia};
343+
}
344+
/// Return the trailing trivia list of the token.
345+
ArrayRef<TriviaPiece> getTrailingTrivia() const {
346+
assert(isToken());
347+
return {getTrailingObjects<TriviaPiece>() + Bits.NumLeadingTrivia,
348+
Bits.NumTrailingTrivia};
349+
}
350+
351+
/// Return \c true if this is the given kind of token.
352+
bool isToken(tok K) const { return isToken() && getTokenKind() == K; }
353+
354+
/// @}
355+
356+
/// \name Transform routines for "token" nodes.
357+
/// @{
358+
359+
/// Return a new token like this one, but with the given leading
360+
/// trivia instead.
361+
RC<RawSyntax>
362+
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
363+
return make(getTokenKind(), getTokenText(), getPresence(),
364+
NewLeadingTrivia, getTrailingTrivia());
365+
}
366+
367+
RC<RawSyntax> withLeadingTrivia(Trivia NewLeadingTrivia) const {
368+
return withLeadingTrivia(NewLeadingTrivia.Pieces);
369+
}
370+
371+
/// Return a new token like this one, but with the given trailing
372+
/// trivia instead.
373+
RC<RawSyntax>
374+
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
375+
return make(getTokenKind(), getTokenText(), getPresence(),
376+
getLeadingTrivia(), NewTrailingTrivia);
377+
}
378+
379+
RC<RawSyntax> withTrailingTrivia(Trivia NewTrailingTrivia) const {
380+
return withTrailingTrivia(NewTrailingTrivia.Pieces);
381+
}
382+
383+
/// @}
384+
385+
/// \name Getter routines for "layout" nodes.
386+
/// @{
387+
388+
/// Get the child nodes.
389+
ArrayRef<RC<RawSyntax>> getLayout() const {
390+
if (isToken())
391+
return {};
392+
return {getTrailingObjects<RC<RawSyntax>>(), Bits.NumChildren};
393+
}
394+
395+
/// Get a child based on a particular node's "Cursor", indicating
396+
/// the position of the terms in the production of the Swift grammar.
397+
const RC<RawSyntax> &getChild(CursorIndex Index) const {
398+
return getLayout()[Index];
399+
}
400+
401+
/// @}
402+
403+
/// \name Transform routines for "layout" nodes.
404+
/// @{
288405

289406
/// Return a new raw syntax node with the given new layout element appended
290407
/// to the end of the node's layout.
291408
RC<RawSyntax> append(RC<RawSyntax> NewLayoutElement) const;
292409

293410
/// Return a new raw syntax node with the given new layout element replacing
294411
/// another at some cursor position.
295-
template <typename CursorType>
296412
RC<RawSyntax>
297-
replaceChild(CursorType C, RC<RawSyntax> NewLayoutElement) const {
298-
LayoutList NewLayout;
413+
replaceChild(CursorIndex Index, RC<RawSyntax> NewLayoutElement) const;
299414

300-
std::copy(Layout.begin(), Layout.begin() + cursorIndex(C),
301-
std::back_inserter(NewLayout));
415+
/// @}
302416

303-
NewLayout.push_back(NewLayoutElement);
304-
305-
std::copy(Layout.begin() + cursorIndex(C) + 1, Layout.end(),
306-
std::back_inserter(NewLayout));
307-
308-
return RawSyntax::make(Kind, NewLayout, Presence);
309-
}
417+
/// Advance the provided AbsolutePosition by the full width of this node.
418+
///
419+
/// If this is a token node, return the AbsolutePosition of the start of the
420+
/// token's nontrivial text. Otherwise, return the position of the first
421+
/// token. If this contains no tokens, return None.
422+
llvm::Optional<AbsolutePosition>
423+
accumulateAbsolutePosition(AbsolutePosition &Pos) const;
310424

311425
/// Print this piece of syntax recursively.
312426
void print(llvm::raw_ostream &OS, SyntaxPrintOptions Opts) const;
@@ -315,7 +429,7 @@ struct RawSyntax : public llvm::ThreadSafeRefCountedBase<RawSyntax> {
315429
void dump() const;
316430

317431
/// Dump this piece of syntax recursively.
318-
void dump(llvm::raw_ostream &OS, unsigned Indent) const;
432+
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const;
319433
};
320434

321435
} // end namespace syntax

0 commit comments

Comments
 (0)