diff --git a/.rubocop.yml b/.rubocop.yml index d0bf0830..b7ba43e8 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -64,6 +64,9 @@ Style/CaseEquality: Style/CaseLikeIf: Enabled: false +Style/Documentation: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false @@ -94,6 +97,9 @@ Style/MutableConstant: Style/NegatedIfElseCondition: Enabled: false +Style/Next: + Enabled: false + Style/NumericPredicate: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index aea21d8e..b2ff8414 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "etc" +require "fiddle" require "json" require "pp" require "prettier_print" @@ -9,11 +10,11 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/compiler" require_relative "syntax_tree/visitor/field_visitor" require_relative "syntax_tree/visitor/json_visitor" require_relative "syntax_tree/visitor/match_visitor" @@ -26,6 +27,15 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/yarv" +require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instruction_sequence" +require_relative "syntax_tree/yarv/instructions" +require_relative "syntax_tree/yarv/legacy" +require_relative "syntax_tree/yarv/local_table" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. 
It can be used to diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb new file mode 100644 index 00000000..860a1fe5 --- /dev/null +++ b/lib/syntax_tree/dsl.rb @@ -0,0 +1,1004 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides shortcuts for creating AST nodes. + module DSL + # Create a new BEGINBlock node. + def BEGINBlock(lbrace, statements) + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new CHAR node. + def CHAR(value) + CHAR.new(value: value, location: Location.default) + end + + # Create a new ENDBlock node. + def ENDBlock(lbrace, statements) + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new EndContent node. + def EndContent(value) + EndContent.new(value: value, location: Location.default) + end + + # Create a new AliasNode node. + def AliasNode(left, right) + AliasNode.new(left: left, right: right, location: Location.default) + end + + # Create a new ARef node. + def ARef(collection, index) + ARef.new(collection: collection, index: index, location: Location.default) + end + + # Create a new ARefField node. + def ARefField(collection, index) + ARefField.new( + collection: collection, + index: index, + location: Location.default + ) + end + + # Create a new ArgParen node. + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + # Create a new Args node. + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + # Create a new ArgBlock node. + def ArgBlock(value) + ArgBlock.new(value: value, location: Location.default) + end + + # Create a new ArgStar node. + def ArgStar(value) + ArgStar.new(value: value, location: Location.default) + end + + # Create a new ArgsForward node. + def ArgsForward + ArgsForward.new(location: Location.default) + end + + # Create a new ArrayLiteral node. 
def ArrayLiteral(lbracket, contents)
  location = Location.default
  ArrayLiteral.new(lbracket: lbracket, contents: contents, location: location)
end

# Shortcut for AryPtn.new with the default location.
def AryPtn(constant, requireds, rest, posts)
  location = Location.default
  AryPtn.new(
    constant: constant, requireds: requireds, rest: rest, posts: posts,
    location: location
  )
end

# Shortcut for Assign.new with the default location.
def Assign(target, value)
  location = Location.default
  Assign.new(target: target, value: value, location: location)
end

# Shortcut for Assoc.new with the default location.
def Assoc(key, value)
  location = Location.default
  Assoc.new(key: key, value: value, location: location)
end

# Shortcut for AssocSplat.new with the default location.
def AssocSplat(value)
  location = Location.default
  AssocSplat.new(value: value, location: location)
end

# Shortcut for Backref.new with the default location.
def Backref(value)
  location = Location.default
  Backref.new(value: value, location: location)
end

# Shortcut for Backtick.new with the default location.
def Backtick(value)
  location = Location.default
  Backtick.new(value: value, location: location)
end

# Shortcut for BareAssocHash.new with the default location.
def BareAssocHash(assocs)
  location = Location.default
  BareAssocHash.new(assocs: assocs, location: location)
end

# Shortcut for Begin.new with the default location.
def Begin(bodystmt)
  location = Location.default
  Begin.new(bodystmt: bodystmt, location: location)
end

# Shortcut for PinnedBegin.new with the default location.
def PinnedBegin(statement)
  location = Location.default
  PinnedBegin.new(statement: statement, location: location)
end

# Shortcut for Binary.new with the default location.
def Binary(left, operator, right)
  location = Location.default
  Binary.new(left: left, operator: operator, right: right, location: location)
end

# Shortcut for BlockVar.new with the default location.
def BlockVar(params, locals)
  location = Location.default
  BlockVar.new(params: params, locals: locals, location: location)
end

# Shortcut for BlockArg.new with the default location.
def BlockArg(name)
  location = Location.default
  BlockArg.new(name: name, location: location)
end

# Shortcut for BodyStmt.new with the default location.
def BodyStmt(
  statements, rescue_clause, else_keyword, else_clause, ensure_clause
)
  location = Location.default
  BodyStmt.new(
    statements: statements, rescue_clause: rescue_clause,
    else_keyword: else_keyword, else_clause: else_clause,
    ensure_clause: ensure_clause, location: location
  )
end

# Shortcut for Break.new with the default location.
def Break(arguments)
  location = Location.default
  Break.new(arguments: arguments, location: location)
end

# Shortcut for CallNode.new with the default location.
def CallNode(receiver, operator, message, arguments)
  location = Location.default
  CallNode.new(
    receiver: receiver, operator: operator, message: message,
    arguments: arguments, location: location
  )
end

# Shortcut for Case.new with the default location.
def Case(keyword, value, consequent)
  location = Location.default
  Case.new(
    keyword: keyword, value: value, consequent: consequent,
    location: location
  )
end

# Shortcut for RAssign.new with the default location.
def RAssign(value, operator, pattern)
  location = Location.default
  RAssign.new(
    value: value, operator: operator, pattern: pattern,
    location: location
  )
end

# Shortcut for ClassDeclaration.new with the default location.
def ClassDeclaration(constant, superclass, bodystmt)
  location = Location.default
  ClassDeclaration.new(
    constant: constant, superclass: superclass, bodystmt: bodystmt,
    location: location
  )
end

# Shortcut for Comma.new with the default location.
def Comma(value)
  location = Location.default
  Comma.new(value: value, location: location)
end

# Shortcut for Command.new with the default location.
def Command(message, arguments, block)
  location = Location.default
  Command.new(
    message: message, arguments: arguments, block: block,
    location: location
  )
end

# Shortcut for CommandCall.new with the default location.
def CommandCall(receiver, operator, message, arguments, block)
  location = Location.default
  CommandCall.new(
    receiver: receiver, operator: operator, message: message,
    arguments: arguments, block: block, location: location
  )
end

# Shortcut for Comment.new with the default location.
def Comment(value, inline)
  location = Location.default
  Comment.new(value: value, inline: inline, location: location)
end

# Shortcut for Const.new with the default location.
def Const(value)
  location = Location.default
  Const.new(value: value, location: location)
end

# Shortcut for ConstPathField.new with the default location.
def ConstPathField(parent, constant)
  location = Location.default
  ConstPathField.new(parent: parent, constant: constant, location: location)
end

# Shortcut for ConstPathRef.new with the default location.
def ConstPathRef(parent, constant)
  location = Location.default
  ConstPathRef.new(parent: parent, constant: constant, location: location)
end

# Shortcut for ConstRef.new with the default location.
def ConstRef(constant)
  location = Location.default
  ConstRef.new(constant: constant, location: location)
end

# Shortcut for CVar.new with the default location.
def CVar(value)
  location = Location.default
  CVar.new(value: value, location: location)
end

# Shortcut for DefNode.new with the default location.
def DefNode(target, operator, name, params, bodystmt)
  location = Location.default
  DefNode.new(
    target: target, operator: operator, name: name, params: params,
    bodystmt: bodystmt, location: location
  )
end

# Shortcut for Defined.new with the default location.
def Defined(value)
  location = Location.default
  Defined.new(value: value, location: location)
end

# Shortcut for BlockNode.new with the default location.
def BlockNode(opening, block_var, bodystmt)
  location = Location.default
  BlockNode.new(
    opening: opening, block_var: block_var, bodystmt: bodystmt,
    location: location
  )
end

# Shortcut for RangeNode.new with the default location.
def RangeNode(left, operator, right)
  location = Location.default
  RangeNode.new(
    left: left, operator: operator, right: right,
    location: location
  )
end

# Shortcut for DynaSymbol.new with the default location.
def DynaSymbol(parts, quote)
  location = Location.default
  DynaSymbol.new(parts: parts, quote: quote, location: location)
end

# Shortcut for Else.new with the default location.
def Else(keyword, statements)
  location = Location.default
  Else.new(keyword: keyword, statements: statements, location: location)
end

# Shortcut for Elsif.new with the default location.
def Elsif(predicate, statements, consequent)
  location = Location.default
  Elsif.new(
    predicate: predicate, statements: statements, consequent: consequent,
    location: location
  )
end

# Shortcut for EmbDoc.new with the default location.
def EmbDoc(value)
  location = Location.default
  EmbDoc.new(value: value, location: location)
end

# Shortcut for EmbExprBeg.new with the default location.
def EmbExprBeg(value)
  location = Location.default
  EmbExprBeg.new(value: value, location: location)
end

# Shortcut for EmbExprEnd.new with the default location.
def EmbExprEnd(value)
  location = Location.default
  EmbExprEnd.new(value: value, location: location)
end

# Shortcut for EmbVar.new with the default location.
def EmbVar(value)
  location = Location.default
  EmbVar.new(value: value, location: location)
end

# Shortcut for Ensure.new with the default location.
def Ensure(keyword, statements)
  location = Location.default
  Ensure.new(keyword: keyword, statements: statements, location: location)
end

# Shortcut for ExcessedComma.new with the default location.
def ExcessedComma(value)
  location = Location.default
  ExcessedComma.new(value: value, location: location)
end

# Shortcut for Field.new with the default location.
def Field(parent, operator, name)
  location = Location.default
  Field.new(parent: parent, operator: operator, name: name, location: location)
end

# Shortcut for FloatLiteral.new with the default location.
def FloatLiteral(value)
  location = Location.default
  FloatLiteral.new(value: value, location: location)
end

# Shortcut for FndPtn.new with the default location.
def FndPtn(constant, left, values, right)
  location = Location.default
  FndPtn.new(
    constant: constant, left: left, values: values, right: right,
    location: location
  )
end

# Shortcut for For.new with the default location.
def For(index, collection, statements)
  location = Location.default
  For.new(
    index: index, collection: collection, statements: statements,
    location: location
  )
end

# Shortcut for GVar.new with the default location.
def GVar(value)
  location = Location.default
  GVar.new(value: value, location: location)
end

# Shortcut for HashLiteral.new with the default location.
def HashLiteral(lbrace, assocs)
  location = Location.default
  HashLiteral.new(lbrace: lbrace, assocs: assocs, location: location)
end

# Shortcut for Heredoc.new with the default location.
def Heredoc(beginning, ending, dedent, parts)
  location = Location.default
  Heredoc.new(
    beginning: beginning, ending: ending, dedent: dedent, parts: parts,
    location: location
  )
end

# Shortcut for HeredocBeg.new with the default location.
def HeredocBeg(value)
  location = Location.default
  HeredocBeg.new(value: value, location: location)
end

# Shortcut for HeredocEnd.new with the default location.
def HeredocEnd(value)
  location = Location.default
  HeredocEnd.new(value: value, location: location)
end

# Shortcut for HshPtn.new with the default location.
def HshPtn(constant, keywords, keyword_rest)
  location = Location.default
  HshPtn.new(
    constant: constant, keywords: keywords, keyword_rest: keyword_rest,
    location: location
  )
end

# Shortcut for Ident.new with the default location.
def Ident(value)
  location = Location.default
  Ident.new(value: value, location: location)
end

# Shortcut for IfNode.new with the default location.
def IfNode(predicate, statements, consequent)
  location = Location.default
  IfNode.new(
    predicate: predicate, statements: statements, consequent: consequent,
    location: location
  )
end

# Shortcut for IfOp.new with the default location.
def IfOp(predicate, truthy, falsy)
  location = Location.default
  IfOp.new(
    predicate: predicate, truthy: truthy, falsy: falsy,
    location: location
  )
end

# Shortcut for Imaginary.new with the default location.
def Imaginary(value)
  location = Location.default
  Imaginary.new(value: value, location: location)
end

# Shortcut for In.new with the default location.
def In(pattern, statements, consequent)
  location = Location.default
  In.new(
    pattern: pattern, statements: statements, consequent: consequent,
    location: location
  )
end

# Shortcut for Int.new with the default location.
def Int(value)
  location = Location.default
  Int.new(value: value, location: location)
end

# Shortcut for IVar.new with the default location.
def IVar(value)
  location = Location.default
  IVar.new(value: value, location: location)
end

# Shortcut for Kw.new with the default location.
def Kw(value)
  location = Location.default
  Kw.new(value: value, location: location)
end

# Shortcut for KwRestParam.new with the default location.
def KwRestParam(name)
  location = Location.default
  KwRestParam.new(name: name, location: location)
end

# Shortcut for Label.new with the default location.
def Label(value)
  location = Location.default
  Label.new(value: value, location: location)
end

# Shortcut for LabelEnd.new with the default location.
def LabelEnd(value)
  location = Location.default
  LabelEnd.new(value: value, location: location)
end

# Shortcut for Lambda.new with the default location.
def Lambda(params, statements)
  location = Location.default
  Lambda.new(params: params, statements: statements, location: location)
end

# Shortcut for LambdaVar.new with the default location.
def LambdaVar(params, locals)
  location = Location.default
  LambdaVar.new(params: params, locals: locals, location: location)
end

# Shortcut for LBrace.new with the default location.
def LBrace(value)
  location = Location.default
  LBrace.new(value: value, location: location)
end

# Shortcut for LBracket.new with the default location.
def LBracket(value)
  location = Location.default
  LBracket.new(value: value, location: location)
end

# Shortcut for LParen.new with the default location.
def LParen(value)
  location = Location.default
  LParen.new(value: value, location: location)
end

# Shortcut for MAssign.new with the default location.
def MAssign(target, value)
  location = Location.default
  MAssign.new(target: target, value: value, location: location)
end

# Shortcut for MethodAddBlock.new with the default location.
def MethodAddBlock(call, block)
  location = Location.default
  MethodAddBlock.new(call: call, block: block, location: location)
end

# Shortcut for MLHS.new with the default location.
def MLHS(parts, comma)
  location = Location.default
  MLHS.new(parts: parts, comma: comma, location: location)
end

# Shortcut for MLHSParen.new with the default location.
def MLHSParen(contents, comma)
  location = Location.default
  MLHSParen.new(contents: contents, comma: comma, location: location)
end

# Shortcut for ModuleDeclaration.new with the default location.
def ModuleDeclaration(constant, bodystmt)
  location = Location.default
  ModuleDeclaration.new(
    constant: constant, bodystmt: bodystmt,
    location: location
  )
end

# Shortcut for MRHS.new with the default location.
def MRHS(parts)
  location = Location.default
  MRHS.new(parts: parts, location: location)
end

# Shortcut for Next.new with the default location.
def Next(arguments)
  location = Location.default
  Next.new(arguments: arguments, location: location)
end

# Shortcut for Op.new with the default location.
def Op(value)
  location = Location.default
  Op.new(value: value, location: location)
end

# Shortcut for OpAssign.new with the default location.
def OpAssign(target, operator, value)
  location = Location.default
  OpAssign.new(
    target: target, operator: operator, value: value,
    location: location
  )
end

# Shortcut for Params.new with the default location.
def Params(requireds, optionals, rest, posts, keywords, keyword_rest, block)
  location = Location.default
  Params.new(
    requireds: requireds, optionals: optionals, rest: rest, posts: posts,
    keywords: keywords, keyword_rest: keyword_rest, block: block,
    location: location
  )
end

# Shortcut for Paren.new with the default location.
def Paren(lparen, contents)
  location = Location.default
  Paren.new(lparen: lparen, contents: contents, location: location)
end

# Shortcut for Period.new with the default location.
def Period(value)
  location = Location.default
  Period.new(value: value, location: location)
end

# Shortcut for Program.new with the default location.
def Program(statements)
  location = Location.default
  Program.new(statements: statements, location: location)
end

# Shortcut for QSymbols.new with the default location.
def QSymbols(beginning, elements)
  location = Location.default
  QSymbols.new(beginning: beginning, elements: elements, location: location)
end

# Shortcut for QSymbolsBeg.new with the default location.
def QSymbolsBeg(value)
  location = Location.default
  QSymbolsBeg.new(value: value, location: location)
end

# Shortcut for QWords.new with the default location.
def QWords(beginning, elements)
  location = Location.default
  QWords.new(beginning: beginning, elements: elements, location: location)
end

# Shortcut for QWordsBeg.new with the default location.
def QWordsBeg(value)
  location = Location.default
  QWordsBeg.new(value: value, location: location)
end

# Shortcut for RationalLiteral.new with the default location.
def RationalLiteral(value)
  location = Location.default
  RationalLiteral.new(value: value, location: location)
end

# Shortcut for RBrace.new with the default location.
def RBrace(value)
  location = Location.default
  RBrace.new(value: value, location: location)
end

# Shortcut for RBracket.new with the default location.
def RBracket(value)
  location = Location.default
  RBracket.new(value: value, location: location)
end

# Shortcut for Redo.new with the default location.
def Redo
  location = Location.default
  Redo.new(location: location)
end

# Shortcut for RegexpContent.new with the default location.
def RegexpContent(beginning, parts)
  location = Location.default
  RegexpContent.new(beginning: beginning, parts: parts, location: location)
end

# Shortcut for RegexpBeg.new with the default location.
def RegexpBeg(value)
  location = Location.default
  RegexpBeg.new(value: value, location: location)
end

# Shortcut for RegexpEnd.new with the default location.
def RegexpEnd(value)
  location = Location.default
  RegexpEnd.new(value: value, location: location)
end

# Shortcut for RegexpLiteral.new with the default location.
def RegexpLiteral(beginning, ending, parts)
  location = Location.default
  RegexpLiteral.new(
    beginning: beginning, ending: ending, parts: parts,
    location: location
  )
end

# Shortcut for RescueEx.new with the default location.
def RescueEx(exceptions, variable)
  location = Location.default
  RescueEx.new(exceptions: exceptions, variable: variable, location: location)
end

# Shortcut for Rescue.new with the default location.
def Rescue(keyword, exception, statements, consequent)
  location = Location.default
  Rescue.new(
    keyword: keyword, exception: exception, statements: statements,
    consequent: consequent, location: location
  )
end

# Shortcut for RescueMod.new with the default location.
def RescueMod(statement, value)
  location = Location.default
  RescueMod.new(statement: statement, value: value, location: location)
end

# Shortcut for RestParam.new with the default location.
def RestParam(name)
  location = Location.default
  RestParam.new(name: name, location: location)
end

# Shortcut for Retry.new with the default location.
def Retry
  location = Location.default
  Retry.new(location: location)
end

# Shortcut for ReturnNode.new with the default location.
def ReturnNode(arguments)
  location = Location.default
  ReturnNode.new(arguments: arguments, location: location)
end

# Shortcut for RParen.new with the default location.
def RParen(value)
  location = Location.default
  RParen.new(value: value, location: location)
end

# Shortcut for SClass.new with the default location.
def SClass(target, bodystmt)
  location = Location.default
  SClass.new(target: target, bodystmt: bodystmt, location: location)
end

# Shortcut for Statements.new with nil as its positional argument and the
# default location.
def Statements(body)
  location = Location.default
  Statements.new(nil, body: body, location: location)
end

# Shortcut for StringContent.new with the default location.
def StringContent(parts)
  location = Location.default
  StringContent.new(parts: parts, location: location)
end

# Shortcut for StringConcat.new with the default location.
def StringConcat(left, right)
  location = Location.default
  StringConcat.new(left: left, right: right, location: location)
end

# Shortcut for StringDVar.new with the default location.
def StringDVar(variable)
  location = Location.default
  StringDVar.new(variable: variable, location: location)
end

# Shortcut for StringEmbExpr.new with the default location.
def StringEmbExpr(statements)
  location = Location.default
  StringEmbExpr.new(statements: statements, location: location)
end

# Shortcut for StringLiteral.new with the default location.
def StringLiteral(parts, quote)
  location = Location.default
  StringLiteral.new(parts: parts, quote: quote, location: location)
end

# Shortcut for Super.new with the default location.
def Super(arguments)
  location = Location.default
  Super.new(arguments: arguments, location: location)
end

# Shortcut for SymBeg.new with the default location.
def SymBeg(value)
  location = Location.default
  SymBeg.new(value: value, location: location)
end

# Shortcut for SymbolContent.new with the default location.
def SymbolContent(value)
  location = Location.default
  SymbolContent.new(value: value, location: location)
end

# Shortcut for SymbolLiteral.new with the default location.
def SymbolLiteral(value)
  location = Location.default
  SymbolLiteral.new(value: value, location: location)
end

# Shortcut for Symbols.new with the default location.
def Symbols(beginning, elements)
  location = Location.default
  Symbols.new(beginning: beginning, elements: elements, location: location)
end

# Shortcut for SymbolsBeg.new with the default location.
def SymbolsBeg(value)
  location = Location.default
  SymbolsBeg.new(value: value, location: location)
end

# Shortcut for TLambda.new with the default location.
def TLambda(value)
  location = Location.default
  TLambda.new(value: value, location: location)
end

# Shortcut for TLamBeg.new with the default location.
def TLamBeg(value)
  location = Location.default
  TLamBeg.new(value: value, location: location)
end

# Shortcut for TopConstField.new with the default location.
def TopConstField(constant)
  location = Location.default
  TopConstField.new(constant: constant, location: location)
end

# Shortcut for TopConstRef.new with the default location.
def TopConstRef(constant)
  location = Location.default
  TopConstRef.new(constant: constant, location: location)
end

# Shortcut for TStringBeg.new with the default location.
def TStringBeg(value)
  location = Location.default
  TStringBeg.new(value: value, location: location)
end

# Shortcut for TStringContent.new with the default location.
def TStringContent(value)
  location = Location.default
  TStringContent.new(value: value, location: location)
end

# Shortcut for TStringEnd.new with the default location.
def TStringEnd(value)
  location = Location.default
  TStringEnd.new(value: value, location: location)
end

# Shortcut for Not.new with the default location.
def Not(statement, parentheses)
  location = Location.default
  Not.new(statement: statement, parentheses: parentheses, location: location)
end

# Shortcut for Unary.new with the default location.
def Unary(operator, statement)
  location = Location.default
  Unary.new(operator: operator, statement: statement, location: location)
end

# Shortcut for Undef.new with the default location.
def Undef(symbols)
  location = Location.default
  Undef.new(symbols: symbols, location: location)
end

# Shortcut for UnlessNode.new with the default location.
def UnlessNode(predicate, statements, consequent)
  location = Location.default
  UnlessNode.new(
    predicate: predicate, statements: statements, consequent: consequent,
    location: location
  )
end

# Shortcut for UntilNode.new with the default location.
def UntilNode(predicate, statements)
  location = Location.default
  UntilNode.new(predicate: predicate, statements: statements, location: location)
end

# Shortcut for VarField.new with the default location.
def VarField(value)
  location = Location.default
  VarField.new(value: value, location: location)
end

# Shortcut for VarRef.new with the default location.
def VarRef(value)
  location = Location.default
  VarRef.new(value: value, location: location)
end

# Shortcut for PinnedVarRef.new with the default location.
def PinnedVarRef(value)
  location = Location.default
  PinnedVarRef.new(value: value, location: location)
end

# Shortcut for VCall.new with the default location.
def VCall(value)
  location = Location.default
  VCall.new(value: value, location: location)
end

# Shortcut for VoidStmt.new with the default location.
def VoidStmt
  location = Location.default
  VoidStmt.new(location: location)
end

# Shortcut for When.new with the default location.
+ def When(arguments, statements, consequent) + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new WhileNode node. + def WhileNode(predicate, statements) + WhileNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new Word node. + def Word(parts) + Word.new(parts: parts, location: Location.default) + end + + # Create a new Words node. + def Words(beginning, elements) + Words.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new WordsBeg node. + def WordsBeg(value) + WordsBeg.new(value: value, location: Location.default) + end + + # Create a new XString node. + def XString(parts) + XString.new(parts: parts, location: Location.default) + end + + # Create a new XStringLiteral node. + def XStringLiteral(parts) + XStringLiteral.new(parts: parts, location: Location.default) + end + + # Create a new YieldNode node. + def YieldNode(arguments) + YieldNode.new(arguments: arguments, location: Location.default) + end + + # Create a new ZSuper node. + def ZSuper + ZSuper.new(location: Location.default) + end + end +end diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb deleted file mode 100644 index 82155d37..00000000 --- a/lib/syntax_tree/visitor/compiler.rb +++ /dev/null @@ -1,2719 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. 
- # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. - # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::Visitor::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. - class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - visit_all(node.contents.parts) - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. 
- raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - node.value.to_i - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents every local variable associated with an instruction - # sequence. 
# There are two kinds of locals: plain locals that are what you
# expect, and block proxy locals, which represent local variables
# associated with blocks that were passed into the current instruction
# sequence.
class LocalTable
  # A local representing a block passed into the current instruction
  # sequence.
  class BlockLocal
    attr_reader :name

    def initialize(name)
      @name = name
    end
  end

  # A regular local variable.
  class PlainLocal
    attr_reader :name

    def initialize(name)
      @name = name
    end
  end

  # The result of looking up a local variable in the current local table.
  # It bundles the local itself, its index in the table, and the level that
  # was passed to the lookup.
  class Lookup
    attr_reader :local, :index, :level

    def initialize(local, index, level)
      @local = local
      @index = index
      @level = level
    end
  end

  # The ordered list of BlockLocal and PlainLocal entries.
  attr_reader :locals

  def initialize
    @locals = []
  end

  # Returns a Lookup for the local with the given name, tagged with the
  # given level, or nil if no local with that name is in this table.
  def find(name, level)
    index = locals.index { |local| local.name == name }
    Lookup.new(locals[index], index, level) if index
  end

  # Whether a local with the given name has been added to this table.
  def has?(name)
    locals.any? { |local| local.name == name }
  end

  # The names of all of the locals, in insertion order.
  def names
    locals.map(&:name)
  end

  # The number of locals in this table.
  def size
    locals.length
  end

  # Add a BlockLocal to the local table. Does nothing if a local with that
  # name already exists.
  def block(name)
    locals << BlockLocal.new(name) unless has?(name)
  end

  # Add a PlainLocal to the local table. Does nothing if a local with that
  # name already exists.
  def plain(name)
    locals << PlainLocal.new(name) unless has?(name)
  end

  # This is the offset from the top of the stack where this local variable
  # lives.
  def offset(index)
    size - (index - 3) - 1
  end
end

# This class is meant to mirror RubyVM::InstructionSequence. It contains a
# list of instructions along with the metadata pertaining to them. It also
# functions as a builder for the instruction sequence.
#
# Entries in the instruction list are either arrays of the form
# [name, *operands] or bare symbols (labels and events pushed through
# Builder#event). Every method that walks the list has to tolerate both.
class InstructionSequence
  MAGIC = "YARVInstructionSequence/SimpleDataFormat"

  # The type of the instruction sequence.
  attr_reader :type

  # The name of the instruction sequence.
  attr_reader :name

  # The parent instruction sequence, if there is one.
  attr_reader :parent_iseq

  # The location of the root node of this instruction sequence.
  attr_reader :location

  # This is the list of information about the arguments to this
  # instruction sequence.
  attr_accessor :argument_size
  attr_reader :argument_options

  # The list of instructions for this instruction sequence.
  attr_reader :insns

  # The table of local variables.
  attr_reader :local_table

  # The hash of names of instance and class variables pointing to the
  # index of their associated inline storage.
  attr_reader :inline_storages

  # The index of the next inline storage that will be created.
  attr_reader :storage_index

  # An object that will track the current size of the stack and the
  # maximum size of the stack for this instruction sequence.
  attr_reader :stack

  def initialize(type, name, parent_iseq, location)
    @type = type
    @name = name
    @parent_iseq = parent_iseq
    @location = location

    @argument_size = 0
    @argument_options = {}

    @local_table = LocalTable.new
    @inline_storages = {}
    @insns = []
    @storage_index = 0
    @stack = Stack.new
  end

  # Look up a local variable by name, first in this instruction sequence
  # and then in each parent in turn. The level on the returned lookup is
  # incremented once for every parent that had to be walked. Returns nil
  # if the name cannot be found anywhere in the chain.
  def local_variable(name, level = 0)
    local_table.find(name, level) ||
      parent_iseq&.local_variable(name, level + 1)
  end

  # Append an instruction (or label/event symbol) and return it.
  def push(insn)
    insns << insn
    insn
  end

  # Reserve and return the next inline storage index.
  def inline_storage
    storage = storage_index
    @storage_index += 1
    storage
  end

  # Return the inline storage index associated with the given name,
  # reserving a new one the first time the name is seen.
  def inline_storage_for(name)
    unless inline_storages.key?(name)
      inline_storages[name] = inline_storage
    end

    inline_storages[name]
  end

  # The sum of the lengths of every array instruction. Labels and events
  # (bare symbols) do not contribute.
  def length
    insns.sum { |insn| insn.is_a?(Array) ? insn.length : 0 }
  end

  # Yield every InstructionSequence that appears as an operand of one of
  # this sequence's instructions. Returns an Enumerator when no block is
  # given.
  def each_child
    return enum_for(:each_child) unless block_given?

    insns.each do |insn|
      # Labels and events are bare symbols. Symbol#[] would hand back a
      # String here, which does not respond to #each, so skip them.
      next unless insn.is_a?(Array)

      insn.drop(1).each do |operand|
        yield operand if operand.is_a?(InstructionSequence)
      end
    end
  end

  # Serialize this instruction sequence into a nested array. The intent is
  # to mirror the array given by RubyVM::InstructionSequence#to_a.
  def to_a
    versions = RUBY_VERSION.split(".").map(&:to_i)

    [
      MAGIC,
      versions[0],
      versions[1],
      1,
      {
        arg_size: argument_size,
        local_size: local_table.size,
        stack_max: stack.maximum_size
      },
      name,
      "",
      "",
      location.start_line,
      type,
      local_table.names,
      argument_options,
      [],
      insns.map { |insn| serialize(insn) }
    ]
  end

  private

  # Convert a single entry of the instruction list into its serialized
  # form.
  def serialize(insn)
    # Labels and events are bare symbols and serialize as themselves.
    return insn unless insn.is_a?(Array)

    case insn[0]
    when :checkkeyword, :getblockparam, :getblockparamproxy,
         :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0,
         :setlocal_WC_1, :setlocal
      iseq = self

      case insn[0]
      when :getlocal_WC_1, :setlocal_WC_1
        iseq = iseq.parent_iseq
      when :getblockparam, :getblockparamproxy, :getlocal, :setlocal
        insn[2].times { iseq = iseq.parent_iseq }
      end

      # Here we need to map the local variable index to the offset
      # from the top of the stack where it will be stored.
      [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]]
    when :defineclass
      [insn[0], insn[1], insn[2].to_a, insn[3]]
    when :definemethod, :definesmethod
      [insn[0], insn[1], insn[2].to_a]
    when :send
      # For any instructions that push instruction sequences onto the
      # stack, we need to call #to_a on them as well.
      [insn[0], insn[1], (insn[2].to_a if insn[2])]
    when :once
      [insn[0], insn[1].to_a, insn[2]]
    else
      insn
    end
  end
end

# This class serves as a layer of indirection between the instruction
# sequence and the compiler. It allows us to provide different behavior
# for certain instructions depending on the Ruby version. For example,
# class variable reads and writes gained an inline cache in Ruby 3.0. So
# we place the logic for checking the Ruby version in this class.
- class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) - end - - def event(name) - iseq.push(name) - name - end - - def adjuststack(number) - stack.change_by(-number) - iseq.push([:adjuststack, number]) - end - - def anytostring - stack.change_by(-2 + 1) - iseq.push([:anytostring]) - end - - def branchif(index) - stack.change_by(-1) - iseq.push([:branchif, index]) - end - - def branchnil(index) - stack.change_by(-1) - iseq.push([:branchnil, index]) - end - - def branchunless(index) - stack.change_by(-1) - iseq.push([:branchunless, index]) - end - - def checkkeyword(index, keyword_index) - stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) - end - - def concatarray - stack.change_by(-2 + 1) - iseq.push([:concatarray]) - end - - def concatstrings(number) - stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) - end - - def defined(type, name, message) - stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) - end - - def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) - end - - def definemethod(name, method_iseq) - stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) - end - - def definesmethod(name, method_iseq) - stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) - end - - def dup - stack.change_by(-1 + 2) 
- iseq.push([:dup]) - end - - def duparray(object) - stack.change_by(+1) - iseq.push([:duparray, object]) - end - - def duphash(object) - stack.change_by(+1) - iseq.push([:duphash, object]) - end - - def dupn(number) - stack.change_by(+number) - iseq.push([:dupn, number]) - end - - def expandarray(length, flag) - stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) - end - - def getblockparam(index, level) - stack.change_by(+1) - iseq.push([:getblockparam, index, level]) - end - - def getblockparamproxy(index, level) - stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) - end - - def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:getclassvariable, name]) - end - end - - def getconstant(name) - stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) - end - - def getglobal(name) - stack.change_by(+1) - iseq.push([:getglobal, name]) - end - - def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) - end - end - - def getlocal(index, level) - stack.change_by(+1) - - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) - else - iseq.push([:getlocal, index, level]) - end - else - iseq.push([:getlocal, index, level]) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - iseq.push([:intern]) - end - - def invokeblock(method_id, argc, flag) - stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - iseq.push([:jump, index]) - end - - def leave - stack.change_by(-1) - iseq.push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - iseq.push([:newarray, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - iseq.push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) - end - - def nop - stack.change_by(0) - iseq.push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) - else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - 
iseq.push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] - ) - else - putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) - else - putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def pop - stack.change_by(-1) - iseq.push([:pop]) - end - - def putnil - stack.change_by(+1) - iseq.push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) - else - iseq.push([:putobject, object]) - end - else - iseq.push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - iseq.push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - iseq.push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - iseq.push([:putstring, object]) - end - - def send(method_id, argc, flag, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when 
[:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) - else - iseq.push([:opt_send_without_block, cdata]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - iseq.push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - iseq.push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) - else - iseq.push([:setlocal, index, level]) - end - else - iseq.push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - iseq.push([:setn, number]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - iseq.push([:swap]) - end - - def topn(number) - stack.change_by(+1) - iseq.push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). - VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. 
- DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - # The current instruction sequence that is being compiled. - attr_reader :current_iseq - - # This is the current builder that is being used to construct the current - # instruction sequence. - attr_reader :builder - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize( - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - - @current_iseq = nil - @builder = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if frozen_string_literal - builder.putobject(node.value[1..]) - else - builder.putstring(node.value[1..]) - end - end - - def visit_END(node) - name = "block in #{current_iseq.name}" - once_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - postexe_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) - builder.leave - end - - builder.once(once_iseq, current_iseq.inline_storage) - builder.pop - end - - def visit_alias(node) - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(node.left) - visit(node.right) - builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) - end - - def visit_aref(node) - visit(node.collection) - visit(node.index) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - builder.splatarray(false) - end - - def visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - 
builder.newarray(length) - length = 0 - end - - visit(part.value) - builder.concatarray - else - visit(part) - length += 1 - end - end - - builder.newarray(length) if length > 0 - if length > 0 && length != node.contents.parts.length - builder.concatarray - end - end - end - - def visit_assign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - builder.opt_getconstant_path(names) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.opt_getconstant_path(names) - builder.setconstant(name) - end - when Field - builder.putnil - visit(node.target) - visit(node.value) - builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) - builder.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - builder.putobject(Object) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.putobject(Object) - builder.setconstant(name) - end - when VarField - visit(node.value) - builder.dup if last_statement? 
- - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(node.target.value.value.to_sym) - when CVar - builder.setclassvariable(node.target.value.value.to_sym) - when GVar - builder.setglobal(node.target.value.value.to_sym) - when Ident - local_variable = visit(node.target) - builder.setlocal(local_variable.index, local_variable.level) - when IVar - builder.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - builder.getspecial(1, 2 * node.value[1..].to_i) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - builder.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_binary(node) - case node.operator - when :"&&" - visit(node.left) - builder.dup - - branchunless = builder.branchunless(-1) - builder.pop - - visit(node.right) - branchunless[1] = builder.label - when :"||" - visit(node.left) - builder.dup - - branchif = builder.branchif(-1) - builder.pop - - visit(node.right) - branchif[1] = builder.label - else - visit(node.left) - visit(node.right) - builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_block(node) - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - current_iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each do |local| - current_iseq.local_table.plain(local.value.to_sym) - end - end - - def visit_blockarg(node) - current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block(node.name.value.to_sym) - current_iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - builder.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - builder.opt_newarray_min(parts.length) - return - end - end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - builder.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - builder.opt_str_freeze(node.receiver.parts.first.value) - return - end - end - end - end - - if node.receiver - if node.receiver.is_a?(VarRef) && - ( - lookup = - current_iseq.local_variable(node.receiver.value.value.to_sym) - ) && lookup.local.is_a?(LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) - else - visit(node.receiver) - end - else - builder.putself - end - - branchnil = - if node.operator&.value == "&." - builder.dup - builder.branchnil(-1) - end - - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= VM_CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= VM_CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG - - lookup = current_iseq.local_table.find(:*, 0) - builder.getlocal(lookup.index, lookup.level) - builder.splatarray(arg_parts.length != 1) - - lookup = current_iseq.local_table.find(:&, 0) - builder.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= VM_CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? 
- - builder.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = builder.label if branchnil - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - builder.topn(1) - builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - [clause, builder.branchif(:label_00)] - end - - builder.pop - - else_clause ? visit(else_clause) : builder.putnil - - builder.leave - - branches.each_with_index do |(clause, branchif), index| - builder.leave if index != 0 - branchif[1] = builder.label - builder.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - if node.superclass - flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - builder.putnil - end - - builder.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - 
location: node.location - ) - ) - end - - def visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - builder.opt_getconstant_path(names) - end - - def visit_def(node) - method_iseq = - with_instruction_sequence( - :method, - node.name.value, - current_iseq, - node - ) do - visit(node.params) if node.params - builder.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_RETURN) - builder.leave - end - - name = node.name.value.to_sym - - if node.target - visit(node.target) - builder.definesmethod(name, method_iseq) - else - builder.definemethod(name, method_iseq) - end - - builder.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - current_iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - builder.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - builder.putnil - builder.defined(DEFINED_CONST, name, "constant") - when CVar - builder.putnil - builder.defined(DEFINED_CVAR, name, "class variable") - when GVar - builder.putnil - builder.defined(DEFINED_GVAR, name, "global-variable") - when Ident - builder.putobject("local-variable") - when IVar - builder.putnil - builder.defined(DEFINED_IVAR, name, "instance-variable") - when Kw - case name - when :false - builder.putobject("false") - when :nil - builder.putobject("nil") - when :self - builder.putobject("self") - when :true - builder.putobject("true") - end - end - when VCall - builder.putself - - name = node.value.value.value.to_sym - builder.defined(DEFINED_FUNC, name, "method") - when YieldNode - builder.putnil - builder.defined(DEFINED_YIELD, false, "yield") - when ZSuper - builder.putnil - 
builder.defined(DEFINED_ZSUPER, false, "super") - else - builder.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - builder.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - builder.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - current_iseq.local_table.plain(name) - - block_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node.statements - ) do - current_iseq.argument_options[:lead_num] ||= 0 - current_iseq.argument_options[:lead_num] += 1 - current_iseq.argument_options[:ambiguous_param0] = true - - current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) - - builder.getlocal(0, 0) - - local_variable = current_iseq.local_variable(name) - builder.setlocal(local_variable.index, local_variable.level) - - builder.event(:RUBY_EVENT_B_CALL) - builder.nop - - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.send(:each, 0, 0, block_iseq) - end - - def visit_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_if(node) - 
visit(node.predicate) - branchunless = builder.branchunless(-1) - visit(node.statements) - - if last_statement? - builder.leave - branchunless[1] = builder.label - - node.consequent ? visit(node.consequent) : builder.putnil - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - current_iseq.argument_options[:kwrest] = current_iseq.argument_size - current_iseq.argument_size += 1 - current_iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - builder.dup - visit(node.target) - end - - def visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end 
- - def visit_mlhs(node) - lookups = [] - - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - builder.expandarray(lookups.length, 0) - - lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - builder.putnil - builder.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - visit_all(node.parts) - builder.newarray(node.parts.length) - end - end - - def visit_not(node) - visit(node.statement) - builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) - end - - def visit_opassign(node) - flag = VM_CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= VM_CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - branchunless = nil - - with_opassign(node) do - builder.dup - branchunless = builder.branchunless(-1) - builder.pop - visit(node.value) - end - - case node.target - when ARefField - builder.leave - branchunless[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - when ConstPathField, TopConstField - branchunless[1] = builder.label - builder.swap - builder.pop - else - branchunless[1] = builder.label - end - when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) - opassign_defined(node) - builder.swap - builder.pop 
- elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - branchif = nil - - with_opassign(node) do - builder.dup - branchif = builder.branchif(-1) - builder.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - builder.leave - branchif[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - else - branchif[1] = builder.label - end - end - else - with_opassign(node) do - visit(node.value) - builder.send(operator, 1, flag) - end - end - end - - def visit_params(node) - argument_options = current_iseq.argument_options - - if node.requireds.any? - argument_options[:lead_num] = 0 - - node.requireds.each do |required| - current_iseq.local_table.plain(required.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = current_iseq.local_table.size - name = optional.value.to_sym - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - - unless argument_options.key?(:opt) - argument_options[:opt] = [builder.label] - end - - visit(value) - builder.setlocal(index, 0) - current_iseq.argument_options[:opt] << builder.label - end - - visit(node.rest) if node.rest - - if node.posts.any? - argument_options[:post_start] = current_iseq.argument_size - argument_options[:post_num] = 0 - - node.posts.each do |post| - current_iseq.local_table.plain(post.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] - checkkeywords = [] - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = current_iseq.local_table.size - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - argument_options[:kwbits] += 1 - - if value.nil? 
- argument_options[:keyword] << name - else - begin - compiled = value.accept(RubyVisitor.new) - argument_options[:keyword] << [name, compiled] - rescue RubyVisitor::CompilationError - argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) - visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label - end - end - end - - name = node.keyword_rest ? 3 : 2 - current_iseq.argument_size += 1 - current_iseq.local_table.plain(name) - - lookup = current_iseq.local_table.find(name, 0) - checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } - end - - if node.keyword_rest.is_a?(ArgsForward) - current_iseq.local_table.plain(:*) - current_iseq.local_table.plain(:&) - - current_iseq.argument_options[ - :rest_start - ] = current_iseq.argument_size - current_iseq.argument_options[ - :block_start - ] = current_iseq.argument_size + 1 - - current_iseq.argument_size += 2 - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - with_instruction_sequence(:top, "", nil, node) do - visit_all(preexes) - - if statements.empty? 
- builder.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - builder.leave - end - end - - def visit_qsymbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if frozen_string_literal - builder.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_range(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit(node.left) - visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) - end - - def visit_rational(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_regexp_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - builder.toregexp(flags, length) - end - - def visit_rest_param(node) - current_iseq.local_table.plain(node.name.value.to_sym) - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_size += 1 - end - - def visit_sclass(node) - visit(node.target) - builder.putnil - - singleton_iseq = - with_instruction_sequence( - :class, - "singleton class", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - builder.defineclass( - :singletonclass, - singleton_iseq, - VM_DEFINECLASS_TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? 
builder.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - content = TStringContent.new(value: value, location: node.location) - - literal = - StringLiteral.new( - parts: [content], - quote: node.left.quote, - location: node.location - ) - visit_string_literal(literal) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_super(node) - builder.putself - visit(node.arguments) - builder.invokesuper( - nil, - argument_parts(node.arguments).length, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, - nil - ) - end - - def visit_symbol_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern - end - end - - builder.newarray(node.elements.length) - end - - def visit_top_const_ref(node) - builder.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if frozen_string_literal - builder.putobject(node.accept(RubyVisitor.new)) - else - builder.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: 
Location.default - ) - ) - end - - def visit_undef(node) - node.symbols.each_with_index do |symbol, index| - builder.pop if index != 0 - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(symbol) - builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_unless(node) - visit(node.predicate) - branchunless = builder.branchunless(-1) - node.consequent ? visit(node.consequent) : builder.putnil - - if last_statement? - builder.leave - branchunless[1] = builder.label - - visit(node.statements) - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_until(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchunless(label) - builder.putnil if last_statement? 
- end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - current_iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = current_iseq.local_variable(name)) - local_variable - else - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - builder.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - builder.getclassvariable(name) - when GVar - builder.getglobal(node.value.value.to_sym) - when Ident - lookup = current_iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when LocalTable::BlockLocal - builder.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal - builder.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - builder.getinstancevariable(name) - when Kw - case node.value.value - when "false" - builder.putobject(false) - when "nil" - builder.putnil - when "self" - builder.putself - when "true" - builder.putobject(true) - end - end - end - - def visit_vcall(node) - builder.putself - - flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE - builder.send(node.value.value.to_sym, 0, flag) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchif(label) - builder.putnil if last_statement? 
- end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_words(node) - converted = nil - - if frozen_string_literal - begin - converted = node.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError - end - end - - if converted - builder.duparray(converted) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - builder.putself - length = visit_string_parts(node) - builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) - end - - def visit_zsuper(_node) - builder.putself - builder.invokesuper( - nil, - 0, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. - def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. 
- def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. - def opassign_defined(node) - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - builder.putnil - - case node.target.value - when Const - builder.defined(DEFINED_CONST, name, true) - when CVar - builder.defined(DEFINED_CVAR, name, true) - when GVar - builder.defined(DEFINED_GVAR, name, true) - end - end - - branchunless = builder.branchunless(-1) - - case node.target - when ConstPathField, TopConstField - builder.dup - builder.putobject(true) - builder.getconstant(name) - when VarField - case node.target.value - when Const - builder.opt_getconstant_path(constant_names(node.target)) - when CVar - builder.getclassvariable(name) - when GVar - builder.getglobal(name) - end - end - - builder.dup - branchif = builder.branchif(-1) - builder.pop - - branchunless[1] = 
builder.label - visit(node.value) - - case node.target - when ConstPathField, TopConstField - builder.dupn(2) - builder.swap - builder.setconstant(name) - when VarField - builder.dup - - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(name) - when CVar - builder.setclassvariable(name) - when GVar - builder.setglobal(name) - end - end - - branchif[1] = builder.label - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - builder.dup - builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - builder.anytostring - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - builder.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - builder.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(type, name, parent_iseq, node) - previous_iseq = current_iseq - previous_builder = builder - - begin - iseq = InstructionSequence.new(type, name, parent_iseq, node.location) - - @current_iseq = iseq - @builder = - Builder.new( - iseq, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - yield - iseq - ensure - @current_iseq = previous_iseq - @builder = previous_builder - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. 
- def with_opassign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - - builder.dupn(2) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - - yield - - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - builder.opt_getconstant_path(names) - - yield - - builder.dup - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - builder.getclassvariable(name) - - yield - - builder.dup - builder.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - builder.getglobal(name) - - yield - - builder.dup - builder.setglobal(name) - when Ident - local_variable = visit(node.target) - builder.getlocal(local_variable.index, local_variable.level) - - yield - - builder.dup - builder.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - builder.getinstancevariable(name) - - yield - - builder.dup - builder.setinstancevariable(name) - end - end - end - end - end -end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb new file mode 100644 index 00000000..df8bc3ce --- /dev/null +++ b/lib/syntax_tree/yarv.rb @@ -0,0 
+1,11 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + # Compile the given source into a YARV instruction sequence. + def self.compile(source, **options) + SyntaxTree.parse(source).accept(Compiler.new(**options)) + end + end +end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb new file mode 100644 index 00000000..0fb27f7e --- /dev/null +++ b/lib/syntax_tree/yarv/bf.rb @@ -0,0 +1,176 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Parses the given source code into a syntax tree, compiles that syntax tree + # into YARV bytecode. + class Bf + attr_reader :source + + def initialize(source) + @source = source + end + + def compile + # Set up the top-level instruction sequence that will be returned. + iseq = InstructionSequence.new(:top, "", nil, location) + + # Set up the $tape global variable that will hold our state. + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(YARV.calldata(:default=, 1)) + + # Set up the $cursor global variable that will hold the current position + # in the tape. + iseq.putobject(0) + iseq.setglobal(:$cursor) + + stack = [] + source + .each_char + .chunk do |char| + # For each character, we're going to assign a type to it. This + # allows a couple of optimizations to be made by combining multiple + # instructions into single instructions, e.g., +++ becomes a single + # change_by(3) instruction. + case char + when "+", "-" + :change + when ">", "<" + :shift + when "." + :output + when "," + :input + when "[", "]" + :loop + else + :ignored + end + end + .each do |type, chunk| + # For each chunk, we're going to emit the appropriate instruction. 
+ case type + when :change + change_by(iseq, chunk.count("+") - chunk.count("-")) + when :shift + shift_by(iseq, chunk.count(">") - chunk.count("<")) + when :output + chunk.length.times { output_char(iseq) } + when :input + chunk.length.times { input_char(iseq) } + when :loop + chunk.each do |char| + case char + when "[" + stack << loop_start(iseq) + when "]" + loop_end(iseq, *stack.pop) + end + end + end + end + + iseq.leave + iseq + end + + private + + # This is the location of the top instruction sequence, derived from the + # source string. + def location + Location.new( + start_line: 1, + start_char: 0, + start_column: 0, + end_line: source.count("\n") + 1, + end_char: source.size, + end_column: source.size - (source.rindex("\n") || 0) - 1 + ) + end + + # $tape[$cursor] += value + def change_by(iseq, value) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(YARV.calldata(:[], 1)) + + if value < 0 + iseq.putobject(-value) + iseq.send(YARV.calldata(:-, 1)) + else + iseq.putobject(value) + iseq.send(YARV.calldata(:+, 1)) + end + + iseq.send(YARV.calldata(:[]=, 2)) + end + + # $cursor += value + def shift_by(iseq, value) + iseq.getglobal(:$cursor) + + if value < 0 + iseq.putobject(-value) + iseq.send(YARV.calldata(:-, 1)) + else + iseq.putobject(value) + iseq.send(YARV.calldata(:+, 1)) + end + + iseq.setglobal(:$cursor) + end + + # $stdout.putc($tape[$cursor].chr) + def output_char(iseq) + iseq.getglobal(:$stdout) + + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(YARV.calldata(:[], 1)) + iseq.send(YARV.calldata(:chr)) + + iseq.send(YARV.calldata(:putc, 1)) + end + + # $tape[$cursor] = $stdin.getc.ord + def input_char(iseq) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + + iseq.getglobal(:$stdin) + iseq.send(YARV.calldata(:getc)) + iseq.send(YARV.calldata(:ord)) + + iseq.send(YARV.calldata(:[]=, 2)) + end + + # unless $tape[$cursor] == 0 + def loop_start(iseq) + start_label = 
iseq.label + + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(YARV.calldata(:[], 1)) + + iseq.putobject(0) + iseq.send(YARV.calldata(:==, 1)) + + branchunless = iseq.branchunless(-1) + [start_label, branchunless] + end + + # Jump back to the start of the loop. + def loop_end(iseq, start_label, branchunless) + iseq.jump(start_label) + branchunless.patch!(iseq) + end + end + end +end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb new file mode 100644 index 00000000..45f2bb59 --- /dev/null +++ b/lib/syntax_tree/yarv/compiler.rb @@ -0,0 +1,2164 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. + # + # You use this as with any other visitor. First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. 
+ # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::YARV::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. + def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + visit_all(node.contents.parts) + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+ raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_mrhs(node) + visit_all(node.parts) + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_qwords(node) + visit_all(node.elements) + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer. 
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + # The current instruction sequence that is being compiled. + attr_reader :iseq + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
+ attr_reader :last_statement + + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + + @iseq = nil + @last_statement = false + end + + def visit_BEGIN(node) + visit(node.statements) + end + + def visit_CHAR(node) + if frozen_string_literal + iseq.putobject(node.value[1..]) + else + iseq.putstring(node.value[1..]) + end + end + + def visit_END(node) + once_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + postexe_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send( + YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL), + postexe_iseq + ) + iseq.leave + end + + iseq.once(once_iseq, iseq.inline_storage) + iseq.pop + end + + def visit_alias(node) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) + visit(node.left) + visit(node.right) + iseq.send(YARV.calldata(:"core#set_method_alias", 3)) + end + + def visit_aref(node) + calldata = YARV.calldata(:[], 1) + visit(node.collection) + + if !frozen_string_literal && specialized_instruction && + (node.index.parts.length == 1) + arg = node.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + iseq.opt_aref_with(string_part.value, calldata) + return + end + end + end + + visit(node.index) + iseq.send(calldata) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def 
visit_arg_star(node) + visit(node.value) + iseq.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + elsif node.contents && node.contents.parts.length == 1 && + node.contents.parts.first.is_a?(BareAssocHash) && + node.contents.parts.first.assocs.length == 1 && + node.contents.parts.first.assocs.first.is_a?(AssocSplat) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.newhash(0) + visit(node.contents.parts.first) + iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2)) + iseq.newarraykwsplat(1) + else + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + iseq.newarray(length) + length = 0 + end + + visit(part.value) + iseq.concatarray + else + visit(part) + length += 1 + end + end + + iseq.newarray(length) if length > 0 + iseq.concatarray if length > 0 && length != node.contents.parts.length + end + end + + def visit_aryptn(node) + match_failures = [] + jumps_to_exit = [] + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + branchnil = iseq.branchnil(-1) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + match_failures << iseq.branchunless(-1) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + jump = iseq.jump(-1) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. 
+ branchnil.patch!(iseq) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + match_failures << iseq.branchunless(-1) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + match_error = iseq.branchunless(-1) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + jump.patch!(iseq) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + match_failures << iseq.branchunless(-1) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + jumps_to_exit << iseq.jump(-1) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + match_error.patch!(iseq) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. 
+ match_failures.each { |match_failure| match_failure.patch!(iseq) } + iseq.pop + jumps_to_exit + end + + def visit_assign(node) + case node.target + when ARefField + calldata = YARV.calldata(:[]=, 2) + + if !frozen_string_literal && specialized_instruction && + (node.target.index.parts.length == 1) + arg = node.target.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + visit(node.target.collection) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.opt_aset_with(string_part.value, calldata) + iseq.pop + return + end + end + end + + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + iseq.setn(3) + iseq.send(calldata) + iseq.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + iseq.opt_getconstant_path(names) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.opt_getconstant_path(names) + iseq.setconstant(name) + end + when Field + iseq.putnil + visit(node.target) + visit(node.value) + iseq.setn(2) + iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1)) + iseq.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + iseq.putobject(Object) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.putobject(Object) + iseq.setconstant(name) + end + when VarField + visit(node.value) + iseq.dup if last_statement? 
# Compile a binary expression.
#
# The left operand is always evaluated first. For the short-circuiting
# operators (&& and ||) the left value is duplicated and a conditional
# branch skips the right operand; the branch is patched once the right
# operand has been emitted. Every other operator becomes a one-argument
# method call on the left operand.
def visit_binary(node)
  operator = node.operator
  visit(node.left)

  unless %i[&& ||].include?(operator)
    visit(node.right)
    return iseq.send(YARV.calldata(operator, 1))
  end

  iseq.dup
  short_circuit =
    if operator == :"&&"
      iseq.branchunless(-1)
    else
      iseq.branchif(-1)
    end
  iseq.pop

  visit(node.right)
  short_circuit.patch!(iseq)
end
&& + !params.keyword_rest && !params.block + iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } + end + + def visit_blockarg(node) + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(node.name.value.to_sym) + iseq.argument_size += 1 + end + + def visit_bodystmt(node) + visit(node.statements) + end + + def visit_call(node) + if node.is_a?(CallNode) + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + ) + end + + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? + case node.message.value + when "max" + visit(node.receiver.contents) + iseq.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + iseq.opt_newarray_min(parts.length) + return + end + end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "-@" + iseq.opt_str_uminus(node.receiver.parts.first.value) + return + when "freeze" + iseq.opt_str_freeze(node.receiver.parts.first.value) + return + end + end + end + end + + if node.receiver + if node.receiver.is_a?(VarRef) + lookup = iseq.local_variable(node.receiver.value.value.to_sym) + + if lookup.local.is_a?(LocalTable::BlockLocal) + iseq.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + visit(node.receiver) + end + else + iseq.putself + end + + branchnil = + if node.operator&.value == "&." + iseq.dup + iseq.branchnil(-1) + end + + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= CallData::CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar + flag |= CallData::CALL_ARGS_SPLAT + visit(arg_part) + when ArgsForward + flag |= CallData::CALL_ARGS_SPLAT + flag |= CallData::CALL_ARGS_BLOCKARG + + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) + + lookup = iseq.local_table.find(:&) + iseq.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash + flag |= CallData::CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) + end + end + + block_iseq = visit(node.block) if node.block + flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= CallData::CALL_FCALL if node.receiver.nil? 
+ + iseq.send( + YARV.calldata(node.message.value.to_sym, argc, flag), + block_iseq + ) + branchnil.patch!(iseq) if branchnil + end + + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + current = node.consequent + + while current + clauses << current + + if (current = current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + iseq.topn(1) + iseq.send( + YARV.calldata( + :===, + 1, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + [clause, iseq.branchif(:label_00)] + end + + iseq.pop + else_clause ? visit(else_clause) : iseq.putnil + iseq.leave + + branches.each_with_index do |(clause, branchif), index| + iseq.leave if index != 0 + branchif.patch!(iseq) + iseq.pop + visit(clause) + end + end + + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_child_iseq(iseq.class_child_iseq(name, node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + flags = DefineClass::TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= DefineClass::FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + when TopConstRef + flags |= DefineClass::FLAG_SCOPED + iseq.putobject(Object) + end + + if node.superclass + flags |= DefineClass::FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + iseq.putnil + end + + iseq.defineclass(name, class_iseq, flags) + end + + def visit_command(node) + visit_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_command_call(node) + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: node.block, + location: 
node.location + ) + ) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + iseq.opt_getconstant_path(names) + end + + def visit_def(node) + name = node.name.value.to_sym + method_iseq = iseq.method_child_iseq(name.to_s, node.location) + + with_child_iseq(method_iseq) do + visit(node.params) if node.params + iseq.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave + end + + if node.target + visit(node.target) + iseq.definesmethod(name, method_iseq) + else + iseq.definemethod(name, method_iseq) + end + + iseq.putobject(name) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. + if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + iseq.local_table.plain(node.value.target.value.value.to_sym) + end + + iseq.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + iseq.putnil + iseq.defined(Defined::TYPE_CONST, name, "constant") + when CVar + iseq.putnil + iseq.defined(Defined::TYPE_CVAR, name, "class variable") + when GVar + iseq.putnil + iseq.defined(Defined::TYPE_GVAR, name, "global-variable") + when Ident + iseq.putobject("local-variable") + when IVar + iseq.putnil + iseq.defined(Defined::TYPE_IVAR, name, "instance-variable") + when Kw + case name + when :false + iseq.putobject("false") + when :nil + iseq.putobject("nil") + when :self + iseq.putobject("self") + when :true + iseq.putobject("true") + end + end + when VCall + iseq.putself + + name = node.value.value.value.to_sym + iseq.defined(Defined::TYPE_FUNC, name, "method") + when YieldNode + iseq.putnil + iseq.defined(Defined::TYPE_YIELD, false, "yield") + when ZSuper + iseq.putnil + iseq.defined(Defined::TYPE_ZSUPER, false, "super") + else + iseq.putobject("expression") + 
# Compile a dynamic symbol literal (for example :"foo" or :"foo#{bar}").
#
# A symbol made of a single plain string part is pushed as a constant
# object. Any other non-empty shape is built at runtime the same way
# interpolated elements of a symbol array are: push the string parts,
# concatenate them, then intern the result. Previously those shapes
# emitted no instructions at all.
def visit_dyna_symbol(node)
  parts = node.parts

  if parts.empty?
    # An empty dynamic symbol has nothing to interpolate.
    iseq.putobject(:"")
  elsif parts.length == 1 && parts.first.is_a?(TStringContent)
    iseq.putobject(parts.first.value.to_sym)
  else
    length = visit_string_parts(node)
    iseq.concatstrings(length)
    iseq.intern
  end
end
iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchif.patch!(iseq) + + visit(node.predicate.right) + branchunless_false = iseq.branchunless(-1) + + iseq.putobject(false) + iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchunless_false.patch!(iseq) + + visit(node.statements) + iseq.leave + branchunless_true.patch!(iseq) + iseq.putnil + else + visit(node.predicate) + branchunless = iseq.branchunless(-1) + visit(node.statements) + + if last_statement? + iseq.leave + branchunless.patch!(iseq) + + node.consequent ? visit(node.consequent) : iseq.putnil + else + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump.patch!(iseq) + else + branchunless.patch!(iseq) + end + end + end + end + + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + + def visit_imaginary(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_kwrest_param(node) + iseq.argument_options[:kwrest] = iseq.argument_size + iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) + end + + def visit_label(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_lambda(node) + lambda_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + visit(node.params) + visit(node.statements) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + + def visit_massign(node) + visit(node.value) + iseq.dup + visit(node.target) + end + + def 
# Compile the left-hand side of a multiple assignment.
#
# Only VarField targets are handled: each one is visited to obtain its
# lookup, the value on the stack is expanded into that many elements, and
# each element is stored into the matching local. Other kinds of targets
# are skipped.
def visit_mlhs(node)
  targets = node.parts.grep(VarField).map { |field| visit(field) }

  iseq.expandarray(targets.length, 0)
  targets.each { |target| iseq.setlocal(target.index, target.level) }
end
branchunless.patch!(iseq) + end + when :"||" + if node.target.is_a?(ConstPathField) || + node.target.is_a?(TopConstField) + opassign_defined(node) + iseq.swap + iseq.pop + elsif node.target.is_a?(VarField) && + [Const, CVar, GVar].include?(node.target.value.class) + opassign_defined(node) + else + branchif = nil + + with_opassign(node) do + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + visit(node.value) + end + + if node.target.is_a?(ARefField) + iseq.leave + branchif.patch!(iseq) + iseq.setn(3) + iseq.adjuststack(3) + else + branchif.patch!(iseq) + end + end + else + with_opassign(node) do + visit(node.value) + iseq.send(YARV.calldata(operator, 1, flag)) + end + end + end + + def visit_params(node) + argument_options = iseq.argument_options + + if node.requireds.any? + argument_options[:lead_num] = 0 + + node.requireds.each do |required| + iseq.local_table.plain(required.value.to_sym) + iseq.argument_size += 1 + argument_options[:lead_num] += 1 + end + end + + node.optionals.each do |(optional, value)| + index = iseq.local_table.size + name = optional.value.to_sym + + iseq.local_table.plain(name) + iseq.argument_size += 1 + + argument_options[:opt] = [iseq.label] unless argument_options.key?( + :opt + ) + + visit(value) + iseq.setlocal(index, 0) + iseq.argument_options[:opt] << iseq.label + end + + visit(node.rest) if node.rest + + if node.posts.any? + argument_options[:post_start] = iseq.argument_size + argument_options[:post_num] = 0 + + node.posts.each do |post| + iseq.local_table.plain(post.value.to_sym) + iseq.argument_size += 1 + argument_options[:post_num] += 1 + end + end + + if node.keywords.any? + argument_options[:kwbits] = 0 + argument_options[:keyword] = [] + + keyword_bits_name = node.keyword_rest ? 
3 : 2 + iseq.argument_size += 1 + keyword_bits_index = iseq.local_table.locals.size + node.keywords.size + + node.keywords.each_with_index do |(keyword, value), keyword_index| + name = keyword.value.chomp(":").to_sym + index = iseq.local_table.size + + iseq.local_table.plain(name) + iseq.argument_size += 1 + argument_options[:kwbits] += 1 + + if value.nil? + argument_options[:keyword] << name + elsif (compiled = RubyVisitor.compile(value)) + argument_options[:keyword] << [name, compiled] + else + argument_options[:keyword] << [name] + iseq.checkkeyword(keyword_bits_index, keyword_index) + branchif = iseq.branchif(-1) + visit(value) + iseq.setlocal(index, 0) + branchif.patch!(iseq) + end + end + + iseq.local_table.plain(keyword_bits_name) + end + + if node.keyword_rest.is_a?(ArgsForward) + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) + + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 + + iseq.argument_size += 2 + elsif node.keyword_rest + visit(node.keyword_rest) + end + + visit(node.block) if node.block + end + + def visit_paren(node) + visit(node.contents) + end + + def visit_program(node) + node.statements.body.each do |statement| + break unless statement.is_a?(Comment) + + if statement.value == "# frozen_string_literal: true" + @frozen_string_literal = true + end + end + + preexes = [] + statements = [] + + node.statements.body.each do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + # ignore + when BEGINBlock + preexes << statement + else + statements << statement + end + end + + top_iseq = + InstructionSequence.new( + :top, + "", + nil, + node.location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + with_child_iseq(top_iseq) do + visit_all(preexes) + + if statements.empty? 
+ iseq.putnil + else + *statements, last_statement = statements + visit_all(statements) + with_last_statement { visit(last_statement) } + end + + iseq.leave + end + end + + def visit_qsymbols(node) + iseq.duparray(node.accept(RubyVisitor.new)) + end + + def visit_qwords(node) + if frozen_string_literal + iseq.duparray(node.accept(RubyVisitor.new)) + else + visit_all(node.elements) + iseq.newarray(node.elements.length) + end + end + + def visit_range(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + visit(node.left) + visit(node.right) + iseq.newrange(node.operator.value == ".." ? 0 : 1) + end + end + + def visit_rassign(node) + iseq.putnil + + if node.operator.is_a?(Kw) + jumps = [] + + visit(node.value) + iseq.dup + + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps << iseq.jump(-1) + else + jumps.concat(visit(node.pattern)) + end + + iseq.pop + iseq.pop + iseq.putobject(false) + iseq.leave + + jumps.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(2) + iseq.putobject(true) + else + jumps_to_match = [] + + iseq.putnil + iseq.putobject(false) + iseq.putnil + iseq.putnil + visit(node.value) + iseq.dup + + # Visit the pattern. If it matches, + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps_to_match << iseq.jump(-1) + else + jumps_to_match.concat(visit(node.pattern)) + end + + # First we're going to push the core onto the stack, then we'll check + # if the value to match is truthy. If it is, we'll jump down to raise + # NoMatchingPatternKeyError. Otherwise we'll raise + # NoMatchingPatternError. + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.topn(4) + branchif_no_key = iseq.branchif(-1) + + # Here we're going to raise NoMatchingPatternError. 
+ iseq.putobject(NoMatchingPatternError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.send(YARV.calldata(:"core#raise", 2)) + jump_to_exit = iseq.jump(-1) + + # Here we're going to raise NoMatchingPatternKeyError. + branchif_no_key.patch!(iseq) + iseq.putobject(NoMatchingPatternKeyError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.topn(7) + iseq.topn(9) + iseq.send( + YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key]) + ) + iseq.send(YARV.calldata(:"core#raise", 1)) + + # This runs when the pattern fails to match. + jump_to_exit.patch!(iseq) + iseq.adjuststack(7) + iseq.putnil + iseq.leave + + # This runs when the pattern matches successfully. + jumps_to_match.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(6) + iseq.putnil + end + end + + def visit_rational(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_regexp_literal(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + iseq.toregexp(flags, length) + end + end + + def visit_rest_param(node) + iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_size += 1 + end + + def visit_sclass(node) + visit(node.target) + iseq.putnil + + singleton_iseq = + with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + iseq.defineclass( + :singletonclass, + singleton_iseq, + DefineClass::TYPE_SINGLETON_CLASS + ) + end + + def visit_statements(node) + statements = + node.body.select do |statement| + case statement + when Comment, EmbDoc, 
EndContent, VoidStmt + false + else + true + end + end + + statements.empty? ? iseq.putnil : visit_all(statements) + end + + def visit_string_concat(node) + value = node.left.parts.first.value + node.right.parts.first.value + + visit_string_literal( + StringLiteral.new( + parts: [TStringContent.new(value: value, location: node.location)], + quote: node.left.quote, + location: node.location + ) + ) + end + + def visit_string_embexpr(node) + visit(node.statements) + end + + def visit_string_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_super(node) + iseq.putself + visit(node.arguments) + iseq.invokesuper( + YARV.calldata( + nil, + argument_parts(node.arguments).length, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER + ), + nil + ) + end + + def visit_symbol_literal(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_symbols(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + iseq.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + iseq.concatstrings(length) + iseq.intern + end + end + + iseq.newarray(node.elements.length) + end + end + + def visit_top_const_ref(node) + iseq.opt_getconstant_path(constant_names(node)) + end + + def visit_tstring_content(node) + if frozen_string_literal + iseq.putobject(node.accept(RubyVisitor.new)) + else + iseq.putstring(node.accept(RubyVisitor.new)) + end + end + + def visit_unary(node) + method_id = + case node.operator + when "+", "-" + "#{node.operator}@" + else + node.operator + end + + visit_call( + CommandCall.new( + receiver: node.statement, + operator: nil, + message: Ident.new(value: method_id, location: Location.default), + arguments: 
# Compile an `unless` expression.
#
# The predicate is evaluated and a branchunless jumps to the body when the
# predicate is falsy. The fall-through path (predicate truthy) is the else
# clause when one exists.
#
# As the last statement the fall-through path leaves a value (the else
# clause or nil) and returns; the body follows the branch target.
#
# Otherwise this mirrors the non-last path of visit_if: the else clause is
# visited once on the fall-through path, a jump skips over the body, the
# body is compiled at the branch target and its value is popped. Both
# pending jumps are patched by passing the instruction sequence, matching
# every other patch! call in this visitor.
def visit_unless(node)
  visit(node.predicate)
  branchunless = iseq.branchunless(-1)

  if last_statement?
    node.consequent ? visit(node.consequent) : iseq.putnil
    iseq.leave
    branchunless.patch!(iseq)

    visit(node.statements)
  else
    # The else clause discards its own value when it is not the last
    # statement (see visit_else), so nothing is popped here.
    visit(node.consequent) if node.consequent
    jump = iseq.jump(-1)

    branchunless.patch!(iseq)
    visit(node.statements)
    iseq.pop

    jump.patch!(iseq)
  end
end
# Resolve the target of a variable assignment.
#
# Class and instance variables yield the inline storage for their name.
# Identifiers yield the local variable lookup, declaring the local as a
# plain local first when it is not known yet. Any other kind of target
# yields nil.
def visit_var_field(node)
  target = node.value

  if target.is_a?(CVar) || target.is_a?(IVar)
    iseq.inline_storage_for(target.value.to_sym)
  elsif target.is_a?(Ident)
    name = target.value.to_sym

    iseq.local_variable(name) ||
      begin
        iseq.local_table.plain(name)
        iseq.local_variable(name)
      end
  end
end
# Compile a backtick (shell command) string literal.
#
# Pushes self, then the string parts, concatenates them when more than one
# value was pushed, and finally calls the backtick method on self with the
# resulting string.
#
# The operand of concatstrings is the count returned by visit_string_parts,
# which is the same value the guard tests and the same value the other
# string visitors pass; using the raw number of parts could disagree with
# what was actually pushed.
def visit_xstring_literal(node)
  iseq.putself

  length = visit_string_parts(node)
  iseq.concatstrings(length) if length > 1

  iseq.send(
    YARV.calldata(
      :`,
      1,
      CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE
    )
  )
end
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + iseq.putnil + + case node.target.value + when Const + iseq.defined(Defined::TYPE_CONST, name, true) + when CVar + iseq.defined(Defined::TYPE_CVAR, name, true) + when GVar + iseq.defined(Defined::TYPE_GVAR, name, true) + end + end + + branchunless = iseq.branchunless(-1) + + case node.target + when ConstPathField, TopConstField + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + when VarField + case node.target.value + when Const + iseq.opt_getconstant_path(constant_names(node.target)) + when CVar + iseq.getclassvariable(name) + when GVar + iseq.getglobal(name) + end + end + + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + + branchunless.patch!(iseq) + visit(node.value) + 
+ case node.target + when ConstPathField, TopConstField + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) + when VarField + iseq.dup + + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(name) + when CVar + iseq.setclassvariable(name) + when GVar + iseq.setglobal(name) + end + end + + branchif.patch!(iseq) + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + iseq.dup + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + iseq.anytostring + end + + # There are a lot of nodes in the AST that act as contains of parts of + # strings. This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. + def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + iseq.putobject("") + length += 1 + end + + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + iseq.putobject(part.accept(RubyVisitor.new)) + end + + length += 1 + end + + length + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. + def with_child_iseq(child_iseq) + parent_iseq = iseq + + begin + @iseq = child_iseq + yield + child_iseq + ensure + @iseq = parent_iseq + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions. 
Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + previous = @last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = previous + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. This method handles that by first fetching the value, + # then yielding to the block, then storing the result. + def with_opassign(node) + case node.target + when ARefField + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + + iseq.dupn(2) + iseq.send(YARV.calldata(:[], 1)) + + yield + + iseq.setn(3) + iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + iseq.opt_getconstant_path(names) + + yield + + iseq.dup + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + iseq.getclassvariable(name) + + yield + + iseq.dup + iseq.setclassvariable(name) + when 
GVar + name = node.target.value.value.to_sym + iseq.getglobal(name) + + yield + + iseq.dup + iseq.setglobal(name) + when Ident + local_variable = visit(node.target) + iseq.getlocal(local_variable.index, local_variable.level) + + yield + + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + iseq.getinstancevariable(name) + + yield + + iseq.dup + iseq.setinstancevariable(name) + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb new file mode 100644 index 00000000..d606e3cc --- /dev/null +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -0,0 +1,256 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Disassembler + # When we're disassmebling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. 
+ class DisasmLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + + include DSL + attr_reader :iseq, :disasm_label + + def initialize(iseq) + @iseq = iseq + @disasm_label = DisasmLabel.new("__disasm_label") + end + + def to_ruby + Program(disassemble(iseq)) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def disassemble(iseq) + label = :label_0 + clauses = {} + clause = [] + + iseq.insns.each do |insn| + case insn + when Symbol + if insn.start_with?("label_") + unless clause.last.is_a?(Next) + clause << Assign(disasm_label.field, node_for(insn)) + end + + clauses[label] = clause + clause = [] + label = insn + end + when BranchUnless + body = [ + Assign(disasm_label.field, node_for(insn.label)), + Next(Args([])) + ] + + clause << IfNode(clause.pop, Statements(body), nil) + when Dup + clause << clause.last + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + + clause << HashLiteral(LBrace("{"), assocs) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Jump + clause << Assign(disasm_label.field, node_for(insn.label)) + clause << Next(Args([])) + when Leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc + + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) + end + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument + ) + else + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) + end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + when PutObjectInt2Fix0 + clause << Int("0") + when 
PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn[0]}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return Statements(clauses.values.first) if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) + end + switch = Case(Kw("case"), disasm_label.ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [disasm_label.name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. 
+ stack << Assign(disasm_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) + Statements(stack) + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb new file mode 100644 index 00000000..411f4692 --- /dev/null +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -0,0 +1,675 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + begin + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + rescue NameError + end + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. 
class Stack
  attr_reader :current_size, :maximum_size

  # Both the running size and the recorded maximum start at zero.
  def initialize
    @maximum_size = 0
    @current_size = 0
  end

  # Apply a change in stack depth (value may be negative) and raise the
  # recorded maximum when the new depth exceeds it.
  def change_by(value)
    @current_size += value
    return unless @current_size > @maximum_size

    @maximum_size = @current_size
  end
end

# The type of the instruction sequence.
attr_reader :type

# The name of the instruction sequence.
attr_reader :name

# The parent instruction sequence, if there is one.
attr_reader :parent_iseq

# The location of the root node of this instruction sequence.
attr_reader :location

# This is the list of information about the arguments to this
# instruction sequence.
attr_accessor :argument_size
attr_reader :argument_options

# The list of instructions for this instruction sequence.
attr_reader :insns

# The table of local variables.
attr_reader :local_table

# The hash of names of instance and class variables pointing to the
# index of their associated inline storage.
attr_reader :inline_storages

# The index of the next inline storage that will be created.
attr_reader :storage_index

# An object that will track the current size of the stack and the
# maximum size of the stack for this instruction sequence.
attr_reader :stack

# These are various compilation options provided.
+ attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + type, + name, + parent_iseq, + location, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + ########################################################################## + # Query methods + ########################################################################## + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + inline_storages[name] = inline_storage unless inline_storages.key?(name) + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + case insn + when Integer, Symbol + sum + else + sum + insn.length + end + end + end + + def eval + raise "Unsupported platform" if ISEQ_LOAD.nil? + compiled = to_a + + # Temporary hack until we get these working. 
+ compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map do |insn| + insn.is_a?(Integer) || insn.is_a?(Symbol) ? insn : insn.to_a(self) + end + ] + end + + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new( + type, + name, + self, + location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + + ########################################################################## + # Instruction push methods + ########################################################################## + + def push(insn) + insns << insn + + case insn + when Integer, Symbol, Array + insn + else + stack.change_by(-insn.pops + insn.pushes) + insn + end + end + + # This creates a new label at the current length of the instruction + # sequence. 
It is used as the operand for jump instructions. + def label + name = :"label_#{length}" + insns.last == name ? name : event(name) + end + + def event(name) + push(name) + end + + def adjuststack(number) + push(AdjustStack.new(number)) + end + + def anytostring + push(AnyToString.new) + end + + def branchif(label) + push(BranchIf.new(label)) + end + + def branchnil(label) + push(BranchNil.new(label)) + end + + def branchunless(label) + push(BranchUnless.new(label)) + end + + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) + end + + def checkmatch(type) + push(CheckMatch.new(type)) + end + + def checktype(type) + push(CheckType.new(type)) + end + + def concatarray + push(ConcatArray.new) + end + + def concatstrings(number) + push(ConcatStrings.new(number)) + end + + def defined(type, name, message) + push(Defined.new(type, name, message)) + end + + def defineclass(name, class_iseq, flags) + push(DefineClass.new(name, class_iseq, flags)) + end + + def definemethod(name, method_iseq) + push(DefineMethod.new(name, method_iseq)) + end + + def definesmethod(name, method_iseq) + push(DefineSMethod.new(name, method_iseq)) + end + + def dup + push(Dup.new) + end + + def duparray(object) + push(DupArray.new(object)) + end + + def duphash(object) + push(DupHash.new(object)) + end + + def dupn(number) + push(DupN.new(number)) + end + + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) + end + + def getblockparam(index, level) + push(GetBlockParam.new(index, level)) + end + + def getblockparamproxy(index, level) + push(GetBlockParamProxy.new(index, level)) + end + + def getclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::GetClassVariable.new(name)) + else + push(GetClassVariable.new(name, inline_storage_for(name))) + end + end + + def getconstant(name) + push(GetConstant.new(name)) + end + + def getglobal(name) + push(GetGlobal.new(name)) + end + + def getinstancevariable(name) + if 
RUBY_VERSION < "3.2" + push(GetInstanceVariable.new(name, inline_storage_for(name))) + else + push(GetInstanceVariable.new(name, inline_storage)) + end + end + + def getlocal(index, level) + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(GetLocalWC0.new(index)) + when 1 + push(GetLocalWC1.new(index)) + else + push(GetLocal.new(index, level)) + end + else + push(GetLocal.new(index, level)) + end + end + + def getspecial(key, type) + push(GetSpecial.new(key, type)) + end + + def intern + push(Intern.new) + end + + def invokeblock(calldata) + push(InvokeBlock.new(calldata)) + end + + def invokesuper(calldata, block_iseq) + push(InvokeSuper.new(calldata, block_iseq)) + end + + def jump(label) + push(Jump.new(label)) + end + + def leave + push(Leave.new) + end + + def newarray(number) + push(NewArray.new(number)) + end + + def newarraykwsplat(number) + push(NewArrayKwSplat.new(number)) + end + + def newhash(number) + push(NewHash.new(number)) + end + + def newrange(exclude_end) + push(NewRange.new(exclude_end)) + end + + def nop + push(Nop.new) + end + + def objtostring(calldata) + push(ObjToString.new(calldata)) + end + + def once(iseq, cache) + push(Once.new(iseq, cache)) + end + + def opt_aref_with(object, calldata) + push(OptArefWith.new(object, calldata)) + end + + def opt_aset_with(object, calldata) + push(OptAsetWith.new(object, calldata)) + end + + def opt_getconstant_path(names) + if RUBY_VERSION < "3.2" + cache = inline_storage + getinlinecache = opt_getinlinecache(-1, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(cache) + getinlinecache.patch!(self) + else + 
push(OptGetConstantPath.new(names)) + end + end + + def opt_getinlinecache(label, cache) + push(Legacy::OptGetInlineCache.new(label, cache)) + end + + def opt_newarray_max(length) + if specialized_instruction + push(OptNewArrayMax.new(length)) + else + newarray(length) + send(YARV.calldata(:max)) + end + end + + def opt_newarray_min(length) + if specialized_instruction + push(OptNewArrayMin.new(length)) + else + newarray(length) + send(YARV.calldata(:min)) + end + end + + def opt_setinlinecache(cache) + push(Legacy::OptSetInlineCache.new(cache)) + end + + def opt_str_freeze(object) + if specialized_instruction + push(OptStrFreeze.new(object, YARV.calldata(:freeze))) + else + putstring(object) + send(YARV.calldata(:freeze)) + end + end + + def opt_str_uminus(object) + if specialized_instruction + push(OptStrUMinus.new(object, YARV.calldata(:-@))) + else + putstring(object) + send(YARV.calldata(:-@)) + end + end + + def pop + push(Pop.new) + end + + def putnil + push(PutNil.new) + end + + def putobject(object) + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. + if object.eql?(0) + push(PutObjectInt2Fix0.new) + elsif object.eql?(1) + push(PutObjectInt2Fix1.new) + else + push(PutObject.new(object)) + end + else + push(PutObject.new(object)) + end + end + + def putself + push(PutSelf.new) + end + + def putspecialobject(object) + push(PutSpecialObject.new(object)) + end + + def putstring(object) + push(PutString.new(object)) + end + + def send(calldata, block_iseq = nil) + if specialized_instruction && !block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. 
If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + case [calldata.method, calldata.argc] + when [:length, 0] + push(OptLength.new(calldata)) + when [:size, 0] + push(OptSize.new(calldata)) + when [:empty?, 0] + push(OptEmptyP.new(calldata)) + when [:nil?, 0] + push(OptNilP.new(calldata)) + when [:succ, 0] + push(OptSucc.new(calldata)) + when [:!, 0] + push(OptNot.new(calldata)) + when [:+, 1] + push(OptPlus.new(calldata)) + when [:-, 1] + push(OptMinus.new(calldata)) + when [:*, 1] + push(OptMult.new(calldata)) + when [:/, 1] + push(OptDiv.new(calldata)) + when [:%, 1] + push(OptMod.new(calldata)) + when [:==, 1] + push(OptEq.new(calldata)) + when [:!=, 1] + push(OptNEq.new(YARV.calldata(:==, 1), calldata)) + when [:=~, 1] + push(OptRegExpMatch2.new(calldata)) + when [:<, 1] + push(OptLT.new(calldata)) + when [:<=, 1] + push(OptLE.new(calldata)) + when [:>, 1] + push(OptGT.new(calldata)) + when [:>=, 1] + push(OptGE.new(calldata)) + when [:<<, 1] + push(OptLTLT.new(calldata)) + when [:[], 1] + push(OptAref.new(calldata)) + when [:&, 1] + push(OptAnd.new(calldata)) + when [:|, 1] + push(OptOr.new(calldata)) + when [:[]=, 2] + push(OptAset.new(calldata)) + else + push(OptSendWithoutBlock.new(calldata)) + end + else + push(Send.new(calldata, block_iseq)) + end + end + + def setblockparam(index, level) + push(SetBlockParam.new(index, level)) + end + + def setclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::SetClassVariable.new(name)) + else + push(SetClassVariable.new(name, inline_storage_for(name))) + end + end + + def setconstant(name) + push(SetConstant.new(name)) + end + + def setglobal(name) + push(SetGlobal.new(name)) + end + + def setinstancevariable(name) + if RUBY_VERSION < "3.2" + push(SetInstanceVariable.new(name, inline_storage_for(name))) + else + push(SetInstanceVariable.new(name, inline_storage)) + end 
+ end + + def setlocal(index, level) + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(SetLocalWC0.new(index)) + when 1 + push(SetLocalWC1.new(index)) + else + push(SetLocal.new(index, level)) + end + else + push(SetLocal.new(index, level)) + end + end + + def setn(number) + push(SetN.new(number)) + end + + def setspecial(key) + push(SetSpecial.new(key)) + end + + def splatarray(flag) + push(SplatArray.new(flag)) + end + + def swap + push(Swap.new) + end + + def topn(number) + push(TopN.new(number)) + end + + def toregexp(options, length) + push(ToRegExp.new(options, length)) + end + end + end +end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb new file mode 100644 index 00000000..5a23bbf0 --- /dev/null +++ b/lib/syntax_tree/yarv/instructions.rb @@ -0,0 +1,3607 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
class CallData
  # Bit flags describing the shape of a call site.
  CALL_ARGS_SPLAT = 1 << 0
  CALL_ARGS_BLOCKARG = 1 << 1
  CALL_FCALL = 1 << 2
  CALL_VCALL = 1 << 3
  CALL_ARGS_SIMPLE = 1 << 4
  CALL_BLOCKISEQ = 1 << 5
  CALL_KWARG = 1 << 6
  CALL_KW_SPLAT = 1 << 7
  CALL_TAILCALL = 1 << 8
  CALL_SUPER = 1 << 9
  CALL_ZSUPER = 1 << 10
  CALL_OPT_SEND = 1 << 11
  CALL_KW_SPLAT_MUT = 1 << 12

  attr_reader :method, :argc, :flags, :kw_arg

  # method is the name being called, argc the number of arguments, flags a
  # combination of the CALL_* constants and kw_arg the optional keyword
  # argument information.
  def initialize(
    method,
    argc = 0,
    flags = CallData::CALL_ARGS_SIMPLE,
    kw_arg = nil
  )
    @method, @argc, @flags, @kw_arg = method, argc, flags, kw_arg
  end

  # True when at least one bit of mask is set in the flags.
  def flag?(mask)
    (@flags & mask).positive?
  end

  # The hash form of this call data. The :kw_arg key is only present when
  # keyword argument information was given.
  def to_h
    base = { mid: @method, flag: @flags, orig_argc: @argc }
    @kw_arg ? base.merge(kw_arg: @kw_arg) : base
  end
end

# A convenience method for creating a CallData object.
def self.calldata(
  method,
  argc = 0,
  flags = CallData::CALL_ARGS_SIMPLE,
  kw_arg = nil
)
  CallData.new(method, argc, flags, kw_arg)
end

# ### Summary
#
# `adjuststack` accepts a single integer argument and removes that many
# elements from the top of the stack.
#
# ### Usage
#
# ~~~ruby
# x = [true]
# x[0] ||= nil
# x[0]
# ~~~
#
class AdjustStack
  attr_reader :number

  def initialize(number)
    @number = number
  end

  # Serialized form: the opcode followed by the element count.
  def to_a(_iseq)
    [:adjuststack, @number]
  end

  # Opcode plus one operand.
  def length
    2
  end

  # Removes as many values as the operand says.
  def pops
    @number
  end

  def pushes
    0
  end
end

# ### Summary
#
# `anytostring` ensures that the value on top of the stack is a string.
#
# It pops two values off the stack. If the first value is a string it
# pushes it back on the stack. If the first value is not a string, it uses
# Ruby's built in string coercion to coerce the second value to a string
# and then pushes that back on the stack.
#
# This is used in conjunction with `objtostring` as a fallback for when an
# object's `to_s` method does not return a string.
#
# ### Usage
#
# ~~~ruby
# "#{5}"
# ~~~
#
class AnyToString
  # Serialized form: the opcode on its own.
  def to_a(_iseq)
    [:anytostring]
  end

  def length
    1
  end

  # Consumes two values and leaves one behind.
  def pops
    2
  end

  def pushes
    1
  end
end

# ### Summary
#
# `branchif` has one argument: the jump index. It pops one value off the
# stack: the jump condition.
#
# If the value popped off the stack is true, `branchif` jumps to
# the jump index and continues executing there.
#
# ### Usage
#
# ~~~ruby
# x = true
# x ||= "foo"
# puts x
# ~~~
#
class BranchIf
  attr_reader :label

  def initialize(label)
    @label = label
  end

  # Point this branch at the label the given instruction sequence hands out.
  def patch!(iseq)
    @label = iseq.label
  end

  # Serialized form: the opcode followed by the current jump target.
  def to_a(_iseq)
    [:branchif, @label]
  end

  # Opcode plus one operand.
  def length
    2
  end

  # The condition is consumed and nothing is pushed.
  def pops
    1
  end

  def pushes
    0
  end
end

# ### Summary
#
# `branchnil` has one argument: the jump index. It pops one value off the
# stack: the jump condition.
#
# If the value popped off the stack is nil, `branchnil` jumps to
# the jump index and continues executing there.
#
# ### Usage
#
# ~~~ruby
# x = nil
# if x&.to_s
#   puts "hi"
# end
# ~~~
#
class BranchNil
  attr_reader :label

  def initialize(label)
    @label = label
  end

  # Point this branch at the label the given instruction sequence hands out.
  def patch!(iseq)
    @label = iseq.label
  end

  # Serialized form: the opcode followed by the current jump target.
  def to_a(_iseq)
    [:branchnil, @label]
  end

  # Opcode plus one operand.
  def length
    2
  end

  # The condition is consumed and nothing is pushed.
  def pops
    1
  end

  def pushes
    0
  end
end

# ### Summary
#
# `branchunless` has one argument: the jump index. It pops one value off
# the stack: the jump condition.
#
# If the value popped off the stack is false or nil, `branchunless` jumps
# to the jump index and continues executing there.
#
# ### Usage
#
# ~~~ruby
# if 2 + 3
#   puts "foo"
# end
# ~~~
#
class BranchUnless
  attr_reader :label

  def initialize(label)
    @label = label
  end

  # Point this branch at the label the given instruction sequence hands out.
  def patch!(iseq)
    @label = iseq.label
  end

  # Serialized form: the opcode followed by the current jump target.
  def to_a(_iseq)
    [:branchunless, @label]
  end

  # Opcode plus one operand.
  def length
    2
  end

  # The condition is consumed and nothing is pushed.
  def pops
    1
  end

  def pushes
    0
  end
end

# ### Summary
#
# `checkkeyword` checks if a keyword was passed at the callsite that
# called into the method represented by the instruction sequence. It has
# two arguments: the index of the local variable that stores the keywords
# metadata and the index of the keyword within that metadata. It pushes
# a boolean onto the stack indicating whether or not the keyword was
# given.
#
# ### Usage
#
# ~~~ruby
# def evaluate(value: rand)
#   value
# end
#
# evaluate(value: 3)
# ~~~
#
class CheckKeyword
  attr_reader :keyword_bits_index, :keyword_index

  def initialize(keyword_bits_index, keyword_index)
    @keyword_bits_index = keyword_bits_index
    @keyword_index = keyword_index
  end

  # NOTE(review): this stores a label that nothing else in this class
  # reads; it looks like a leftover from the branch instructions. Confirm
  # that no caller relies on it before removing.
  def patch!(iseq)
    @label = iseq.label
  end

  # Serialized form. The keyword bits index is translated through the
  # local table of the instruction sequence being serialized.
  def to_a(iseq)
    bits_offset = iseq.local_table.offset(@keyword_bits_index)
    [:checkkeyword, bits_offset, @keyword_index]
  end

  # Opcode plus two operands.
  def length
    3
  end

  # Nothing is consumed; a single value is pushed.
  def pops
    0
  end

  def pushes
    1
  end
end

# ### Summary
#
# `checkmatch` checks if the current pattern matches the current value. It
# pops the target and the pattern off the stack and pushes a boolean onto
# the stack if it matches or not.
#
# ### Usage
#
# ~~~ruby
# foo in Foo
# ~~~
#
class CheckMatch
  TYPE_WHEN = 1
  TYPE_CASE = 2
  TYPE_RESCUE = 3

  attr_reader :type

  def initialize(type)
    @type = type
  end

  # Serialized form: the opcode followed by the match type.
  def to_a(_iseq)
    [:checkmatch, @type]
  end

  # Opcode plus one operand.
  def length
    2
  end

  # Target and pattern are consumed; one value is pushed.
  def pops
    2
  end

  def pushes
    1
  end
end

# ### Summary
#
# `checktype` checks if the value on top of the stack is of a certain type.
# The type is the only argument.
# It pops the value off the stack and pushes
# a boolean onto the stack indicating whether or not the value is of the
# given type.
#
# ### Usage
#
# ~~~ruby
# foo in [bar]
# ~~~
#
class CheckType
  TYPE_OBJECT = 0x01
  TYPE_CLASS = 0x02
  TYPE_MODULE = 0x03
  TYPE_FLOAT = 0x04
  TYPE_STRING = 0x05
  TYPE_REGEXP = 0x06
  TYPE_ARRAY = 0x07
  TYPE_HASH = 0x08
  TYPE_STRUCT = 0x09
  TYPE_BIGNUM = 0x0a
  TYPE_FILE = 0x0b
  TYPE_DATA = 0x0c
  TYPE_MATCH = 0x0d
  TYPE_COMPLEX = 0x0e
  TYPE_RATIONAL = 0x0f
  TYPE_NIL = 0x11
  TYPE_TRUE = 0x12
  TYPE_FALSE = 0x13
  TYPE_SYMBOL = 0x14
  TYPE_FIXNUM = 0x15
  TYPE_UNDEF = 0x16

  attr_reader :type

  def initialize(type)
    @type = type
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    # TODO: This is incorrect. The instruction only pushes a single value
    # onto the stack. However, if this is set to 1, we no longer match the
    # output of RubyVM::InstructionSequence. So leaving this here until we
    # can investigate further.
    2
  end

  def to_a(_iseq)
    [:checktype, type]
  end
end

# ### Summary
#
# `concatarray` joins the two Arrays sitting on top of the stack.
#
# Both objects are coerced into Arrays by calling `to_a` when necessary. If
# the first object already was an Array it is `dup`ed first, so that
# concatenating does not mutate it.
#
# ### Usage
#
# ~~~ruby
# [1, *2]
# ~~~
#
class ConcatArray
  def length
    1
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:concatarray]
  end
end

# ### Summary
#
# `concatstrings` pops a number of strings from the stack, joins them
# together into a single string and pushes that string back on the stack.
#
# This does no coercion and so is always used in conjunction with
# `objtostring` and `anytostring` to ensure the stack contents are always
# strings.
#
# ### Usage
#
# ~~~ruby
# "#{5}"
# ~~~
#
class ConcatStrings
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:concatstrings, number]
  end
end

# ### Summary
#
# `defineclass` defines a class. It first pops the superclass off the
# stack, and then pops the object under which the class should be defined.
# Its three arguments are the name of the constant, the instruction
# sequence holding the body of the class, and a set of flags saying whether
# this is a singleton class, a module, or a regular class.
#
# ### Usage
#
# ~~~ruby
# class Foo
# end
# ~~~
#
class DefineClass
  TYPE_CLASS = 0
  TYPE_SINGLETON_CLASS = 1
  TYPE_MODULE = 2
  FLAG_SCOPED = 8
  FLAG_HAS_SUPERCLASS = 16

  attr_reader :name, :class_iseq, :flags

  def initialize(name, class_iseq, flags)
    @name = name
    @class_iseq = class_iseq
    @flags = flags
  end

  def length
    4
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The nested instruction sequence is serialized through its own `to_a`.
  def to_a(_iseq)
    body = class_iseq.to_a
    [:defineclass, name, body, flags]
  end
end

# ### Summary
#
# `defined` checks if the top value of the stack is defined. If it is, it
# pushes its value onto the stack. Otherwise it pushes `nil`.
#
# ### Usage
#
# ~~~ruby
# defined?(x)
# ~~~
#
class Defined
  TYPE_NIL = 1
  TYPE_IVAR = 2
  TYPE_LVAR = 3
  TYPE_GVAR = 4
  TYPE_CVAR = 5
  TYPE_CONST = 6
  TYPE_METHOD = 7
  TYPE_YIELD = 8
  TYPE_ZSUPER = 9
  TYPE_SELF = 10
  TYPE_TRUE = 11
  TYPE_FALSE = 12
  TYPE_ASGN = 13
  TYPE_EXPR = 14
  TYPE_REF = 15
  TYPE_FUNC = 16
  TYPE_CONST_FROM = 17

  attr_reader :type, :name, :message

  def initialize(type, name, message)
    @type = type
    @name = name
    @message = message
  end

  def length
    4
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:defined, type, name, message]
  end
end

# ### Summary
#
# `definemethod` defines a method on the class of the current value of
# `self`. It takes two arguments: the name of the method being defined,
# and the instruction sequence that represents the body of the method.
#
# ### Usage
#
# ~~~ruby
# def value = "value"
# ~~~
#
class DefineMethod
  attr_reader :name, :method_iseq

  def initialize(name, method_iseq)
    @name = name
    @method_iseq = method_iseq
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    0
  end

  # The method body is serialized through its own `to_a`.
  def to_a(_iseq)
    body = method_iseq.to_a
    [:definemethod, name, body]
  end
end

# ### Summary
#
# `definesmethod` defines a method on the singleton class of the current
# value of `self`. It accepts two arguments. The first is the name of the
# method being defined. The second is the instruction sequence representing
# the body of the method. It pops the object off the stack that the method
# should be defined on.
#
# ### Usage
#
# ~~~ruby
# def self.value = "value"
# ~~~
#
class DefineSMethod
  attr_reader :name, :method_iseq

  def initialize(name, method_iseq)
    @name = name
    @method_iseq = method_iseq
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    0
  end

  # The method body is serialized through its own `to_a`.
  def to_a(_iseq)
    body = method_iseq.to_a
    [:definesmethod, name, body]
  end
end

# ### Summary
#
# `dup` copies the value on top of the stack and pushes the copy onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# $global = 5
# ~~~
#
class Dup
  def length
    1
  end

  def pops
    1
  end

  def pushes
    2
  end

  def to_a(_iseq)
    [:dup]
  end
end

# ### Summary
#
# `duparray` dups an Array literal and pushes the copy onto the stack.
#
# ### Usage
#
# ~~~ruby
# [true]
# ~~~
#
class DupArray
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:duparray, object]
  end
end

# ### Summary
#
# `duphash` dups a Hash literal and pushes the copy onto the stack.
#
# ### Usage
#
# ~~~ruby
# { a: 1 }
# ~~~
#
class DupHash
  attr_reader :object

  def initialize(object)
    @object = object
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:duphash, object]
  end
end

# ### Summary
#
# `dupn` duplicates the top `n` elements of the stack.
#
# ### Usage
#
# ~~~ruby
# Object::X ||= true
# ~~~
#
class DupN
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    number
  end

  def to_a(_iseq)
    [:dupn, number]
  end
end

# ### Summary
#
# `expandarray` looks at the top of the stack, and if the value is an array
# it replaces it on the stack with `number` elements of the array, or `nil`
# if the elements are missing.
#
# ### Usage
#
# ~~~ruby
# x, = [true, false, nil]
# ~~~
#
class ExpandArray
  attr_reader :number, :flags

  def initialize(number, flags)
    @number = number
    @flags = flags
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    number
  end

  def to_a(_iseq)
    [:expandarray, number, flags]
  end
end

# ### Summary
#
# `getblockparam` is a similar instruction to `getlocal` in that it looks
# for a local variable in the current instruction sequence's local table and
# walks recursively up the parent instruction sequences until it finds it.
# The local it retrieves, however, is a special block local that was passed
# to the current method. It pushes the value of the block local onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block
# end
# ~~~
#
class GetBlockParam
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  # Serializes the instruction, resolving the index against the local table
  # of the instruction sequence that sits `level` parents above the given
  # one.
  def to_a(iseq)
    current = iseq
    # Step from the frame reached so far, not from the starting frame,
    # so that levels greater than one reach the right ancestor.
    level.times { current = current.parent_iseq }
    [:getblockparam, current.local_table.offset(index), level]
  end
end

# ### Summary
#
# `getblockparamproxy` is almost the same as `getblockparam` except that it
# pushes a proxy object onto the stack instead of the actual value of the
# block local. This is used when a method is being called on the block
# local.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block.call
# end
# ~~~
#
class GetBlockParamProxy
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  # Serializes the instruction, resolving the index against the local table
  # of the instruction sequence that sits `level` parents above the given
  # one.
  def to_a(iseq)
    current = iseq
    # Step from the frame reached so far, not from the starting frame,
    # so that levels greater than one reach the right ancestor.
    level.times { current = current.parent_iseq }
    [:getblockparamproxy, current.local_table.offset(index), level]
  end
end

# ### Summary
#
# `getclassvariable` looks for a class variable in the current class and
# pushes its value onto the stack. It uses an inline cache to reduce the
# need to lookup the class variable in the class hierarchy every time.
#
# ### Usage
#
# ~~~ruby
# @@class_variable
# ~~~
#
class GetClassVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:getclassvariable, name, cache]
  end
end

# ### Summary
#
# `getconstant` performs a constant lookup and pushes the value of the
# constant onto the stack. It pops both the class it should look in and
# whether or not it should look globally as well.
#
# ### Usage
#
# ~~~ruby
# Constant
# ~~~
#
class GetConstant
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:getconstant, name]
  end
end

# ### Summary
#
# `getglobal` pushes the value of a global variable onto the stack.
#
# ### Usage
#
# ~~~ruby
# $$
# ~~~
#
class GetGlobal
  attr_reader :name

  def initialize(name)
    @name = name
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:getglobal, name]
  end
end

# ### Summary
#
# `getinstancevariable` pushes the value of an instance variable onto the
# stack.
# It uses an inline cache to avoid having to look up the instance
# variable in the class hierarchy every time.
#
# This instruction has two forms, but both have the same structure. Before
# Ruby 3.2, the inline cache corresponded to both the get and set
# instructions and could be shared. Since Ruby 3.2, it uses object shapes
# instead so the caches are unique per instruction.
#
# ### Usage
#
# ~~~ruby
# @instance_variable
# ~~~
#
class GetInstanceVariable
  attr_reader :name, :cache

  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:getinstancevariable, name, cache]
  end
end

# ### Summary
#
# `getlocal_WC_0` is a specialized form of the `getlocal` instruction. Its
# only argument is an index, and it fetches the value of the local variable
# at that index from the current frame.
#
# ### Usage
#
# ~~~ruby
# value = 5
# value
# ~~~
#
class GetLocalWC0
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The index is translated through the local table of the given
  # instruction sequence.
  def to_a(iseq)
    table = iseq.local_table
    [:getlocal_WC_0, table.offset(index)]
  end
end

# ### Summary
#
# `getlocal_WC_1` is a specialized form of the `getlocal` instruction. Its
# only argument is an index, and it fetches the value of the local variable
# at that index from the parent frame.
#
# ### Usage
#
# ~~~ruby
# value = 5
# self.then { value }
# ~~~
#
class GetLocalWC1
  attr_reader :index

  def initialize(index)
    @index = index
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The index is translated through the local table of the parent of the
  # given instruction sequence.
  def to_a(iseq)
    parent = iseq.parent_iseq
    [:getlocal_WC_1, parent.local_table.offset(index)]
  end
end

# ### Summary
#
# `getlocal` fetches the value of a local variable from a frame determined
# by the level and index arguments.
# The level is the number of frames back
# to look and the index is the index in the local table. It pushes the value
# it finds onto the stack.
#
# ### Usage
#
# ~~~ruby
# value = 5
# tap { tap { value } }
# ~~~
#
class GetLocal
  attr_reader :index, :level

  def initialize(index, level)
    @index = index
    @level = level
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The index is translated through the local table of the instruction
  # sequence found by following `parent_iseq` `level` times from the given
  # one.
  def to_a(iseq)
    owner = level.times.inject(iseq) { |frame, _| frame.parent_iseq }
    [:getlocal, owner.local_table.offset(index), level]
  end
end

# ### Summary
#
# `getspecial` pushes the value of a special local variable onto the stack.
#
# ### Usage
#
# ~~~ruby
# 1 if (a == 1) .. (b == 2)
# ~~~
#
class GetSpecial
  SVAR_LASTLINE = 0 # $_
  SVAR_BACKREF = 1 # $~
  SVAR_FLIPFLOP_START = 2 # flipflop

  attr_reader :key, :type

  def initialize(key, type)
    @key = key
    @type = type
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:getspecial, key, type]
  end
end

# ### Summary
#
# `intern` pops the top element of the stack, converts it to a symbol, and
# pushes that symbol onto the stack.
#
# ### Usage
#
# ~~~ruby
# :"#{"foo"}"
# ~~~
#
class Intern
  def length
    1
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:intern]
  end
end

# ### Summary
#
# `invokeblock` invokes the block given to the current method. It pops the
# arguments for the block off the stack and pushes the result of running the
# block onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo
#   yield
# end
# ~~~
#
class InvokeBlock
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    calldata.argc
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:invokeblock, calldata.to_h]
  end
end

# ### Summary
#
# `invokesuper` is similar to the `send` instruction, except that it calls
# the super method. It pops the receiver and arguments off the stack and
# pushes the return value onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo
#   super
# end
# ~~~
#
class InvokeSuper
  attr_reader :calldata, :block_iseq

  def initialize(calldata, block_iseq)
    @calldata = calldata
    @block_iseq = block_iseq
  end

  # The opcode, the call data, and the (possibly nil) block instruction
  # sequence: the same three slots that `to_a` emits.
  def length
    3
  end

  # The receiver, every argument, and the block argument when the call data
  # carries the CALL_ARGS_BLOCKARG flag.
  def pops
    argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0)
    argb + calldata.argc + 1
  end

  def pushes
    1
  end

  # The block instruction sequence is optional, so it is serialized as nil
  # when absent.
  def to_a(_iseq)
    [:invokesuper, calldata.to_h, block_iseq&.to_a]
  end
end

# ### Summary
#
# `jump` unconditionally jumps to the label given as its only argument.
#
# ### Usage
#
# ~~~ruby
# x = 0
# if x == 0
#   puts "0"
# else
#   puts "2"
# end
# ~~~
#
class Jump
  attr_reader :label

  def initialize(label)
    @label = label
  end

  # Replaces the stored label with whatever `iseq.label` returns.
  def patch!(iseq)
    @label = iseq.label
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    0
  end

  def to_a(_iseq)
    [:jump, label]
  end
end

# ### Summary
#
# `leave` exits the current frame.
#
# ### Usage
#
# ~~~ruby
# ;;
# ~~~
#
class Leave
  def length
    1
  end

  def pops
    1
  end

  def pushes
    # TODO: This is wrong. It should be 1. But it's 0 for now because
    # otherwise the stack size is incorrectly calculated.
    0
  end

  def to_a(_iseq)
    [:leave]
  end
end

# ### Summary
#
# `newarray` puts a new array initialized with `number` values from the
# stack.
# It pops `number` values off the stack and pushes the array onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# ["string"]
# ~~~
#
class NewArray
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:newarray, number]
  end
end

# ### Summary
#
# `newarraykwsplat` is a specialized form of `newarray` that takes a **
# splat argument. It pops `number` values off the stack and pushes the
# resulting array onto the stack.
#
# ### Usage
#
# ~~~ruby
# ["string", **{ foo: "bar" }]
# ~~~
#
class NewArrayKwSplat
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:newarraykwsplat, number]
  end
end

# ### Summary
#
# `newhash` builds a new hash out of `number` elements taken from the
# stack, where `number` needs to be even. It pops those `number` elements
# off the stack and pushes the hash onto the stack.
#
# ### Usage
#
# ~~~ruby
# def foo(key, value)
#   { key => value }
# end
# ~~~
#
class NewHash
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:newhash, number]
  end
end

# ### Summary
#
# `newrange` creates a new range object from the top two values on the
# stack. It pops both of them off, and then pushes on the new range. It
# takes one argument which is 0 if the end is included or 1 if the end value
# is excluded.
#
# ### Usage
#
# ~~~ruby
# x = 0
# y = 1
# p (x..y), (x...y)
# ~~~
#
class NewRange
  attr_reader :exclude_end

  def initialize(exclude_end)
    @exclude_end = exclude_end
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:newrange, exclude_end]
  end
end

# ### Summary
#
# `nop` is a no-operation instruction. It pads the instruction sequence so
# that other instructions have a place to jump to.
#
# ### Usage
#
# ~~~ruby
# raise rescue true
# ~~~
#
class Nop
  def length
    1
  end

  def pops
    0
  end

  def pushes
    0
  end

  def to_a(_iseq)
    [:nop]
  end
end

# ### Summary
#
# `objtostring` pops a value from the stack, calls `to_s` on that value and
# then pushes the result back onto the stack.
#
# There are various fast paths for classes like String, Symbol, Module,
# Class, etc. Everything else goes through a call to `to_s`.
#
# ### Usage
#
# ~~~ruby
# "#{5}"
# ~~~
#
class ObjToString
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:objtostring, calldata.to_h]
  end
end

# ### Summary
#
# `once` is an instruction that wraps an instruction sequence and ensures
# that it is only ever executed once for the lifetime of the program. It
# uses a cache to ensure that it is only executed once. It pushes the result
# of running the instruction sequence onto the stack.
#
# ### Usage
#
# ~~~ruby
# END { puts "END" }
# ~~~
#
class Once
  attr_reader :iseq, :cache

  def initialize(iseq, cache)
    @iseq = iseq
    @cache = cache
  end

  def length
    3
  end

  def pops
    0
  end

  def pushes
    1
  end

  # The wrapped instruction sequence is serialized through its own `to_a`.
  def to_a(_iseq)
    body = iseq.to_a
    [:once, body, cache]
  end
end

# ### Summary
#
# `opt_and` is a specialization of the `opt_send_without_block` instruction
# used for the `&` operator. It has a fast path for when both operands are
# integers. It pops both the receiver and the argument off the stack and
# pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 & 3
# ~~~
#
class OptAnd
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_and, calldata.to_h]
  end
end

# ### Summary
#
# `opt_aref` is a specialization of the `opt_send_without_block` instruction
# used for the `[]` operator. It has fast paths for when the receiver is an
# integer, array, or hash.
#
# ### Usage
#
# ~~~ruby
# 7[2]
# ~~~
#
class OptAref
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_aref, calldata.to_h]
  end
end

# ### Summary
#
# `opt_aref_with` is a specialization of the `opt_aref` instruction that
# occurs when the `[]` operator is used with a string argument known at
# compile time. There are fast paths if the receiver is a hash. It pops the
# receiver off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# { 'test' => true }['test']
# ~~~
#
class OptArefWith
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object = object
    @calldata = calldata
  end

  def length
    3
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_aref_with, object, calldata.to_h]
  end
end

# ### Summary
#
# `opt_aset` is the instruction for setting a hash value by key in the
# `recv[obj] = set` format. It is a specialization of the
# `opt_send_without_block` instruction. It pops the receiver, the key, and
# the value off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# {}[:key] = value
# ~~~
#
class OptAset
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    3
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_aset, calldata.to_h]
  end
end

# ### Summary
#
# `opt_aset_with` is the instruction for setting a hash value by a known
# string key in the `recv[obj] = set` format. It pops the receiver and the
# value off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# {}["key"] = value
# ~~~
#
class OptAsetWith
  attr_reader :object, :calldata

  def initialize(object, calldata)
    @object = object
    @calldata = calldata
  end

  def length
    3
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_aset_with, object, calldata.to_h]
  end
end

# ### Summary
#
# `opt_div` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `/` operator is used. There are fast paths for if
# both operands are integers, or if both operands are floats. It pops both
# the receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 / 3
# ~~~
#
class OptDiv
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_div, calldata.to_h]
  end
end

# ### Summary
#
# `opt_empty_p` is an optimization applied to calls to the `empty?` method.
# It pops the receiver off the stack and pushes on the result of the method
# call.
#
# ### Usage
#
# ~~~ruby
# "".empty?
# ~~~
#
class OptEmptyP
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_empty_p, calldata.to_h]
  end
end

# ### Summary
#
# `opt_eq` is a specialization of the `opt_send_without_block` instruction
# used for the == operator. It has fast paths for when both operands are
# integers, floats, symbols or strings. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 2 == 2
# ~~~
#
class OptEq
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_eq, calldata.to_h]
  end
end

# ### Summary
#
# `opt_ge` is a specialization of the `opt_send_without_block` instruction
# that occurs when the >= operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 >= 3
# ~~~
#
class OptGE
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_ge, calldata.to_h]
  end
end

# ### Summary
#
# `opt_getconstant_path` performs a constant lookup on a chain of constant
# names. Its argument is an array of constant names, and it pushes the
# value of the constant onto the stack.
#
# ### Usage
#
# ~~~ruby
# ::Object
# ~~~
#
class OptGetConstantPath
  attr_reader :names

  def initialize(names)
    @names = names
  end

  def length
    2
  end

  def pops
    0
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_getconstant_path, names]
  end
end

# ### Summary
#
# `opt_gt` is a specialization of the `opt_send_without_block` instruction
# used for the > operator. It has fast paths for when both operands are
# integers or floats. It pops both the receiver and the argument off the
# stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 > 3
# ~~~
#
class OptGT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_gt, calldata.to_h]
  end
end

# ### Summary
#
# `opt_le` is a specialization of the `opt_send_without_block` instruction
# that occurs when the <= operator is used. Fast paths exist when both
# operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 <= 4
# ~~~
#
class OptLE
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_le, calldata.to_h]
  end
end

# ### Summary
#
# `opt_length` is a specialization of `opt_send_without_block` used when
# the `length` method is called. It has fast paths for when the receiver is
# a string, hash, or array. It pops the receiver off the stack and pushes
# on the result of the method call.
#
# ### Usage
#
# ~~~ruby
# "".length
# ~~~
#
class OptLength
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_length, calldata.to_h]
  end
end

# ### Summary
#
# `opt_lt` is a specialization of the `opt_send_without_block` instruction
# used for the < operator. It has fast paths for when both operands are
# integers or floats. It pops both the receiver and the argument off the
# stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 < 4
# ~~~
#
class OptLT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_lt, calldata.to_h]
  end
end

# ### Summary
#
# `opt_ltlt` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `<<` operator is used. Fast paths exist when the
# receiver is either a String or an Array. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# "" << 2
# ~~~
#
class OptLTLT
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_ltlt, calldata.to_h]
  end
end

# ### Summary
#
# `opt_minus` is a specialization of the `opt_send_without_block`
# instruction used for the `-` operator. It has fast paths for when both
# operands are integers or both operands are floats. It pops both the
# receiver and the argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 - 2
# ~~~
#
class OptMinus
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_minus, calldata.to_h]
  end
end

# ### Summary
#
# `opt_mod` is a specialization of the `opt_send_without_block` instruction
# used for the `%` operator. It has fast paths for when both operands are
# integers or both operands are floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 4 % 2
# ~~~
#
class OptMod
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_mod, calldata.to_h]
  end
end

# ### Summary
#
# `opt_mult` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `*` operator is used. There are fast paths for if
# both operands are integers or floats. It pops both the receiver and the
# argument off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# 3 * 2
# ~~~
#
class OptMult
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    2
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_mult, calldata.to_h]
  end
end

# ### Summary
#
# `opt_neq` is an optimization that tests whether the two values at the top
# of the stack are not equal, by testing their equality and then calling
# `!` on the result. This lets `opt_neq` reuse the fast paths optimized in
# `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It
# pops both the receiver and the argument off the stack and pushes on the
# result.
#
# ### Usage
#
# ~~~ruby
# 2 != 2
# ~~~
#
class OptNEq
  attr_reader :eq_calldata, :neq_calldata

  def initialize(eq_calldata, neq_calldata)
    @eq_calldata = eq_calldata
    @neq_calldata = neq_calldata
  end

  def length
    3
  end

  def pops
    2
  end

  def pushes
    1
  end

  # The call data for `==` is emitted first, followed by the one for `!=`.
  def to_a(_iseq)
    [:opt_neq, eq_calldata.to_h, neq_calldata.to_h]
  end
end

# ### Summary
#
# `opt_newarray_max` is a specialization used when the `max` method is
# called on an array literal. It pops the values of the array off the stack
# and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# [1, 2, 3].max
# ~~~
#
class OptNewArrayMax
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_newarray_max, number]
  end
end

# ### Summary
#
# `opt_newarray_min` is a specialization that occurs when the `min` method
# is called on an array literal. It pops the values of the array off the
# stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# [1, 2, 3].min
# ~~~
#
class OptNewArrayMin
  attr_reader :number

  def initialize(number)
    @number = number
  end

  def length
    2
  end

  def pops
    number
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_newarray_min, number]
  end
end

# ### Summary
#
# `opt_nil_p` is an optimization applied to calls to the `nil?` method. It
# returns true immediately when the receiver is `nil` and defers to the
# `nil?` method in all other cases. It pops the receiver off the stack and
# pushes on the result.
#
# ### Usage
#
# ~~~ruby
# "".nil?
# ~~~
#
class OptNilP
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_nil_p, calldata.to_h]
  end
end

# ### Summary
#
# `opt_not` negates the value on top of the stack by calling the `!` method
# on it. It pops the receiver off the stack and pushes on the result.
#
# ### Usage
#
# ~~~ruby
# !true
# ~~~
#
class OptNot
  attr_reader :calldata

  def initialize(calldata)
    @calldata = calldata
  end

  def length
    2
  end

  def pops
    1
  end

  def pushes
    1
  end

  def to_a(_iseq)
    [:opt_not, calldata.to_h]
  end
end

# ### Summary
#
# `opt_or` is a specialization of the `opt_send_without_block` instruction
# that occurs when the `|` operator is used. There is a fast path for if
# both operands are integers. It pops both the receiver and the argument off
# the stack and pushes on the result.
+ # + # ### Usage + # + # ~~~ruby + # 2 | 3 + # ~~~ + # + class OptOr + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_or, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_plus` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `+` operator is used. There are fast paths for if + # both operands are integers, floats, strings, or arrays. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 + 3 + # ~~~ + # + class OptPlus + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_plus, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_regexpmatch2` is a specialization of the `opt_send_without_block` + # instruction that occurs when the `=~` operator is used. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # /a/ =~ "a" + # ~~~ + # + class OptRegExpMatch2 + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_regexpmatch2, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_send_without_block` is a specialization of the send instruction that + # occurs when a method is being called without a block. It pops the receiver + # and the arguments off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" 
+ # ~~~ + # + class OptSendWithoutBlock + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_send_without_block, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + calldata.argc + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_size` is a specialization of `opt_send_without_block`, when the + # `size` method is called. There are fast paths when the receiver is either + # a string, hash, or array. It pops the receiver off the stack and pushes on + # the result. + # + # ### Usage + # + # ~~~ruby + # "".size + # ~~~ + # + class OptSize + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_size, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_freeze` pushes a frozen known string value with no interpolation + # onto the stack using the #freeze method. If the method gets overridden, + # this will fall back to a send. + # + # ### Usage + # + # ~~~ruby + # "hello".freeze + # ~~~ + # + class OptStrFreeze + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_freeze, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_uminus` pushes a frozen known string value with no interpolation + # onto the stack. If the method gets overridden, this will fall back to a + # send. 
+ # + # ### Usage + # + # ~~~ruby + # -"string" + # ~~~ + # + class OptStrUMinus + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_uminus, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_succ` is a specialization of the `opt_send_without_block` instruction + # when the method being called is `succ`. Fast paths exist when the receiver + # is either a String or an Integer. It pops the receiver off the stack and + # pushes on the result. + # + # ### Usage + # + # ~~~ruby + # "".succ + # ~~~ + # + class OptSucc + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_succ, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `pop` pops the top value off the stack. + # + # ### Usage + # + # ~~~ruby + # a ||= 2 + # ~~~ + # + class Pop + def to_a(_iseq) + [:pop] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `putnil` pushes a global nil object onto the stack. + # + # ### Usage + # + # ~~~ruby + # nil + # ~~~ + # + class PutNil + def to_a(_iseq) + [:putnil] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject` pushes a known value onto the stack. + # + # ### Usage + # + # ~~~ruby + # 5 + # ~~~ + # + class PutObject + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 0`.
+ # + # ### Usage + # + # ~~~ruby + # 0 + # ~~~ + # + class PutObjectInt2Fix0 + def to_a(_iseq) + [:putobject_INT2FIX_0_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 1`. + # + # ### Usage + # + # ~~~ruby + # 1 + # ~~~ + # + class PutObjectInt2Fix1 + def to_a(_iseq) + [:putobject_INT2FIX_1_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putself` pushes the current value of self onto the stack. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" + # ~~~ + # + class PutSelf + def to_a(_iseq) + [:putself] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putspecialobject` pushes one of three special objects onto the stack. + # These are either the VM core special object, the class base special + # object, or the constant base special object. + # + # ### Usage + # + # ~~~ruby + # alias foo bar + # ~~~ + # + class PutSpecialObject + OBJECT_VMCORE = 1 + OBJECT_CBASE = 2 + OBJECT_CONST_BASE = 3 + + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putspecialobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putstring` pushes an unfrozen string literal onto the stack. + # + # ### Usage + # + # ~~~ruby + # "foo" + # ~~~ + # + class PutString + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putstring, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `send` invokes a method with an optional block. 
It pops its receiver and + # the arguments for the method off the stack and pushes the return value + # onto the stack. It has two arguments: the calldata for the call site and + # the optional block instruction sequence. + # + # ### Usage + # + # ~~~ruby + # "hello".tap { |i| p i } + # ~~~ + # + class Send + attr_reader :calldata, :block_iseq + + def initialize(calldata, block_iseq) + @calldata = calldata + @block_iseq = block_iseq + end + + def to_a(_iseq) + [:send, calldata.to_h, block_iseq&.to_a] + end + + def length + 3 + end + + def pops + argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) + argb + calldata.argc + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setblockparam` sets the value of a block local variable on a frame + # determined by the level and index arguments. The level is the number of + # frames back to look and the index is the index in the local table. It pops + # the value it is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # def foo(&bar) + # bar = baz + # end + # ~~~ + # + class SetBlockParam + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setblockparam, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. It uses an + # inline cache to reduce the need to lookup the class variable in the class + # hierarchy every time. 
+ # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setclassvariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setconstant` pops two values off the stack: the value to set the + # constant to and the constant base to set it in. + # + # ### Usage + # + # ~~~ruby + # Constant = 1 + # ~~~ + # + class SetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setglobal` sets the value of a global variable to a value popped off the + # top of the stack. + # + # ### Usage + # + # ~~~ruby + # $global = 5 + # ~~~ + # + class SetGlobal + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setglobal, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setinstancevariable` pops a value off the top of the stack and then sets + # the instance variable associated with the instruction to that value. + # + # This instruction has two forms, but both have the same structure. Before + # Ruby 3.2, the inline cache corresponded to both the get and set + # instructions and could be shared. Since Ruby 3.2, it uses object shapes + # instead so the caches are unique per instruction. 
+ # + # ### Usage + # + # ~~~ruby + # @instance_variable = 1 + # ~~~ + # + class SetInstanceVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setinstancevariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal` sets the value of a local variable on a frame determined by the + # level and index arguments. The level is the number of frames back to + # look and the index is the index in the local table. It pops the value it + # is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # tap { tap { value = 10 } } + # ~~~ + # + class SetLocal + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setlocal, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the current frame to the value at + # the top of the stack as determined by the index given as its only + # argument. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # ~~~ + # + class SetLocalWC0 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_0, iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the parent frame to the value at the + # top of the stack as determined by the index given as its only argument. 
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # self.then { value = 10 } + # ~~~ + # + class SetLocalWC1 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setn` sets a value in the stack to a value popped off the top of the + # stack. It then pushes that value onto the top of the stack as well. + # + # ### Usage + # + # ~~~ruby + # {}[:key] = 'val' + # ~~~ + # + class SetN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:setn, number] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setspecial` pops a value off the top of the stack and sets a special + # local variable to that value. The special local variable is determined by + # the key given as its only argument. + # + # ### Usage + # + # ~~~ruby + # baz if (foo == 1) .. (bar == 1) + # ~~~ + # + class SetSpecial + attr_reader :key + + def initialize(key) + @key = key + end + + def to_a(_iseq) + [:setspecial, key] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `splatarray` coerces the array object at the top of the stack into Array + # by calling `to_a`. It pushes a duplicate of the array if there is a flag, + # and the original array if there isn't one. + # + # ### Usage + # + # ~~~ruby + # x = *(5) + # ~~~ + # + class SplatArray + attr_reader :flag + + def initialize(flag) + @flag = flag + end + + def to_a(_iseq) + [:splatarray, flag] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `swap` swaps the top two elements in the stack. + # + # ### TracePoint + # + # `swap` does not dispatch any events. 
+ # + # ### Usage + # + # ~~~ruby + # !!defined?([[]]) + # ~~~ + # + class Swap + def to_a(_iseq) + [:swap] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 2 + end + end + + # ### Summary + # + # `topn` pushes a single value onto the stack that is a copy of the value + # within the stack that is `number` of slots down from the top. + # + # ### Usage + # + # ~~~ruby + # case 3 + # when 1..5 + # puts "foo" + # end + # ~~~ + # + class TopN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:topn, number] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `toregexp` pops a number of values off the stack, combines them into a new + # regular expression, and pushes the new regular expression onto the stack. + # + # ### Usage + # + # ~~~ruby + # /foo #{bar}/ + # ~~~ + # + class ToRegExp + attr_reader :options, :length + + def initialize(options, length) + @options = options + @length = length + end + + def to_a(_iseq) + [:toregexp, options, length] + end + + def pops + length + end + + def pushes + 1 + end + end + end +end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb new file mode 100644 index 00000000..45dfe768 --- /dev/null +++ b/lib/syntax_tree/yarv/legacy.rb @@ -0,0 +1,169 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. 
+ # + # This version of the `getclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_getinlinecache` is a wrapper around a series of `putobject` and + # `getconstant` instructions that allows skipping past them if the inline + # cache is currently set. It pushes the value of the cache onto the stack + # if it is set, otherwise it pushes `nil`. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. + # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptGetInlineCache + attr_reader :label, :cache + + def initialize(label, cache) + @label = label + @cache = cache + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:opt_getinlinecache, label, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops + # the value it should set off the top of the stack. It then pushes that + # value back onto the top of the stack. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction.
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptSetInlineCache + attr_reader :cache + + def initialize(cache) + @cache = cache + end + + def to_a(_iseq) + [:opt_setinlinecache, cache] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. + # + # This version of the `setclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setclassvariable, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb new file mode 100644 index 00000000..5eac346c --- /dev/null +++ b/lib/syntax_tree/yarv/local_table.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. 
+ class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level = 0) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cf0667bb..6b185dea 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -2,18 +2,13 @@ return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" require_relative "test_helper" -require "fiddle" module SyntaxTree class CompilerTest < Minitest::Test - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - CASES = [ + # Hooks + "BEGIN { a = 1 }", + "a = 1; END { a = 1 }; a", # Various literals placed on the stack "true", "false", @@ -206,6 +201,8 @@ class CompilerTest < Minitest::Test "foo[bar] ||= 1", "foo[bar] <<= 1", "foo[bar] ^= 1", + "foo['true']", + "foo['true'] = 1", # Constants (single) "Foo", "Foo = 1", @@ -288,6 +285,7 @@ class CompilerTest < Minitest::Test "foo ? bar : baz", "case foo when bar then 1 end", "case foo when bar then 1 else 2 end", + "baz if (foo == 1) .. 
(bar == 1)", # Constructed values "foo..bar", "foo...bar", @@ -313,6 +311,7 @@ class CompilerTest < Minitest::Test "[1, 2, 3].min", "[foo, bar, baz].min", "[foo, bar, baz].min(1)", + "[**{ x: true }][0][:x]", # Core method calls "alias foo bar", "alias :foo :bar", @@ -364,6 +363,7 @@ class CompilerTest < Minitest::Test "def foo(bar, *baz, &qux); end", "def foo(&qux); qux; end", "def foo(&qux); qux.call; end", + "def foo(&qux); qux = bar; end", "def foo(bar:); end", "def foo(bar:, baz:); end", "def foo(bar: 1); end", @@ -416,7 +416,14 @@ class CompilerTest < Minitest::Test "-> {}", "-> (bar) do end", "-> (bar) {}", - "-> (bar; baz) { }" + "-> (bar; baz) { }", + # Pattern matching + "foo in bar", + "foo in [bar]", + "foo in [bar, baz]", + "foo in [1, 2, 3, bar, 4, 5, 6, baz]", + "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", + "foo => bar" ] # These are the combinations of instructions that we're going to test. @@ -457,7 +464,7 @@ def serialize_iseq(iseq) when Array insn.map do |operand| if operand.is_a?(Array) && - operand[0] == Visitor::Compiler::InstructionSequence::MAGIC + operand[0] == YARV::InstructionSequence::MAGIC serialize_iseq(operand) else operand @@ -478,20 +485,12 @@ def assert_compiles(source, **options) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(Visitor::Compiler.new(**options))) + serialize_iseq(program.accept(YARV::Compiler.new(**options))) ) end def assert_evaluates(expected, source, **options) - program = SyntaxTree.parse(source) - compiled = program.accept(Visitor::Compiler.new(**options)).to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - iseq = Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)) - assert_equal expected, iseq.eval + assert_equal expected, YARV.compile(source, **options).eval end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb new file mode 100644 index 00000000..02514a93 --- /dev/null +++ b/test/yarv_test.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" +require_relative "test_helper" + +module SyntaxTree + class YARVTest < Minitest::Test + CASES = { + "0" => "break 0\n", + "1" => "break 1\n", + "2" => "break 2\n", + "1.0" => "break 1.0\n", + "1 + 2" => "break 1 + 2\n", + "1 - 2" => "break 1 - 2\n", + "1 * 2" => "break 1 * 2\n", + "1 / 2" => "break 1 / 2\n", + "1 % 2" => "break 1 % 2\n", + "1 < 2" => "break 1 < 2\n", + "1 <= 2" => "break 1 <= 2\n", + "1 > 2" => "break 1 > 2\n", + "1 >= 2" => "break 1 >= 2\n", + "1 == 2" => "break 1 == 2\n", + "1 != 2" => "break 1 != 2\n", + "1 & 2" => "break 1 & 2\n", + "1 | 2" => "break 1 | 2\n", + "1 << 2" => "break 1 << 2\n", + "1 >> 2" => "break 1.>>(2)\n", + "1 ** 2" => "break 1.**(2)\n", + "a = 1; a" => "a = 1\nbreak a\n" + }.freeze + + CASES.each do |source, expected| + define_method("test_disassemble_#{source}") do + assert_disassembles(expected, source) + end + end + + def test_bf + hello_world = + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \ + ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." + + iseq = YARV::Bf.new(hello_world).compile + Formatter.format(hello_world, YARV::Disassembler.new(iseq).to_ruby) + end + + private + + def assert_disassembles(expected, source) + ruby = YARV::Disassembler.new(YARV.compile(source)).to_ruby + actual = Formatter.format(source, ruby) + assert_equal expected, actual + end + end +end