From be9465d49edf5fe71b470aefeff1893289d68070 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 16:45:20 -0500 Subject: [PATCH 1/6] Handle inline_const_cache=false --- lib/syntax_tree/yarv/compiler.rb | 4 +++ lib/syntax_tree/yarv/instruction_sequence.rb | 32 +++++++++++++++----- test/compiler_test.rb | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 45f2bb59..21d335ce 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -204,6 +204,7 @@ def visit_unsupported(_node) # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. attr_reader :frozen_string_literal, + :inline_const_cache, :operands_unification, :specialized_instruction @@ -217,10 +218,12 @@ def visit_unsupported(_node) def initialize( frozen_string_literal: false, + inline_const_cache: true, operands_unification: true, specialized_instruction: true ) @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction @@ -1374,6 +1377,7 @@ def visit_program(node) nil, node.location, frozen_string_literal: frozen_string_literal, + inline_const_cache: inline_const_cache, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 411f4692..4754618e 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -77,6 +77,7 @@ def change_by(value) # These are various compilation options provided. attr_reader :frozen_string_literal, + :inline_const_cache, :operands_unification, :specialized_instruction @@ -86,6 +87,7 @@ def initialize( parent_iseq, location, frozen_string_literal: false, + inline_const_cache: true, operands_unification: true, specialized_instruction: true ) @@ -104,6 +106,7 @@ def initialize( @stack = Stack.new @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction end @@ -192,6 +195,7 @@ def child_iseq(type, name, location) self, location, frozen_string_literal: frozen_string_literal, + inline_const_cache: inline_const_cache, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) @@ -434,14 +438,24 @@ def opt_aset_with(object, calldata) end def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" - cache = inline_storage - getinlinecache = opt_getinlinecache(-1, cache) - - if names[0] == :"" + if RUBY_VERSION < "3.2" || !inline_const_cache + cache = nil + getinlinecache = nil + + if inline_const_cache + cache = inline_storage + getinlinecache = opt_getinlinecache(-1, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + elsif names[0] == :"" names.shift - pop putobject(Object) + else + putnil end names.each_with_index do |name, index| @@ -449,8 +463,10 @@ def opt_getconstant_path(names) getconstant(name) end - opt_setinlinecache(cache) - getinlinecache.patch!(self) + if inline_const_cache + opt_setinlinecache(cache) + getinlinecache.patch!(self) + end else push(OptGetConstantPath.new(names)) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 6b185dea..387a726d 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -432,6 +432,7 @@ class CompilerTest < Minitest::Test { frozen_string_literal: true }, { operands_unification: false }, { specialized_instruction: false }, + { inline_const_cache: false }, { operands_unification: false, specialized_instruction: false } ] From 4631b5c1708ac71fc53614924ccf1b6155203b94 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 09:31:49 -0500 Subject: [PATCH 2/6] Convert options into an object --- lib/syntax_tree/yarv.rb | 4 +- lib/syntax_tree/yarv/compiler.rb | 92 ++++++++++++-------- lib/syntax_tree/yarv/instruction_sequence.rb | 56 ++++-------- test/compiler_test.rb | 22 ++--- 4 files changed, 87 insertions(+), 87 deletions(-) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index df8bc3ce..1e759ad1 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -4,8 +4,8 @@ module SyntaxTree # This module provides an object representation of the YARV bytecode. module YARV # Compile the given source into a YARV instruction sequence. - def self.compile(source, **options) - SyntaxTree.parse(source).accept(Compiler.new(**options)) + def self.compile(source, options = Compiler::Options.new) + SyntaxTree.parse(source).accept(Compiler.new(options)) end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 21d335ce..5d717bd1 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -45,6 +45,53 @@ module YARV # RubyVM::InstructionSequence.compile("1 + 2").to_a # class Compiler < BasicVisitor + # This represents a set of options that can be passed to the compiler to + # control how it compiles the code. It mirrors the options that can be + # passed to RubyVM::InstructionSequence.compile, except it only includes + # options that actually change the behavior. + class Options + def initialize( + frozen_string_literal: false, + inline_const_cache: true, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + def to_hash + { + frozen_string_literal: @frozen_string_literal, + inline_const_cache: @inline_const_cache, + operands_unification: @operands_unification, + specialized_instruction: @specialized_instruction + } + end + + def frozen_string_literal! + @frozen_string_literal = true + end + + def frozen_string_literal? + @frozen_string_literal + end + + def inline_const_cache? + @inline_const_cache + end + + def operands_unification? + @operands_unification + end + + def specialized_instruction? + @specialized_instruction + end + end + # This visitor is responsible for converting Syntax Tree nodes into their # corresponding Ruby structures. This is used to convert the operands of # some instructions like putobject that push a Ruby object directly onto @@ -203,10 +250,7 @@ def visit_unsupported(_node) # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :inline_const_cache, - :operands_unification, - :specialized_instruction + attr_reader :options # The current instruction sequence that is being compiled. attr_reader :iseq @@ -216,17 +260,8 @@ def visit_unsupported(_node) # if we need to return the value of the last statement. attr_reader :last_statement - def initialize( - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - + def initialize(options) + @options = options @iseq = nil @last_statement = false end @@ -236,7 +271,7 @@ def visit_BEGIN(node) end def visit_CHAR(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.putobject(node.value[1..]) else iseq.putstring(node.value[1..]) @@ -282,7 +317,7 @@ def visit_aref(node) calldata = YARV.calldata(:[], 1) visit(node.collection) - if !frozen_string_literal && specialized_instruction && + if !options.frozen_string_literal? && options.specialized_instruction? && (node.index.parts.length == 1) arg = node.index.parts.first @@ -453,7 +488,7 @@ def visit_assign(node) when ARefField calldata = YARV.calldata(:[]=, 2) - if !frozen_string_literal && specialized_instruction && + if !options.frozen_string_literal? && options.specialized_instruction? && (node.target.index.parts.length == 1) arg = node.target.index.parts.first @@ -1352,7 +1387,7 @@ def visit_program(node) break unless statement.is_a?(Comment) if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true + options.frozen_string_literal! end end @@ -1370,18 +1405,7 @@ def visit_program(node) end end - top_iseq = - InstructionSequence.new( - :top, - "", - nil, - node.location, - frozen_string_literal: frozen_string_literal, - inline_const_cache: inline_const_cache, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - + top_iseq = InstructionSequence.new(:top, "", nil, node.location, options) with_child_iseq(top_iseq) do visit_all(preexes) @@ -1402,7 +1426,7 @@ def visit_qsymbols(node) end def visit_qwords(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.duparray(node.accept(RubyVisitor.new)) else visit_all(node.elements) @@ -1632,7 +1656,7 @@ def visit_top_const_ref(node) end def visit_tstring_content(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.putobject(node.accept(RubyVisitor.new)) else iseq.putstring(node.accept(RubyVisitor.new)) @@ -1808,7 +1832,7 @@ def visit_word(node) end def visit_words(node) - if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + if options.frozen_string_literal? && (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) else visit_all(node.elements) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 4754618e..156070da 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -76,21 +76,9 @@ def change_by(value) attr_reader :stack # These are various compilation options provided. - attr_reader :frozen_string_literal, - :inline_const_cache, - :operands_unification, - :specialized_instruction - - def initialize( - type, - name, - parent_iseq, - location, - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - specialized_instruction: true - ) + attr_reader :options + + def initialize(type, name, parent_iseq, location, options = Compiler::Options.new) @type = type @name = name @parent_iseq = parent_iseq @@ -105,10 +93,7 @@ def initialize( @storage_index = 0 @stack = Stack.new - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction + @options = options end ########################################################################## @@ -189,16 +174,7 @@ def to_a ########################################################################## def child_iseq(type, name, location) - InstructionSequence.new( - type, - name, - self, - location, - frozen_string_literal: frozen_string_literal, - inline_const_cache: inline_const_cache, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) + InstructionSequence.new(type, name, self, location, options) end def block_child_iseq(location) @@ -359,7 +335,7 @@ def getinstancevariable(name) end def getlocal(index, level) - if operands_unification + if options.operands_unification? # Specialize the getlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized # instruction that will look at the current scope or the parent @@ -438,11 +414,11 @@ def opt_aset_with(object, calldata) end def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" || !inline_const_cache + if RUBY_VERSION < "3.2" || !options.inline_const_cache? cache = nil getinlinecache = nil - if inline_const_cache + if options.inline_const_cache? cache = inline_storage getinlinecache = opt_getinlinecache(-1, cache) @@ -463,7 +439,7 @@ def opt_getconstant_path(names) getconstant(name) end - if inline_const_cache + if options.inline_const_cache? opt_setinlinecache(cache) getinlinecache.patch!(self) end @@ -477,7 +453,7 @@ def opt_getinlinecache(label, cache) end def opt_newarray_max(length) - if specialized_instruction + if options.specialized_instruction? push(OptNewArrayMax.new(length)) else newarray(length) @@ -486,7 +462,7 @@ def opt_newarray_max(length) end def opt_newarray_min(length) - if specialized_instruction + if options.specialized_instruction? push(OptNewArrayMin.new(length)) else newarray(length) @@ -499,7 +475,7 @@ def opt_setinlinecache(cache) end def opt_str_freeze(object) - if specialized_instruction + if options.specialized_instruction? push(OptStrFreeze.new(object, YARV.calldata(:freeze))) else putstring(object) @@ -508,7 +484,7 @@ def opt_str_freeze(object) end def opt_str_uminus(object) - if specialized_instruction + if options.specialized_instruction? push(OptStrUMinus.new(object, YARV.calldata(:-@))) else putstring(object) @@ -525,7 +501,7 @@ def putnil end def putobject(object) - if operands_unification + if options.operands_unification? # Specialize the putobject instruction based on the value of the # object. If it's 0 or 1, then there's a specialized instruction # that will push the object onto the stack and requires fewer @@ -555,7 +531,7 @@ def putstring(object) end def send(calldata, block_iseq = nil) - if specialized_instruction && !block_iseq && + if options.specialized_instruction? && !block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block @@ -645,7 +621,7 @@ def setinstancevariable(name) end def setlocal(index, level) - if operands_unification + if options.operands_unification? # Specialize the setlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized # instruction that will write to the current scope or the parent diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 387a726d..5a602417 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -428,12 +428,12 @@ class CompilerTest < Minitest::Test # These are the combinations of instructions that we're going to test. OPTIONS = [ - {}, - { frozen_string_literal: true }, - { operands_unification: false }, - { specialized_instruction: false }, - { inline_const_cache: false }, - { operands_unification: false, specialized_instruction: false } + YARV::Compiler::Options.new, + YARV::Compiler::Options.new(frozen_string_literal: true), + YARV::Compiler::Options.new(operands_unification: false), + YARV::Compiler::Options.new(specialized_instruction: false), + YARV::Compiler::Options.new(inline_const_cache: false), + YARV::Compiler::Options.new(operands_unification: false, specialized_instruction: false) ] OPTIONS.each do |options| @@ -441,7 +441,7 @@ class CompilerTest < Minitest::Test CASES.each do |source| define_method(:"test_#{source}_#{suffix}") do - assert_compiles(source, **options) + assert_compiles(source, options) end end end @@ -481,17 +481,17 @@ def serialize_iseq(iseq) serialized end - def assert_compiles(source, **options) + def assert_compiles(source, options) program = SyntaxTree.parse(source) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(YARV::Compiler.new(**options))) + serialize_iseq(program.accept(YARV::Compiler.new(options))) ) end - def assert_evaluates(expected, source, **options) - assert_equal expected, YARV.compile(source, **options).eval + def assert_evaluates(expected, source) + assert_equal expected, YARV.compile(source).eval end end end From da1e46604d56941de004ce561da5b56e7eae1bde Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 09:38:33 -0500 Subject: [PATCH 3/6] Support the tailcall_optimization flag --- lib/syntax_tree/yarv/compiler.rb | 30 ++++++++++++++++++++++++++++-- test/compiler_test.rb | 6 +++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 5d717bd1..4b0587fc 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -54,12 +54,14 @@ def initialize( frozen_string_literal: false, inline_const_cache: true, operands_unification: true, - specialized_instruction: true + specialized_instruction: true, + tailcall_optimization: false ) @frozen_string_literal = frozen_string_literal @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction + @tailcall_optimization = tailcall_optimization end def to_hash @@ -67,7 +69,8 @@ def to_hash frozen_string_literal: @frozen_string_literal, inline_const_cache: @inline_const_cache, operands_unification: @operands_unification, - specialized_instruction: @specialized_instruction + specialized_instruction: @specialized_instruction, + tailcall_optimization: @tailcall_optimization } end @@ -90,6 +93,10 @@ def operands_unification? def specialized_instruction? @specialized_instruction end + + def tailcall_optimization? + @tailcall_optimization + end end # This visitor is responsible for converting Syntax Tree nodes into their @@ -716,12 +723,17 @@ def visit_call(node) end end + # Track whether or not this is a method call on a block proxy receiver. + # If it is, we can potentially do tailcall optimizations on it. + block_receiver = false + if node.receiver if node.receiver.is_a?(VarRef) lookup = iseq.local_variable(node.receiver.value.value.to_sym) if lookup.local.is_a?(LocalTable::BlockLocal) iseq.getblockparamproxy(lookup.index, lookup.level) + block_receiver = true else visit(node.receiver) end @@ -752,6 +764,7 @@ def visit_call(node) when ArgsForward flag |= CallData::CALL_ARGS_SPLAT flag |= CallData::CALL_ARGS_BLOCKARG + flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? lookup = iseq.local_table.find(:*) iseq.getlocal(lookup.index, lookup.level) @@ -768,9 +781,22 @@ def visit_call(node) end block_iseq = visit(node.block) if node.block + + # If there's no block and we don't already have any special flags set, + # then we can safely call this simple arguments. Note that has to be the + # first flag we set after looking at the arguments to get the flags + # correct. flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + + # If there's no receiver, then this is an "fcall". flag |= CallData::CALL_FCALL if node.receiver.nil? + # If we're calling a method on the passed block object and we have + # tailcall optimizations turned on, then we can set the tailcall flag. + if block_receiver && options.tailcall_optimization? + flag |= CallData::CALL_TAILCALL + end + iseq.send( YARV.calldata(node.message.value.to_sym, argc, flag), block_iseq diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 5a602417..02343ca2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -433,14 +433,14 @@ class CompilerTest < Minitest::Test YARV::Compiler::Options.new(operands_unification: false), YARV::Compiler::Options.new(specialized_instruction: false), YARV::Compiler::Options.new(inline_const_cache: false), - YARV::Compiler::Options.new(operands_unification: false, specialized_instruction: false) + YARV::Compiler::Options.new(tailcall_optimization: true) ] OPTIONS.each do |options| - suffix = options.inspect + suffix = options.to_hash.map { |k, v| "#{k}=#{v}" }.join("&") CASES.each do |source| - define_method(:"test_#{source}_#{suffix}") do + define_method(:"test_#{source}_(#{suffix})") do assert_compiles(source, options) end end From 85df98f85dc297e16bc27003f2202728c871687e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 09:59:58 -0500 Subject: [PATCH 4/6] Provide shims for methods that should compile --- lib/syntax_tree/yarv/compiler.rb | 52 ++++++++++++++++++++++++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 54 insertions(+) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4b0587fc..bdc31ab3 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -54,12 +54,14 @@ def initialize( frozen_string_literal: false, inline_const_cache: true, operands_unification: true, + peephole_optimization: true, specialized_instruction: true, tailcall_optimization: false ) @frozen_string_literal = frozen_string_literal @inline_const_cache = inline_const_cache @operands_unification = operands_unification + @peephole_optimization = peephole_optimization @specialized_instruction = specialized_instruction @tailcall_optimization = tailcall_optimization end @@ -69,6 +71,7 @@ def to_hash frozen_string_literal: @frozen_string_literal, inline_const_cache: @inline_const_cache, operands_unification: @operands_unification, + peephole_optimization: @peephole_optimization, specialized_instruction: @specialized_instruction, tailcall_optimization: @tailcall_optimization } @@ -90,6 +93,10 @@ def operands_unification? @operands_unification end + def peephole_optimization? + @peephole_optimization + end + def specialized_instruction? @specialized_instruction end @@ -608,6 +615,9 @@ def visit_bare_assoc_hash(node) end end + def visit_begin(node) + end + def visit_binary(node) case node.operator when :"&&" @@ -669,6 +679,9 @@ def visit_bodystmt(node) visit(node.statements) end + def visit_break(node) + end + def visit_call(node) if node.is_a?(CallNode) return( @@ -1016,6 +1029,9 @@ def visit_elsif(node) ) end + def visit_ensure(node) + end + def visit_field(node) visit(node.parent) end @@ -1024,6 +1040,9 @@ def visit_float(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_fndptn(node) + end + def visit_for(node) visit(node.collection) @@ -1064,6 +1083,9 @@ def visit_hash(node) end end + def visit_hshptn(node) + end + def visit_heredoc(node) if node.beginning.value.end_with?("`") visit_xstring_literal(node) @@ -1143,6 +1165,9 @@ def visit_imaginary(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_in(node) + end + def visit_int(node) iseq.putobject(node.accept(RubyVisitor.new)) end @@ -1243,6 +1268,9 @@ def visit_mrhs(node) end end + def visit_next(node) + end + def visit_not(node) visit(node.statement) iseq.send(YARV.calldata(:!)) @@ -1408,6 +1436,12 @@ def visit_paren(node) visit(node.contents) end + def visit_pinned_begin(node) + end + + def visit_pinned_var_ref(node) + end + def visit_program(node) node.statements.body.each do |statement| break unless statement.is_a?(Comment) @@ -1566,6 +1600,9 @@ def visit_rational(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_redo(node) + end + def visit_regexp_literal(node) if (compiled = RubyVisitor.compile(node)) iseq.putobject(compiled) @@ -1576,12 +1613,27 @@ def visit_regexp_literal(node) end end + def visit_rescue(node) + end + + def visit_rescue_ex(node) + end + + def visit_rescue_mod(node) + end + def visit_rest_param(node) iseq.local_table.plain(node.name.value.to_sym) iseq.argument_options[:rest_start] = iseq.argument_size iseq.argument_size += 1 end + def visit_retry(node) + end + + def visit_return(node) + end + def visit_sclass(node) visit(node.target) iseq.putnil diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 02343ca2..9ea7f21b 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -431,6 +431,8 @@ class CompilerTest < Minitest::Test YARV::Compiler::Options.new, YARV::Compiler::Options.new(frozen_string_literal: true), YARV::Compiler::Options.new(operands_unification: false), + # TODO: have this work when peephole optimizations are turned off. + # YARV::Compiler::Options.new(peephole_optimization: false), YARV::Compiler::Options.new(specialized_instruction: false), YARV::Compiler::Options.new(inline_const_cache: false), YARV::Compiler::Options.new(tailcall_optimization: true) From 83cdfbbc60adb200aa2d9fa7477c81ee7ab2e6c7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 10:05:31 -0500 Subject: [PATCH 5/6] Provide missing instructions --- lib/syntax_tree/yarv/instructions.rb | 84 ++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 5a23bbf0..3fcdadb3 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -1840,6 +1840,54 @@ def pushes end end + # ### Summary + # + # `opt_case_dispatch` is a branch instruction that moves the control flow + # for case statements that have clauses where they can all be used as hash + # keys for an internal hash. + # + # It has two arguments: the `case_dispatch_hash` and an `else_label`. It + # pops one value off the stack: a hash key. `opt_case_dispatch` looks up the + # key in the `case_dispatch_hash` and jumps to the corresponding label if + # there is one. If there is no value in the `case_dispatch_hash`, + # `opt_case_dispatch` jumps to the `else_label` index. + # + # ### Usage + # + # ~~~ruby + # case 1 + # when 1 + # puts "foo" + # else + # puts "bar" + # end + # ~~~ + # + class OptCaseDispatch + attr_reader :case_dispatch_hash, :else_label + + def initialize(case_dispatch_hash, else_label) + @case_dispatch_hash = case_dispatch_hash + @else_label = else_label + end + + def to_a(_iseq) + [:opt_case_dispatch, case_dispatch_hash, else_label] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + # ### Summary # # `opt_div` is a specialization of the `opt_send_without_block` instruction @@ -3534,6 +3582,42 @@ def pushes end end + # ### Summary + # + # `throw` pops a value off the top of the stack and throws it. It is caught + # using the instruction sequence's (or an ancestor's) catch table. It pushes + # on the result of throwing the value. + # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].map { break 2 } + # ~~~ + # + class Throw + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:throw, type] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + # ### Summary # # `topn` pushes a single value onto the stack that is a copy of the value From a43005d8a04e277f57b9cbf88d925197de13a367 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 11:06:26 -0500 Subject: [PATCH 6/6] Allow converting from compiled iseq to YARV iseq --- lib/syntax_tree/yarv/compiler.rb | 21 +- lib/syntax_tree/yarv/instruction_sequence.rb | 224 ++++++++++++++++++- lib/syntax_tree/yarv/instructions.rb | 9 + test/compiler_test.rb | 25 ++- 4 files changed, 267 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index bdc31ab3..f876cb3b 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -331,8 +331,8 @@ def visit_aref(node) calldata = YARV.calldata(:[], 1) visit(node.collection) - if !options.frozen_string_literal? && options.specialized_instruction? && - (node.index.parts.length == 1) + if !options.frozen_string_literal? && + options.specialized_instruction? && (node.index.parts.length == 1) arg = node.index.parts.first if arg.is_a?(StringLiteral) && (arg.parts.length == 1) @@ -502,7 +502,8 @@ def visit_assign(node) when ARefField calldata = YARV.calldata(:[]=, 2) - if !options.frozen_string_literal? && options.specialized_instruction? && + if !options.frozen_string_literal? && + options.specialized_instruction? && (node.target.index.parts.length == 1) arg = node.target.index.parts.first @@ -1085,7 +1086,7 @@ def visit_hash(node) def visit_hshptn(node) end - + def visit_heredoc(node) if node.beginning.value.end_with?("`") visit_xstring_literal(node) @@ -1465,7 +1466,14 @@ def visit_program(node) end end - top_iseq = InstructionSequence.new(:top, "", nil, node.location, options) + top_iseq = + InstructionSequence.new( + :top, + "", + nil, + node.location, + options + ) with_child_iseq(top_iseq) do visit_all(preexes) @@ -1910,7 +1918,8 @@ def visit_word(node) end def visit_words(node) - if options.frozen_string_literal? && (compiled = RubyVisitor.compile(node)) + if options.frozen_string_literal? && + (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) else visit_all(node.elements) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 156070da..c6395f65 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -78,7 +78,13 @@ def change_by(value) # These are various compilation options provided. attr_reader :options - def initialize(type, name, parent_iseq, location, options = Compiler::Options.new) + def initialize( + type, + name, + parent_iseq, + location, + options = Compiler::Options.new + ) @type = type @name = name @parent_iseq = parent_iseq @@ -413,6 +419,10 @@ def opt_aset_with(object, calldata) push(OptAsetWith.new(object, calldata)) end + def opt_case_dispatch(case_dispatch_hash, else_label) + push(OptCaseDispatch.new(case_dispatch_hash, else_label)) + end + def opt_getconstant_path(names) if RUBY_VERSION < "3.2" || !options.inline_const_cache? cache = nil @@ -655,6 +665,10 @@ def swap push(Swap.new) end + def throw(type) + push(Throw.new(type)) + end + def topn(number) push(TopN.new(number)) end @@ -662,6 +676,214 @@ def topn(number) def toregexp(options, length) push(ToRegExp.new(options, length)) end + + # This method will create a new instruction sequence from a serialized + # RubyVM::InstructionSequence object. + def self.from(source, options = Compiler::Options.new, parent_iseq = nil) + iseq = new(source[9], source[5], parent_iseq, Location.default, options) + + # set up the correct argument size + iseq.argument_size = source[4][:arg_size] + + # set up all of the locals + source[10].each { |local| iseq.local_table.plain(local) } + + # set up the argument options + iseq.argument_options.merge!(source[11]) + + # set up all of the instructions + source[13].each do |insn| + # skip line numbers + next if insn.is_a?(Integer) + + # put events into the array and then continue + if insn.is_a?(Symbol) + iseq.event(insn) + next + end + + type, *opnds = insn + case type + when :adjuststack + iseq.adjuststack(opnds[0]) + when :anytostring + iseq.anytostring + when :branchif + iseq.branchif(opnds[0]) + when :branchnil + iseq.branchnil(opnds[0]) + when :branchunless + iseq.branchunless(opnds[0]) + when :checkkeyword + iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) + when :checkmatch + iseq.checkmatch(opnds[0]) + when :checktype + iseq.checktype(opnds[0]) + when :concatarray + iseq.concatarray + when :concatstrings + iseq.concatstrings(opnds[0]) + when :defineclass + iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) + when :defined + iseq.defined(opnds[0], opnds[1], opnds[2]) + when :definemethod + iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) + when :definesmethod + iseq.definesmethod(opnds[0], from(opnds[1], options, iseq)) + when :dup + iseq.dup + when :duparray + iseq.duparray(opnds[0]) + when :duphash + iseq.duphash(opnds[0]) + when :dupn + iseq.dupn(opnds[0]) + when :expandarray + iseq.expandarray(opnds[0], opnds[1]) + when :getblockparam, :getblockparamproxy, :getlocal, :getlocal_WC_0, + :getlocal_WC_1, :setblockparam, :setlocal, :setlocal_WC_0, + :setlocal_WC_1 + current = iseq + level = 0 + + case type + when :getlocal_WC_1, :setlocal_WC_1 + level = 1 + when :getblockparam, :getblockparamproxy, :getlocal, :setblockparam, + :setlocal + level = opnds[1] + end + + level.times { current = current.parent_iseq } + index = current.local_table.size - opnds[0] + 2 + + case type + when :getblockparam + iseq.getblockparam(index, level) + when :getblockparamproxy + iseq.getblockparamproxy(index, level) + when :getlocal, :getlocal_WC_0, :getlocal_WC_1 + iseq.getlocal(index, level) + when :setblockparam + iseq.setblockparam(index, level) + when :setlocal, :setlocal_WC_0, :setlocal_WC_1 + iseq.setlocal(index, level) + end + when :getclassvariable + iseq.push(GetClassVariable.new(opnds[0], opnds[1])) + when :getconstant + iseq.getconstant(opnds[0]) + when :getglobal + iseq.getglobal(opnds[0]) + when :getinstancevariable + iseq.push(GetInstanceVariable.new(opnds[0], opnds[1])) + when :getspecial + iseq.getspecial(opnds[0], opnds[1]) + when :intern + iseq.intern + when :invokeblock + iseq.invokeblock(CallData.from(opnds[0])) + when :invokesuper + block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil + iseq.invokesuper(CallData.from(opnds[0]), block_iseq) + when :jump + iseq.jump(opnds[0]) + when :leave + iseq.leave + when :newarray + iseq.newarray(opnds[0]) + when :newarraykwsplat + iseq.newarraykwsplat(opnds[0]) + when :newhash + iseq.newhash(opnds[0]) + when :newrange + iseq.newrange(opnds[0]) + when :nop + iseq.nop + when :objtostring + iseq.objtostring(CallData.from(opnds[0])) + when :once + iseq.once(from(opnds[0], options, iseq), opnds[1]) + when :opt_and, :opt_aref, :opt_aset, :opt_div, :opt_empty_p, :opt_eq, + :opt_ge, :opt_gt, :opt_le, :opt_length, :opt_lt, :opt_ltlt, + :opt_minus, :opt_mod, :opt_mult, :opt_nil_p, :opt_not, :opt_or, + :opt_plus, :opt_regexpmatch2, :opt_send_without_block, :opt_size, + :opt_succ + iseq.send(CallData.from(opnds[0]), nil) + when :opt_aref_with + iseq.opt_aref_with(opnds[0], CallData.from(opnds[1])) + when :opt_aset_with + iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) + when :opt_case_dispatch + iseq.opt_case_dispatch(opnds[0], opnds[1]) + when :opt_getconstant_path + iseq.opt_getconstant_path(opnds[0]) + when :opt_getinlinecache + iseq.opt_getinlinecache(opnds[0], opnds[1]) + when :opt_newarray_max + iseq.opt_newarray_max(opnds[0]) + when :opt_newarray_min + iseq.opt_newarray_min(opnds[0]) + when :opt_neq + iseq.push( + OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) + ) + when :opt_setinlinecache + iseq.opt_setinlinecache(opnds[0]) + when :opt_str_freeze + iseq.opt_str_freeze(opnds[0]) + when :opt_str_uminus + iseq.opt_str_uminus(opnds[0]) + when :pop + iseq.pop + when :putnil + iseq.putnil + when :putobject + iseq.putobject(opnds[0]) + when :putobject_INT2FIX_0_ + iseq.putobject(0) + when :putobject_INT2FIX_1_ + iseq.putobject(1) + when :putself + iseq.putself + when :putstring + iseq.putstring(opnds[0]) + when :putspecialobject + iseq.putspecialobject(opnds[0]) + when :send + block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil + iseq.send(CallData.from(opnds[0]), block_iseq) + when :setclassvariable + iseq.push(SetClassVariable.new(opnds[0], opnds[1])) + when :setconstant + iseq.setconstant(opnds[0]) + when :setglobal + iseq.setglobal(opnds[0]) + when :setinstancevariable + iseq.push(SetInstanceVariable.new(opnds[0], opnds[1])) + when :setn + iseq.setn(opnds[0]) + when :setspecial + iseq.setspecial(opnds[0]) + when :splatarray + iseq.splatarray(opnds[0]) + when :swap + iseq.swap + when :throw + iseq.throw(opnds[0]) + when :topn + iseq.topn(opnds[0]) + when :toregexp + iseq.toregexp(opnds[0], opnds[1]) + else + raise "Unknown instruction type: #{type}" + end + end + + iseq + end end end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 3fcdadb3..9c816072 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -42,6 +42,15 @@ def to_h result[:kw_arg] = kw_arg if kw_arg result end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end end # A convenience method for creating a CallData object. diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 9ea7f21b..1f4a5299 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -439,12 +439,16 @@ class CompilerTest < Minitest::Test ] OPTIONS.each do |options| - suffix = options.to_hash.map { |k, v| "#{k}=#{v}" }.join("&") + suffix = options.to_hash.map { |key, value| "#{key}=#{value}" }.join("&") CASES.each do |source| - define_method(:"test_#{source}_(#{suffix})") do + define_method(:"test_compiles_#{source}_(#{suffix})") do assert_compiles(source, options) end + + define_method(:"test_loads_#{source}_(#{suffix})") do + assert_loads(source, options) + end end end @@ -483,12 +487,23 @@ def serialize_iseq(iseq) serialized end + # Check that the compiled instruction sequence matches the expected + # instruction sequence. def assert_compiles(source, options) - program = SyntaxTree.parse(source) - assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(YARV::Compiler.new(options))) + serialize_iseq(YARV.compile(source, options)) + ) + end + + # Check that the compiled instruction sequence matches the instruction + # sequence created directly from the compiled instruction sequence. + def assert_loads(source, options) + compiled = RubyVM::InstructionSequence.compile(source, **options) + + assert_equal( + serialize_iseq(compiled), + serialize_iseq(YARV::InstructionSequence.from(compiled.to_a, options)) ) end