From 5dcd6722b6ccec6e95ade74d08d3260fdd292a54 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 12:37:45 -0500 Subject: [PATCH 01/12] Use label objects instead of symbols --- lib/syntax_tree/yarv/bf.rb | 2 +- lib/syntax_tree/yarv/compiler.rb | 86 +++++++++++--------- lib/syntax_tree/yarv/instruction_sequence.rb | 69 +++++++++++++--- lib/syntax_tree/yarv/instructions.rb | 18 ++-- lib/syntax_tree/yarv/legacy.rb | 2 +- 5 files changed, 115 insertions(+), 62 deletions(-) diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 0fb27f7e..9b037305 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -153,7 +153,7 @@ def input_char(iseq) # unless $tape[$cursor] == 0 def loop_start(iseq) - start_label = iseq.label + start_label = iseq.label_at_index iseq.getglobal(:$tape) iseq.getglobal(:$cursor) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index f876cb3b..5f4f6ac0 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -417,7 +417,8 @@ def visit_aryptn(node) # First, check if the #deconstruct cache is nil. If it is, we're going # to call #deconstruct on the object and cache the result. iseq.topn(2) - branchnil = iseq.branchnil(-1) + deconstruct_label = iseq.label + iseq.branchnil(deconstruct_label) # Next, ensure that the cached value was cached correctly, otherwise # fail the match. @@ -432,7 +433,7 @@ def visit_aryptn(node) # Check if the object responds to #deconstruct, fail the match # otherwise. 
- branchnil.patch!(iseq) + iseq.event(deconstruct_label) iseq.dup iseq.putobject(:deconstruct) iseq.send(YARV.calldata(:respond_to?, 1)) @@ -634,11 +635,12 @@ def visit_binary(node) visit(node.left) iseq.dup - branchif = iseq.branchif(-1) + skip_right_label = iseq.label + iseq.branchif(skip_right_label) iseq.pop visit(node.right) - branchif.patch!(iseq) + iseq.push(skip_right_label) else visit(node.left) visit(node.right) @@ -758,11 +760,12 @@ def visit_call(node) iseq.putself end - branchnil = - if node.operator&.value == "&." - iseq.dup - iseq.branchnil(-1) - end + after_call_label = nil + if node.operator&.value == "&." + iseq.dup + after_call_label = iseq.label + iseq.branchnil(after_call_label) + end flag = 0 @@ -815,7 +818,7 @@ def visit_call(node) YARV.calldata(node.message.value.to_sym, argc, flag), block_iseq ) - branchnil.patch!(iseq) if branchnil + iseq.event(after_call_label) if after_call_label end def visit_case(node) @@ -845,16 +848,19 @@ def visit_case(node) CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE ) ) - [clause, iseq.branchif(:label_00)] + + label = iseq.label + iseq.branchif(label) + [clause, label] end iseq.pop else_clause ? 
visit(else_clause) : iseq.putnil iseq.leave - branches.each_with_index do |(clause, branchif), index| + branches.each_with_index do |(clause, label), index| iseq.leave if index != 0 - branchif.patch!(iseq) + iseq.push(label) iseq.pop visit(clause) end @@ -1100,26 +1106,28 @@ def visit_heredoc(node) def visit_if(node) if node.predicate.is_a?(RangeNode) + true_label = iseq.label + iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) - branchif = iseq.branchif(-1) + iseq.branchif(true_label) visit(node.predicate.left) - branchunless_true = iseq.branchunless(-1) + end_branch = iseq.branchunless(-1) iseq.putobject(true) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - branchif.patch!(iseq) + iseq.push(true_label) visit(node.predicate.right) - branchunless_false = iseq.branchunless(-1) + false_branch = iseq.branchunless(-1) iseq.putobject(false) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - branchunless_false.patch!(iseq) + false_branch.patch!(iseq) visit(node.statements) iseq.leave - branchunless_true.patch!(iseq) + end_branch.patch!(iseq) iseq.putnil else visit(node.predicate) @@ -1317,22 +1325,22 @@ def visit_opassign(node) [Const, CVar, GVar].include?(node.target.value.class) opassign_defined(node) else - branchif = nil + skip_value_label = iseq.label with_opassign(node) do iseq.dup - branchif = iseq.branchif(-1) + iseq.branchif(skip_value_label) iseq.pop visit(node.value) end if node.target.is_a?(ARefField) iseq.leave - branchif.patch!(iseq) + iseq.push(skip_value_label) iseq.setn(3) iseq.adjuststack(3) else - branchif.patch!(iseq) + iseq.push(skip_value_label) end end else @@ -1363,13 +1371,11 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:opt] = [iseq.label] unless argument_options.key?( - :opt - ) + argument_options[:opt] = [iseq.label_at_index] unless argument_options.key?(:opt) visit(value) iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label + iseq.argument_options[:opt] << 
iseq.label_at_index end visit(node.rest) if node.rest @@ -1406,12 +1412,14 @@ def visit_params(node) elsif (compiled = RubyVisitor.compile(value)) argument_options[:keyword] << [name, compiled] else + skip_value_label = iseq.label + argument_options[:keyword] << [name] iseq.checkkeyword(keyword_bits_index, keyword_index) - branchif = iseq.branchif(-1) + iseq.branchif(skip_value_label) visit(value) iseq.setlocal(index, 0) - branchif.patch!(iseq) + iseq.push(skip_value_label) end end @@ -1558,13 +1566,15 @@ def visit_rassign(node) jumps_to_match.concat(visit(node.pattern)) end + no_key_label = iseq.label + # First we're going to push the core onto the stack, then we'll check # if the value to match is truthy. If it is, we'll jump down to raise # NoMatchingPatternKeyError. Otherwise we'll raise # NoMatchingPatternError. iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.topn(4) - branchif_no_key = iseq.branchif(-1) + iseq.branchif(no_key_label) # Here we're going to raise NoMatchingPatternError. iseq.putobject(NoMatchingPatternError) @@ -1577,7 +1587,7 @@ def visit_rassign(node) jump_to_exit = iseq.jump(-1) # Here we're going to raise NoMatchingPatternKeyError. 
- branchif_no_key.patch!(iseq) + iseq.push(no_key_label) iseq.putobject(NoMatchingPatternKeyError) iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.putobject("%p: %s") @@ -1797,7 +1807,7 @@ def visit_unless(node) jump = iseq.jump(-1) branchunless.patch!(iseq) visit(node.consequent) - jump.patch!(iseq.label) + jump.patch!(iseq.label_at_index) else branchunless.patch!(iseq) end @@ -1812,7 +1822,7 @@ def visit_until(node) iseq.pop jumps << iseq.jump(-1) - label = iseq.label + label = iseq.label_at_index visit(node.statements) iseq.pop jumps.each { |jump| jump.patch!(iseq) } @@ -1891,6 +1901,7 @@ def visit_when(node) end def visit_while(node) + repeat_label = iseq.label jumps = [] jumps << iseq.jump(-1) @@ -1898,13 +1909,13 @@ def visit_while(node) iseq.pop jumps << iseq.jump(-1) - label = iseq.label + iseq.push(repeat_label) visit(node.statements) iseq.pop jumps.each { |jump| jump.patch!(iseq) } visit(node.predicate) - iseq.branchif(label) + iseq.branchif(repeat_label) iseq.putnil if last_statement? end @@ -2060,7 +2071,8 @@ def opassign_defined(node) end iseq.dup - branchif = iseq.branchif(-1) + skip_value_label = iseq.label + iseq.branchif(skip_value_label) iseq.pop branchunless.patch!(iseq) @@ -2085,7 +2097,7 @@ def opassign_defined(node) end end - branchif.patch!(iseq) + iseq.push(skip_value_label) end # Whenever a value is interpolated into a string-like structure, these diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index c6395f65..e47a18ea 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -41,6 +41,21 @@ def change_by(value) end end + # This represents the destination of instructions that jump. Initially it + # does not track its position so that when we perform optimizations the + # indices don't get messed up. 
+ class Label + attr_reader :name + + def initialize(name = nil) + @name = name + end + + def patch!(name) + @name = name + end + end + # The type of the instruction sequence. attr_reader :type @@ -129,7 +144,7 @@ def inline_storage_for(name) def length insns.inject(0) do |sum, insn| case insn - when Integer, Symbol + when Integer, Label, Symbol sum else sum + insn.length @@ -151,6 +166,20 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) + # First, set it up so that all of the labels get their correct name. + insns.inject(0) do |length, insn| + case insn + when Integer, Symbol + length + when Label + insn.patch!(:"label_#{length}") + length + else + length + insn.length + end + end + + # Next, return the instruction sequence as an array. [ MAGIC, versions[0], @@ -170,7 +199,14 @@ def to_a argument_options, [], insns.map do |insn| - insn.is_a?(Integer) || insn.is_a?(Symbol) ? insn : insn.to_a(self) + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end end ] end @@ -209,11 +245,15 @@ def singleton_class_child_iseq(location) # Instruction push methods ########################################################################## + def label + Label.new + end + def push(insn) insns << insn case insn - when Integer, Symbol, Array + when Array, Integer, Label, Symbol insn else stack.change_by(-insn.pops + insn.pushes) @@ -221,9 +261,7 @@ def push(insn) end end - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label + def label_at_index name = :"label_#{length}" insns.last == name ? 
name : event(name) end @@ -691,27 +729,38 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up the argument options iseq.argument_options.merge!(source[11]) + # set up the labels object so that the labels are shared between the + # location in the instruction sequence and the instructions that + # reference them + labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + # set up all of the instructions source[13].each do |insn| # skip line numbers next if insn.is_a?(Integer) - # put events into the array and then continue + # add events and labels if insn.is_a?(Symbol) - iseq.event(insn) + if insn.start_with?("label_") + iseq.push(labels[insn]) + else + iseq.push(insn) + end next end + # add instructions, mapped to our own instruction classes type, *opnds = insn + case type when :adjuststack iseq.adjuststack(opnds[0]) when :anytostring iseq.anytostring when :branchif - iseq.branchif(opnds[0]) + iseq.branchif(labels[opnds[0]]) when :branchnil - iseq.branchnil(opnds[0]) + iseq.branchnil(labels[opnds[0]]) when :branchunless iseq.branchunless(opnds[0]) when :checkkeyword diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 9c816072..c340cd4e 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -159,12 +159,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label - end - def to_a(_iseq) - [:branchif, label] + [:branchif, label.name] end def length @@ -204,12 +200,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label - end - def to_a(_iseq) - [:branchnil, label] + [:branchnil, label.name] end def length @@ -249,7 +241,7 @@ def initialize(label) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) @@ -297,7 +289,7 @@ def initialize(keyword_bits_index, keyword_index) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end 
def to_a(iseq) @@ -1360,7 +1352,7 @@ def initialize(label) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 45dfe768..20588974 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -68,7 +68,7 @@ def initialize(label, cache) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) From 633ab9bea7f542b098c975296e7e6044faefdb51 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 14:26:28 -0500 Subject: [PATCH 02/12] Start using labels for jumps --- lib/syntax_tree/yarv/bf.rb | 12 +- lib/syntax_tree/yarv/compiler.rb | 335 ++++++++++--------- lib/syntax_tree/yarv/disassembler.rb | 21 +- lib/syntax_tree/yarv/instruction_sequence.rb | 15 +- lib/syntax_tree/yarv/instructions.rb | 16 +- lib/syntax_tree/yarv/legacy.rb | 6 +- 6 files changed, 196 insertions(+), 209 deletions(-) diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 9b037305..78c01af5 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -153,23 +153,25 @@ def input_char(iseq) # unless $tape[$cursor] == 0 def loop_start(iseq) - start_label = iseq.label_at_index + start_label = iseq.label + end_label = iseq.label + iseq.push(start_label) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) iseq.send(YARV.calldata(:[], 1)) iseq.putobject(0) iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(end_label) - branchunless = iseq.branchunless(-1) - [start_label, branchunless] + [start_label, end_label] end # Jump back to the start of the loop. 
- def loop_end(iseq, start_label, branchunless) + def loop_end(iseq, start_label, end_label) iseq.jump(start_label) - branchunless.patch!(iseq) + iseq.push(end_label) end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 5f4f6ac0..3bcfc598 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -402,100 +402,6 @@ def visit_array(node) end def visit_aryptn(node) - match_failures = [] - jumps_to_exit = [] - - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(CheckMatch::TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going - # to call #deconstruct on the object and cache the result. - iseq.topn(2) - deconstruct_label = iseq.label - iseq.branchnil(deconstruct_label) - - # Next, ensure that the cached value was cached correctly, otherwise - # fail the match. - iseq.topn(2) - match_failures << iseq.branchunless(-1) - - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. - iseq.pop - iseq.topn(1) - jump = iseq.jump(-1) - - # Check if the object responds to #deconstruct, fail the match - # otherwise. - iseq.event(deconstruct_label) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(YARV.calldata(:respond_to?, 1)) - iseq.setn(3) - match_failures << iseq.branchunless(-1) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(YARV.calldata(:deconstruct)) - iseq.setn(2) - iseq.dup - iseq.checktype(CheckType::TYPE_ARRAY) - match_error = iseq.branchunless(-1) - - # Ensure that the deconstructed array has the correct size, fail the - # match otherwise. 
- jump.patch!(iseq) - iseq.dup - iseq.send(YARV.calldata(:length)) - iseq.putobject(node.requireds.length) - iseq.send(YARV.calldata(:==, 1)) - match_failures << iseq.branchunless(-1) - - # For each required element, check if the deconstructed array contains - # the element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(YARV.calldata(:[], 1)) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(CheckMatch::TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - jumps_to_exit << iseq.jump(-1) - end - end - - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. - match_error.patch!(iseq) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. 
- match_failures.each { |match_failure| match_failure.patch!(iseq) } - iseq.pop - jumps_to_exit end def visit_assign(node) @@ -623,14 +529,15 @@ def visit_begin(node) def visit_binary(node) case node.operator when :"&&" + done_label = iseq.label + visit(node.left) iseq.dup + iseq.branchunless(done_label) - branchunless = iseq.branchunless(-1) iseq.pop - visit(node.right) - branchunless.patch!(iseq) + iseq.push(done_label) when :"||" visit(node.left) iseq.dup @@ -1107,48 +1014,52 @@ def visit_heredoc(node) def visit_if(node) if node.predicate.is_a?(RangeNode) true_label = iseq.label + false_label = iseq.label + end_label = iseq.label iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) iseq.branchif(true_label) visit(node.predicate.left) - end_branch = iseq.branchunless(-1) + iseq.branchunless(end_label) iseq.putobject(true) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) iseq.push(true_label) visit(node.predicate.right) - false_branch = iseq.branchunless(-1) + iseq.branchunless(false_label) iseq.putobject(false) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - false_branch.patch!(iseq) + iseq.push(false_label) visit(node.statements) iseq.leave - end_branch.patch!(iseq) + iseq.push(end_label) iseq.putnil else + consequent_label = iseq.label + visit(node.predicate) - branchunless = iseq.branchunless(-1) + iseq.branchunless(consequent_label) visit(node.statements) if last_statement? iseq.leave - branchunless.patch!(iseq) - + iseq.push(consequent_label) node.consequent ? 
visit(node.consequent) : iseq.putnil else iseq.pop if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) + done_label = iseq.label + iseq.jump(done_label) + iseq.push(consequent_label) visit(node.consequent) - jump.patch!(iseq) + iseq.push(done_label) else - branchunless.patch!(iseq) + iseq.push(consequent_label) end end end @@ -1174,9 +1085,6 @@ def visit_imaginary(node) iseq.putobject(node.accept(RubyVisitor.new)) end - def visit_in(node) - end - def visit_int(node) iseq.putobject(node.accept(RubyVisitor.new)) end @@ -1293,11 +1201,11 @@ def visit_opassign(node) case (operator = node.operator.value.chomp("=").to_sym) when :"&&" - branchunless = nil + done_label = iseq.label with_opassign(node) do iseq.dup - branchunless = iseq.branchunless(-1) + iseq.branchunless(done_label) iseq.pop visit(node.value) end @@ -1305,15 +1213,15 @@ def visit_opassign(node) case node.target when ARefField iseq.leave - branchunless.patch!(iseq) + iseq.push(done_label) iseq.setn(3) iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless.patch!(iseq) + iseq.push(done_label) iseq.swap iseq.pop else - branchunless.patch!(iseq) + iseq.push(done_label) end when :"||" if node.target.is_a?(ConstPathField) || @@ -1524,30 +1432,25 @@ def visit_rassign(node) iseq.putnil if node.operator.is_a?(Kw) - jumps = [] + match_label = iseq.label visit(node.value) iseq.dup - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps << iseq.jump(-1) - else - jumps.concat(visit(node.pattern)) - end + visit_pattern(node.pattern, match_label) iseq.pop iseq.pop iseq.putobject(false) iseq.leave - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(match_label) iseq.adjuststack(2) iseq.putobject(true) else - jumps_to_match = [] + no_key_label = iseq.label + end_leave_label = iseq.label + end_label = iseq.label iseq.putnil iseq.putobject(false) @@ -1556,17 +1459,7 @@ def visit_rassign(node) visit(node.value) iseq.dup - # Visit 
the pattern. If it matches, - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps_to_match << iseq.jump(-1) - else - jumps_to_match.concat(visit(node.pattern)) - end - - no_key_label = iseq.label + visit_pattern(node.pattern, end_label) # First we're going to push the core onto the stack, then we'll check # if the value to match is truthy. If it is, we'll jump down to raise @@ -1584,7 +1477,7 @@ def visit_rassign(node) iseq.topn(7) iseq.send(YARV.calldata(:"core#sprintf", 3)) iseq.send(YARV.calldata(:"core#raise", 2)) - jump_to_exit = iseq.jump(-1) + iseq.jump(end_leave_label) # Here we're going to raise NoMatchingPatternKeyError. iseq.push(no_key_label) @@ -1601,14 +1494,12 @@ def visit_rassign(node) ) iseq.send(YARV.calldata(:"core#raise", 1)) - # This runs when the pattern fails to match. - jump_to_exit.patch!(iseq) + iseq.push(end_leave_label) iseq.adjuststack(7) iseq.putnil iseq.leave - # This runs when the pattern matches successfully. - jumps_to_match.each { |jump| jump.patch!(iseq) } + iseq.push(end_label) iseq.adjuststack(6) iseq.putnil end @@ -1791,44 +1682,47 @@ def visit_undef(node) end def visit_unless(node) + statements_label = iseq.label + visit(node.predicate) - branchunless = iseq.branchunless(-1) + iseq.branchunless(statements_label) node.consequent ? visit(node.consequent) : iseq.putnil if last_statement? 
iseq.leave - branchunless.patch!(iseq) - + iseq.push(statements_label) visit(node.statements) else iseq.pop if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) + done_label = iseq.label + iseq.jump(done_label) + iseq.push(statements_label) visit(node.consequent) - jump.patch!(iseq.label_at_index) + iseq.push(done_label) else - branchunless.patch!(iseq) + iseq.push(statements_label) end end end def visit_until(node) - jumps = [] + predicate_label = iseq.label + statements_label = iseq.label - jumps << iseq.jump(-1) + iseq.jump(predicate_label) iseq.putnil iseq.pop - jumps << iseq.jump(-1) + iseq.jump(predicate_label) - label = iseq.label_at_index + iseq.push(statements_label) visit(node.statements) iseq.pop - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(predicate_label) visit(node.predicate) - iseq.branchunless(label) + iseq.branchunless(statements_label) iseq.putnil if last_statement? end @@ -1901,21 +1795,21 @@ def visit_when(node) end def visit_while(node) - repeat_label = iseq.label - jumps = [] + predicate_label = iseq.label + statements_label = iseq.label - jumps << iseq.jump(-1) + iseq.jump(predicate_label) iseq.putnil iseq.pop - jumps << iseq.jump(-1) + iseq.jump(predicate_label) - iseq.push(repeat_label) + iseq.push(statements_label) visit(node.statements) iseq.pop - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(predicate_label) visit(node.predicate) - iseq.branchif(repeat_label) + iseq.branchif(statements_label) iseq.putnil if last_statement? end @@ -2025,6 +1919,9 @@ def constant_names(node) # first check if the value is defined using the defined instruction. I # don't know why it is necessary, and suspect that it isn't. 
def opassign_defined(node) + value_label = iseq.label + skip_value_label = iseq.label + case node.target when ConstPathField visit(node.target.parent) @@ -2052,7 +1949,7 @@ def opassign_defined(node) end end - branchunless = iseq.branchunless(-1) + iseq.branchunless(value_label) case node.target when ConstPathField, TopConstField @@ -2071,11 +1968,10 @@ def opassign_defined(node) end iseq.dup - skip_value_label = iseq.label iseq.branchif(skip_value_label) - iseq.pop - branchunless.patch!(iseq) + iseq.pop + iseq.push(value_label) visit(node.value) case node.target @@ -2114,6 +2010,111 @@ def push_interpolate iseq.anytostring end + # Visit a type of pattern in a pattern match. + def visit_pattern(node, end_label) + case node + when AryPtn + length_label = iseq.label + match_failure_label = iseq.label + match_error_label = iseq.label + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + deconstruct_label = iseq.label + iseq.branchnil(deconstruct_label) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + iseq.branchunless(match_failure_label) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + iseq.jump(length_label) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. + iseq.event(deconstruct_label) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + iseq.branchunless(match_failure_label) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. 
+ iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + iseq.branchunless(match_error_label) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + iseq.push(length_label) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(match_failure_label) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + iseq.jump(end_label) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + iseq.push(match_error_label) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. + iseq.push(match_failure_label) + iseq.pop + when VarField + lookup = visit(node) + iseq.setlocal(lookup.index, lookup.level) + iseq.jump(end_label) + end + end + # There are a lot of nodes in the AST that act as contains of parts of # strings. This includes things like string literals, regular expressions, # heredocs, etc. 
This method will visit all the parts of a string within diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d606e3cc..757b8b40 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -54,21 +54,20 @@ def disassemble(iseq) clauses = {} clause = [] + iseq.to_a iseq.insns.each do |insn| case insn - when Symbol - if insn.start_with?("label_") - unless clause.last.is_a?(Next) - clause << Assign(disasm_label.field, node_for(insn)) - end - - clauses[label] = clause - clause = [] - label = insn + when InstructionSequence::Label + unless clause.last.is_a?(Next) + clause << Assign(disasm_label.field, node_for(insn.name)) end + + clauses[label] = clause + clause = [] + label = insn.name when BranchUnless body = [ - Assign(disasm_label.field, node_for(insn.label)), + Assign(disasm_label.field, node_for(insn.label.name)), Next(Args([])) ] @@ -88,7 +87,7 @@ def disassemble(iseq) local = iseq.local_table.locals[insn.index] clause << VarRef(Ident(local.name.to_s)) when Jump - clause << Assign(disasm_label.field, node_for(insn.label)) + clause << Assign(disasm_label.field, node_for(insn.label.name)) clause << Next(Args([])) when Leave value = Args([clause.pop]) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e47a18ea..097fda38 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -464,11 +464,12 @@ def opt_case_dispatch(case_dispatch_hash, else_label) def opt_getconstant_path(names) if RUBY_VERSION < "3.2" || !options.inline_const_cache? cache = nil - getinlinecache = nil + cache_filled_label = nil if options.inline_const_cache? 
cache = inline_storage - getinlinecache = opt_getinlinecache(-1, cache) + cache_filled_label = label + opt_getinlinecache(cache_filled_label, cache) if names[0] == :"" names.shift @@ -489,7 +490,7 @@ def opt_getconstant_path(names) if options.inline_const_cache? opt_setinlinecache(cache) - getinlinecache.patch!(self) + push(cache_filled_label) end else push(OptGetConstantPath.new(names)) @@ -762,7 +763,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :branchnil iseq.branchnil(labels[opnds[0]]) when :branchunless - iseq.branchunless(opnds[0]) + iseq.branchunless(labels[opnds[0]]) when :checkkeyword iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) when :checkmatch @@ -838,7 +839,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil iseq.invokesuper(CallData.from(opnds[0]), block_iseq) when :jump - iseq.jump(opnds[0]) + iseq.jump(labels[opnds[0]]) when :leave iseq.leave when :newarray @@ -866,11 +867,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_aset_with iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) when :opt_case_dispatch - iseq.opt_case_dispatch(opnds[0], opnds[1]) + iseq.opt_case_dispatch(opnds[0], labels[opnds[1]]) when :opt_getconstant_path iseq.opt_getconstant_path(opnds[0]) when :opt_getinlinecache - iseq.opt_getinlinecache(opnds[0], opnds[1]) + iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) when :opt_newarray_max iseq.opt_newarray_max(opnds[0]) when :opt_newarray_min diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c340cd4e..8ec1f068 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -240,12 +240,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:branchunless, label] + [:branchunless, label.name] end def 
length @@ -288,10 +284,6 @@ def initialize(keyword_bits_index, keyword_index) @keyword_index = keyword_index end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(iseq) [ :checkkeyword, @@ -1351,12 +1343,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:jump, label] + [:jump, label.name] end def length diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 20588974..82f7560d 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -67,12 +67,8 @@ def initialize(label, cache) @cache = cache end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:opt_getinlinecache, label, cache] + [:opt_getinlinecache, label.name, cache] end def length From f87fc563b0127bbe661bb43b424ca379e3a20aa4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:20:21 -0500 Subject: [PATCH 03/12] Create a linked list for nodes --- lib/syntax_tree/yarv/compiler.rb | 28 +++---- lib/syntax_tree/yarv/instruction_sequence.rb | 78 ++++++++++++++++---- 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 3bcfc598..f6d40f30 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1260,15 +1260,13 @@ def visit_opassign(node) end def visit_params(node) - argument_options = iseq.argument_options - if node.requireds.any? 
- argument_options[:lead_num] = 0 + iseq.argument_options[:lead_num] = 0 node.requireds.each do |required| iseq.local_table.plain(required.value.to_sym) iseq.argument_size += 1 - argument_options[:lead_num] += 1 + iseq.argument_options[:lead_num] += 1 end end @@ -1279,7 +1277,9 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:opt] = [iseq.label_at_index] unless argument_options.key?(:opt) + unless iseq.argument_options.key?(:opt) + iseq.argument_options[:opt] = [iseq.label_at_index] + end visit(value) iseq.setlocal(index, 0) @@ -1289,19 +1289,19 @@ def visit_params(node) visit(node.rest) if node.rest if node.posts.any? - argument_options[:post_start] = iseq.argument_size - argument_options[:post_num] = 0 + iseq.argument_options[:post_start] = iseq.argument_size + iseq.argument_options[:post_num] = 0 node.posts.each do |post| iseq.local_table.plain(post.value.to_sym) iseq.argument_size += 1 - argument_options[:post_num] += 1 + iseq.argument_options[:post_num] += 1 end end if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] + iseq.argument_options[:kwbits] = 0 + iseq.argument_options[:keyword] = [] keyword_bits_name = node.keyword_rest ? 3 : 2 iseq.argument_size += 1 @@ -1313,16 +1313,16 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:kwbits] += 1 + iseq.argument_options[:kwbits] += 1 if value.nil? 
- argument_options[:keyword] << name + iseq.argument_options[:keyword] << name elsif (compiled = RubyVisitor.compile(value)) - argument_options[:keyword] << [name, compiled] + iseq.argument_options[:keyword] << [name, compiled] else skip_value_label = iseq.label - argument_options[:keyword] << [name] + iseq.argument_options[:keyword] << [name] iseq.checkkeyword(keyword_bits_index, keyword_index) iseq.branchif(skip_value_label) visit(value) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 097fda38..42910266 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -7,6 +7,50 @@ module YARV # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. class InstructionSequence + # When the list of instructions is first being created, it's stored as a + # linked list. This is to make it easier to perform peephole optimizations + # and other transformations like instruction specialization. + class InstructionList + class Node + attr_reader :instruction + attr_accessor :next_node + + def initialize(instruction, next_node = nil) + @instruction = instruction + @next_node = next_node + end + end + + attr_reader :head_node, :tail_node + + def initialize + @head_node = nil + @tail_node = nil + end + + def each + return to_enum(__method__) unless block_given? + node = head_node + + while node + yield node.instruction + node = node.next_node + end + end + + def push(instruction) + node = Node.new(instruction) + + if head_node.nil? 
+ @head_node = node + @tail_node = node + else + @tail_node.next_node = node + @tail_node = node + end + end + end + MAGIC = "YARVInstructionSequence/SimpleDataFormat" # This provides a handle to the rb_iseq_load function, which allows you to @@ -110,7 +154,7 @@ def initialize( @local_table = LocalTable.new @inline_storages = {} - @insns = [] + @insns = InstructionList.new @storage_index = 0 @stack = Stack.new @@ -142,7 +186,7 @@ def inline_storage_for(name) end def length - insns.inject(0) do |sum, insn| + insns.each.inject(0) do |sum, insn| case insn when Integer, Label, Symbol sum @@ -167,7 +211,7 @@ def to_a versions = RUBY_VERSION.split(".").map(&:to_i) # First, set it up so that all of the labels get their correct name. - insns.inject(0) do |length, insn| + insns.each.inject(0) do |length, insn| case insn when Integer, Symbol length @@ -179,6 +223,18 @@ def to_a end end + # Next, dump all of the instructions into a flat list. + dumped = insns.each.map do |insn| + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end + end + # Next, return the instruction sequence as an array. [ MAGIC, @@ -198,16 +254,7 @@ def to_a local_table.names, argument_options, [], - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) - end - end + dumped ] end @@ -250,7 +297,7 @@ def label end def push(insn) - insns << insn + insns.push(insn) case insn when Array, Integer, Label, Symbol @@ -262,8 +309,7 @@ def push(insn) end def label_at_index - name = :"label_#{length}" - insns.last == name ? 
name : event(name) + push(:"label_#{length}") end def event(name) From 2115177c7f74faafdf6760e9d926417c7c648bde Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:27:18 -0500 Subject: [PATCH 04/12] Fix opt table to use labels --- lib/syntax_tree/yarv/compiler.rb | 9 ++++++-- lib/syntax_tree/yarv/instruction_sequence.rb | 22 +++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index f6d40f30..c0d89239 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1278,12 +1278,17 @@ def visit_params(node) iseq.argument_size += 1 unless iseq.argument_options.key?(:opt) - iseq.argument_options[:opt] = [iseq.label_at_index] + start_label = iseq.label + iseq.push(start_label) + iseq.argument_options[:opt] = [start_label] end visit(value) iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label_at_index + + arg_given_label = iseq.label + iseq.push(arg_given_label) + iseq.argument_options[:opt] << arg_given_label end visit(node.rest) if node.rest diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 42910266..63904923 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -235,6 +235,9 @@ def to_a end end + dumped_options = argument_options.dup + dumped_options[:opt].map!(&:name) if dumped_options[:opt] + # Next, return the instruction sequence as an array. 
[ MAGIC, @@ -252,7 +255,7 @@ def to_a location.start_line, type, local_table.names, - argument_options, + dumped_options, [], dumped ] @@ -308,10 +311,6 @@ def push(insn) end end - def label_at_index - push(:"label_#{length}") - end - def event(name) push(name) end @@ -767,6 +766,11 @@ def toregexp(options, length) def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq = new(source[9], source[5], parent_iseq, Location.default, options) + # set up the labels object so that the labels are shared between the + # location in the instruction sequence and the instructions that + # reference them + labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + # set up the correct argument size iseq.argument_size = source[4][:arg_size] @@ -775,11 +779,9 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up the argument options iseq.argument_options.merge!(source[11]) - - # set up the labels object so that the labels are shared between the - # location in the instruction sequence and the instructions that - # reference them - labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + if iseq.argument_options[:opt] + iseq.argument_options[:opt].map! { |opt| labels[opt] } + end # set up all of the instructions source[13].each do |insn| From 69d2dfa143361357c2684da17d3c2df3b5ed85c2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:52:27 -0500 Subject: [PATCH 05/12] Specialize in a separate pass --- lib/syntax_tree/yarv/compiler.rb | 11 -- lib/syntax_tree/yarv/instruction_sequence.rb | 185 ++++++++++--------- 2 files changed, 102 insertions(+), 94 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index c0d89239..362ce32f 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -632,17 +632,6 @@ def visit_call(node) return end end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - iseq.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - iseq.opt_str_freeze(node.receiver.parts.first.value) - return - end - end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 63904923..dc2f7da8 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -12,8 +12,7 @@ class InstructionSequence # and other transformations like instruction specialization. class InstructionList class Node - attr_reader :instruction - attr_accessor :next_node + attr_accessor :instruction, :next_node def initialize(instruction, next_node = nil) @instruction = instruction @@ -29,11 +28,16 @@ def initialize end def each + return to_enum(__method__) unless block_given? + each_node { |node| yield node.instruction } + end + + def each_node return to_enum(__method__) unless block_given? node = head_node while node - yield node.instruction + yield node node = node.next_node end end @@ -210,7 +214,10 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, set it up so that all of the labels get their correct name. + # First, specialize any instructions that need to be specialized. + specialize_instructions! if options.specialized_instruction? + + # Next, set it up so that all of the labels get their correct name. insns.each.inject(0) do |length, insn| case insn when Integer, Symbol @@ -261,6 +268,92 @@ def to_a ] end + def specialize_instructions! 
+ insns.each_node do |node| + case node.instruction + when PutObject, PutString + next unless node.next_node + next if node.instruction.is_a?(PutObject) && !node.instruction.object.is_a?(String) + + next_node = node.next_node + next unless next_node.instruction.is_a?(Send) + next if next_node.instruction.block_iseq + + calldata = next_node.instruction.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + + case calldata.method + when :freeze + node.instruction = OptStrFreeze.new(node.instruction.object, calldata) + node.next_node = next_node.next_node + when :-@ + node.instruction = OptStrUMinus.new(node.instruction.object, calldata) + node.next_node = next_node.next_node + end + when Send + calldata = node.instruction.calldata + + if !node.instruction.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and + # the number of arguments. 
+ node.instruction = + case [calldata.method, calldata.argc] + when [:length, 0] + OptLength.new(calldata) + when [:size, 0] + OptSize.new(calldata) + when [:empty?, 0] + OptEmptyP.new(calldata) + when [:nil?, 0] + OptNilP.new(calldata) + when [:succ, 0] + OptSucc.new(calldata) + when [:!, 0] + OptNot.new(calldata) + when [:+, 1] + OptPlus.new(calldata) + when [:-, 1] + OptMinus.new(calldata) + when [:*, 1] + OptMult.new(calldata) + when [:/, 1] + OptDiv.new(calldata) + when [:%, 1] + OptMod.new(calldata) + when [:==, 1] + OptEq.new(calldata) + when [:!=, 1] + OptNEq.new(YARV.calldata(:==, 1), calldata) + when [:=~, 1] + OptRegExpMatch2.new(calldata) + when [:<, 1] + OptLT.new(calldata) + when [:<=, 1] + OptLE.new(calldata) + when [:>, 1] + OptGT.new(calldata) + when [:>=, 1] + OptGE.new(calldata) + when [:<<, 1] + OptLTLT.new(calldata) + when [:[], 1] + OptAref.new(calldata) + when [:&, 1] + OptAnd.new(calldata) + when [:|, 1] + OptOr.new(calldata) + when [:[]=, 2] + OptAset.new(calldata) + else + OptSendWithoutBlock.new(calldata) + end + end + end + end + end + ########################################################################## # Child instruction sequence methods ########################################################################## @@ -568,24 +661,6 @@ def opt_setinlinecache(cache) push(Legacy::OptSetInlineCache.new(cache)) end - def opt_str_freeze(object) - if options.specialized_instruction? - push(OptStrFreeze.new(object, YARV.calldata(:freeze))) - else - putstring(object) - send(YARV.calldata(:freeze)) - end - end - - def opt_str_uminus(object) - if options.specialized_instruction? - push(OptStrUMinus.new(object, YARV.calldata(:-@))) - else - putstring(object) - send(YARV.calldata(:-@)) - end - end - def pop push(Pop.new) end @@ -625,65 +700,7 @@ def putstring(object) end def send(calldata, block_iseq = nil) - if options.specialized_instruction? && !block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) - # Specialize the send instruction. 
If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - case [calldata.method, calldata.argc] - when [:length, 0] - push(OptLength.new(calldata)) - when [:size, 0] - push(OptSize.new(calldata)) - when [:empty?, 0] - push(OptEmptyP.new(calldata)) - when [:nil?, 0] - push(OptNilP.new(calldata)) - when [:succ, 0] - push(OptSucc.new(calldata)) - when [:!, 0] - push(OptNot.new(calldata)) - when [:+, 1] - push(OptPlus.new(calldata)) - when [:-, 1] - push(OptMinus.new(calldata)) - when [:*, 1] - push(OptMult.new(calldata)) - when [:/, 1] - push(OptDiv.new(calldata)) - when [:%, 1] - push(OptMod.new(calldata)) - when [:==, 1] - push(OptEq.new(calldata)) - when [:!=, 1] - push(OptNEq.new(YARV.calldata(:==, 1), calldata)) - when [:=~, 1] - push(OptRegExpMatch2.new(calldata)) - when [:<, 1] - push(OptLT.new(calldata)) - when [:<=, 1] - push(OptLE.new(calldata)) - when [:>, 1] - push(OptGT.new(calldata)) - when [:>=, 1] - push(OptGE.new(calldata)) - when [:<<, 1] - push(OptLTLT.new(calldata)) - when [:[], 1] - push(OptAref.new(calldata)) - when [:&, 1] - push(OptAnd.new(calldata)) - when [:|, 1] - push(OptOr.new(calldata)) - when [:[]=, 2] - push(OptAset.new(calldata)) - else - push(OptSendWithoutBlock.new(calldata)) - end - else - push(Send.new(calldata, block_iseq)) - end + push(Send.new(calldata, block_iseq)) end def setblockparam(index, level) @@ -931,9 +948,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_setinlinecache iseq.opt_setinlinecache(opnds[0]) when :opt_str_freeze - iseq.opt_str_freeze(opnds[0]) + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:freeze)) when :opt_str_uminus - iseq.opt_str_uminus(opnds[0]) + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:-@)) when :pop iseq.pop when :putnil From 80de9c9d4e1ddfc73fab479df69d77ce7367de69 Mon Sep 17 00:00:00 2001 From: Kevin Newton 
Date: Sat, 26 Nov 2022 14:03:46 -0500 Subject: [PATCH 06/12] Specialize using the linked list --- lib/syntax_tree/yarv/compiler.rb | 29 +---- lib/syntax_tree/yarv/instruction_sequence.rb | 115 +++++++++++-------- 2 files changed, 67 insertions(+), 77 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 362ce32f..9016c136 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -608,33 +608,6 @@ def visit_call(node) ) end - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - iseq.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - iseq.opt_newarray_min(parts.length) - return - end - end - end - end - # Track whether or not this is a method call on a block proxy receiver. # If it is, we can potentially do tailcall optimizations on it. 
block_receiver = false @@ -663,6 +636,8 @@ def visit_call(node) iseq.branchnil(after_call_label) end + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length flag = 0 arg_parts.each do |arg_part| diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index dc2f7da8..ff324d92 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -190,14 +190,16 @@ def inline_storage_for(name) end def length - insns.each.inject(0) do |sum, insn| - case insn - when Integer, Label, Symbol - sum - else - sum + insn.length + insns + .each + .inject(0) do |sum, insn| + case insn + when Integer, Label, Symbol + sum + else + sum + insn.length + end end - end end def eval @@ -218,29 +220,32 @@ def to_a specialize_instructions! if options.specialized_instruction? # Next, set it up so that all of the labels get their correct name. - insns.each.inject(0) do |length, insn| - case insn - when Integer, Symbol - length - when Label - insn.patch!(:"label_#{length}") - length - else - length + insn.length + insns + .each + .inject(0) do |length, insn| + case insn + when Integer, Symbol + length + when Label + insn.patch!(:"label_#{length}") + length + else + length + insn.length + end end - end # Next, dump all of the instructions into a flat list. - dumped = insns.each.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) + dumped = + insns.each.map do |insn| + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end end - end dumped_options = argument_options.dup dumped_options[:opt].map!(&:name) if dumped_options[:opt] @@ -271,9 +276,31 @@ def to_a def specialize_instructions! 
insns.each_node do |node| case node.instruction + when NewArray + next unless node.next_node + + next_node = node.next_node + next unless next_node.instruction.is_a?(Send) + next if next_node.instruction.block_iseq + + calldata = next_node.instruction.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 + + case calldata.method + when :max + node.instruction = OptNewArrayMax.new(node.instruction.number) + node.next_node = next_node.next_node + when :min + node.instruction = OptNewArrayMin.new(node.instruction.number) + node.next_node = next_node.next_node + end when PutObject, PutString next unless node.next_node - next if node.instruction.is_a?(PutObject) && !node.instruction.object.is_a?(String) + if node.instruction.is_a?(PutObject) && + !node.instruction.object.is_a?(String) + next + end next_node = node.next_node next unless next_node.instruction.is_a?(Send) @@ -281,19 +308,23 @@ def specialize_instructions! calldata = next_node.instruction.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 case calldata.method when :freeze - node.instruction = OptStrFreeze.new(node.instruction.object, calldata) + node.instruction = + OptStrFreeze.new(node.instruction.object, calldata) node.next_node = next_node.next_node when :-@ - node.instruction = OptStrUMinus.new(node.instruction.object, calldata) + node.instruction = + OptStrUMinus.new(node.instruction.object, calldata) node.next_node = next_node.next_node end when Send calldata = node.instruction.calldata - if !node.instruction.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !node.instruction.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -639,24 +670,6 @@ def opt_getinlinecache(label, cache) push(Legacy::OptGetInlineCache.new(label, cache)) end - def opt_newarray_max(length) - if options.specialized_instruction? - push(OptNewArrayMax.new(length)) - else - newarray(length) - send(YARV.calldata(:max)) - end - end - - def opt_newarray_min(length) - if options.specialized_instruction? - push(OptNewArrayMin.new(length)) - else - newarray(length) - send(YARV.calldata(:min)) - end - end - def opt_setinlinecache(cache) push(Legacy::OptSetInlineCache.new(cache)) end @@ -938,9 +951,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_getinlinecache iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) when :opt_newarray_max - iseq.opt_newarray_max(opnds[0]) + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:max)) when :opt_newarray_min - iseq.opt_newarray_min(opnds[0]) + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:min)) when :opt_neq iseq.push( OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) From f3ed30d2157dd6351d0cf2fce1d91148f1432318 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 14:21:01 -0500 Subject: [PATCH 07/12] Have the instruction list point to values not necessarily instructions --- lib/syntax_tree/yarv/instruction_sequence.rb | 48 +++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index ff324d92..a994c6d2 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -12,10 +12,10 @@ class InstructionSequence # and other transformations like instruction specialization. 
class InstructionList class Node - attr_accessor :instruction, :next_node + attr_accessor :value, :next_node - def initialize(instruction, next_node = nil) - @instruction = instruction + def initialize(value, next_node = nil) + @value = value @next_node = next_node end end @@ -29,7 +29,7 @@ def initialize def each return to_enum(__method__) unless block_given? - each_node { |node| yield node.instruction } + each_node { |node| yield node.value } end def each_node @@ -37,7 +37,7 @@ def each_node node = head_node while node - yield node + yield node, node.value node = node.next_node end end @@ -274,62 +274,56 @@ def to_a end def specialize_instructions! - insns.each_node do |node| - case node.instruction + insns.each_node do |node, value| + case value when NewArray next unless node.next_node next_node = node.next_node - next unless next_node.instruction.is_a?(Send) - next if next_node.instruction.block_iseq + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq - calldata = next_node.instruction.calldata + calldata = next_node.value.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE next unless calldata.argc == 0 case calldata.method when :max - node.instruction = OptNewArrayMax.new(node.instruction.number) + node.value = OptNewArrayMax.new(value.number) node.next_node = next_node.next_node when :min - node.instruction = OptNewArrayMin.new(node.instruction.number) + node.value = OptNewArrayMin.new(value.number) node.next_node = next_node.next_node end when PutObject, PutString next unless node.next_node - if node.instruction.is_a?(PutObject) && - !node.instruction.object.is_a?(String) - next - end + next if value.is_a?(PutObject) && !value.object.is_a?(String) next_node = node.next_node - next unless next_node.instruction.is_a?(Send) - next if next_node.instruction.block_iseq + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq - calldata = next_node.instruction.calldata + calldata = 
next_node.value.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE next unless calldata.argc == 0 case calldata.method when :freeze - node.instruction = - OptStrFreeze.new(node.instruction.object, calldata) + node.value = OptStrFreeze.new(value.object, calldata) node.next_node = next_node.next_node when :-@ - node.instruction = - OptStrUMinus.new(node.instruction.object, calldata) + node.value = OptStrUMinus.new(value.object, calldata) node.next_node = next_node.next_node end when Send - calldata = node.instruction.calldata + calldata = value.calldata - if !node.instruction.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and # the number of arguments. - node.instruction = + node.value = case [calldata.method, calldata.argc] when [:length, 0] OptLength.new(calldata) From b422b428f8089e723732b5a586d5d97bdc18ead6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 14:29:54 -0500 Subject: [PATCH 08/12] Give a reference on the labels to their container nodes --- lib/syntax_tree/yarv/instruction_sequence.rb | 30 +++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index a994c6d2..5469f6f7 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -95,12 +95,18 @@ def change_by(value) class Label attr_reader :name + # When we're serializing the instruction sequence, we need to be able to + # look up the label from the branch instructions and then access the + # subsequent node. So we'll store the reference here. 
+ attr_reader :node + def initialize(name = nil) @name = name end - def patch!(name) + def patch!(name, node) @name = name + @node = node end end @@ -220,19 +226,17 @@ def to_a specialize_instructions! if options.specialized_instruction? # Next, set it up so that all of the labels get their correct name. - insns - .each - .inject(0) do |length, insn| - case insn - when Integer, Symbol - length - when Label - insn.patch!(:"label_#{length}") - length - else - length + insn.length - end + length = 0 + insns.each_node do |node, value| + case value + when Integer, Symbol + # skip + when Label + value.patch!(:"label_#{length}", node) + else + length += value.length end + end # Next, dump all of the instructions into a flat list. dumped = From 14df44ed9b4c01845e0402a9514c0d40e05bddd7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 16:03:06 -0500 Subject: [PATCH 09/12] Begin peephole optimizations --- lib/syntax_tree/yarv/instruction_sequence.rb | 50 +++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 5469f6f7..e8e30b3b 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -52,6 +52,8 @@ def push(instruction) @tail_node.next_node = node @tail_node = node end + + node end end @@ -98,15 +100,14 @@ class Label # When we're serializing the instruction sequence, we need to be able to # look up the label from the branch instructions and then access the # subsequent node. So we'll store the reference here. - attr_reader :node + attr_accessor :node def initialize(name = nil) @name = name end - def patch!(name, node) + def patch!(name) @name = name - @node = node end end @@ -222,8 +223,9 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, specialize any instructions that need to be specialized. + # First, handle any compilation options that we need to. 
specialize_instructions! if options.specialized_instruction? + peephole_optimize! if options.peephole_optimization? # Next, set it up so that all of the labels get their correct name. length = 0 @@ -232,7 +234,7 @@ def to_a when Integer, Symbol # skip when Label - value.patch!(:"label_#{length}", node) + value.patch!(:"label_#{length}") else length += value.length end @@ -383,6 +385,27 @@ def specialize_instructions! end end + def peephole_optimize! + insns.each_node do |node, value| + case value + when Jump + # jump LABEL + # ... + # LABEL: + # leave + # => + # leave + # ... + # LABEL: + # leave + # case value.label.node.next_node&.value + # when Leave + # node.value = Leave.new + # end + end + end + end + ########################################################################## # Child instruction sequence methods ########################################################################## @@ -421,15 +444,18 @@ def label Label.new end - def push(insn) - insns.push(insn) + def push(value) + node = insns.push(value) - case insn - when Array, Integer, Label, Symbol - insn + case value + when Array, Integer, Symbol + value + when Label + value.node = node + value else - stack.change_by(-insn.pops + insn.pushes) - insn + stack.change_by(-value.pops + value.pushes) + value end end From b998a6ea9a5a9564dafc0cd422a77f03e3937c26 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 28 Nov 2022 11:15:38 -0500 Subject: [PATCH 10/12] Add a bit of execution --- .rubocop.yml | 18 + lib/syntax_tree/yarv.rb | 277 +++++ lib/syntax_tree/yarv/compiler.rb | 12 +- lib/syntax_tree/yarv/instruction_sequence.rb | 178 ++- lib/syntax_tree/yarv/instructions.rb | 1062 +++++++++++++++++- lib/syntax_tree/yarv/legacy.rb | 8 + 6 files changed, 1487 insertions(+), 68 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index b7ba43e8..c81fdb59 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -31,12 +31,18 @@ Lint/InterpolationCheck: Lint/MissingSuper: Enabled: false 
+Lint/NonLocalExitFromIterator: + Enabled: false + Lint/RedundantRequireStatement: Enabled: false Lint/SuppressedException: Enabled: false +Lint/UnderscorePrefixedVariableName: + Enabled: false + Lint/UnusedMethodArgument: AllowUnusedKeywordArguments: true @@ -55,6 +61,9 @@ Naming/RescuedExceptionsVariableName: Naming/VariableNumber: Enabled: false +Security/Eval: + Enabled: false + Style/AccessorGrouping: Enabled: false @@ -64,9 +73,18 @@ Style/CaseEquality: Style/CaseLikeIf: Enabled: false +Style/ClassVars: + Enabled: false + +Style/DocumentDynamicEvalDefinition: + Enabled: false + Style/Documentation: Enabled: false +Style/EndBlock: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 1e759ad1..74f2598e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,11 +1,288 @@ # frozen_string_literal: true +require "forwardable" + module SyntaxTree # This module provides an object representation of the YARV bytecode. 
module YARV + class VM + class Jump + attr_reader :name + + def initialize(name) + @name = name + end + end + + class Leave + attr_reader :value + + def initialize(value) + @value = value + end + end + + class Frame + attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars + + def initialize(iseq, parent, stack_index, _self, nesting) + @iseq = iseq + @parent = parent + @stack_index = stack_index + @_self = _self + @nesting = nesting + @svars = {} + end + end + + class TopFrame < Frame + def initialize(iseq) + super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) + end + end + + class BlockFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class MethodFrame < Frame + attr_reader :name, :block + + def initialize(iseq, parent, stack_index, _self, name, block) + super(iseq, parent, stack_index, _self, parent.nesting) + @name = name + @block = block + end + end + + class ClassFrame < Frame + def initialize(iseq, parent, stack_index, _self) + super(iseq, parent, stack_index, _self, parent.nesting + [_self]) + end + end + + class FrozenCore + define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } + + define_method("core#hash_merge_ptr") do |hash, *values| + hash.merge(values.each_slice(2).to_h) + end + + define_method("core#set_method_alias") do |clazz, new_name, old_name| + clazz.alias_method(new_name, old_name) + end + + define_method("core#set_variable_alias") do |new_name, old_name| + # Using eval here since there isn't a reflection API to be able to + # alias global variables. 
+ eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) + end + + define_method("core#set_postexe") { |&block| END { block.call } } + + define_method("core#undef_method") do |clazz, name| + clazz.undef_method(name) + end + end + + FROZEN_CORE = FrozenCore.new.freeze + + extend Forwardable + + attr_reader :stack + def_delegators :stack, :push, :pop + + attr_reader :frame + def_delegators :frame, :_self + + def initialize + @stack = [] + @frame = nil + end + + ########################################################################## + # Helper methods for frames + ########################################################################## + + def run_frame(frame) + # First, set the current frame to the given value. + @frame = frame + + # Next, set up the local table for the frame. This is actually incorrect + # as it could use the values already on the stack, but for now we're + # just doing this for simplicity. + frame.iseq.local_table.size.times { push(nil) } + + # Yield so that some frame-specific setup can be done. + yield if block_given? + + # This hash is going to hold a mapping of label names to their + # respective indices in our instruction list. + labels = {} + + # This array is going to hold our instructions. + insns = [] + + # Here we're going to preprocess the instruction list from the + # instruction sequence to set up the labels hash and the insns array. + frame.iseq.insns.each do |insn| + case insn + when Integer, Symbol + # skip + when InstructionSequence::Label + labels[insn.name] = insns.length + else + insns << insn + end + end + + # Finally we can execute the instructions one at a time. If they return + # jumps or leaves we will handle those appropriately. 
+ pc = 0 + while pc < insns.length + insn = insns[pc] + pc += 1 + + case (result = insn.call(self)) + when Jump + pc = labels[result.name] + when Leave + return result.value + end + end + ensure + @stack = stack[0...frame.stack_index] + @frame = frame.parent + end + + def run_top_frame(iseq) + run_frame(TopFrame.new(iseq)) + end + + def run_block_frame(iseq, *args, &block) + run_frame(BlockFrame.new(iseq, frame, stack.length)) do + locals = [*args, block] + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + def run_class_frame(iseq, clazz) + run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) + end + + def run_method_frame(name, iseq, _self, *args, **kwargs, &block) + run_frame( + MethodFrame.new(iseq, frame, stack.length, _self, name, block) + ) do + locals = [*args, block] + + if iseq.argument_options[:keyword] + # First, set up the keyword bits array. + keyword_bits = + iseq.argument_options[:keyword].map do |config| + kwargs.key?(config.is_a?(Array) ? config[0] : config) + end + + iseq.local_table.locals.each_with_index do |local, index| + # If this is the keyword bits local, then set it appropriately. + if local.name == 2 + locals.insert(index, keyword_bits) + next + end + + # First, find the configuration for this local in the keywords + # list if it exists. + name = local.name + config = + iseq.argument_options[:keyword].find do |keyword| + keyword.is_a?(Array) ? keyword[0] == name : keyword == name + end + + # If the configuration doesn't exist, then the local is not a + # keyword local. + next unless config + + if !config.is_a?(Array) + # required keyword + locals.insert(index, kwargs.fetch(name)) + elsif !config[1].nil? 
+ # optional keyword with embedded default value + locals.insert(index, kwargs.fetch(name, config[1])) + else + # optional keyword with expression default value + locals.insert(index, nil) + end + end + end + + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + ########################################################################## + # Helper methods for instructions + ########################################################################## + + def const_base + frame.nesting.last + end + + def frame_at(level) + current = frame + level.times { current = current.parent } + current + end + + def frame_svar + current = frame + current = current.parent while current.is_a?(BlockFrame) + current + end + + def frame_yield + current = frame + current = current.parent until current.is_a?(MethodFrame) + current + end + + def frozen_core + FROZEN_CORE + end + + def jump(label) + Jump.new(label.name) + end + + def leave + Leave.new(pop) + end + + def local_get(index, level) + stack[frame_at(level).stack_index + index] + end + + def local_set(index, level, value) + stack[frame_at(level).stack_index + index] = value + end + end + # Compile the given source into a YARV instruction sequence. def self.compile(source, options = Compiler::Options.new) SyntaxTree.parse(source).accept(Compiler.new(options)) end + + # Compile and interpret the given source. + def self.interpret(source, options = Compiler::Options.new) + iseq = RubyVM::InstructionSequence.compile(source, **options) + iseq = InstructionSequence.from(iseq.to_a) + iseq.specialize_instructions! 
+ VM.new.run_top_frame(iseq) + end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 9016c136..194b758b 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1987,8 +1987,8 @@ def visit_pattern(node, end_label) match_failure_label = iseq.label match_error_label = iseq.label - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. + # If there's a constant, then check if we match against that constant + # or not first. Branch to failure if we don't. if node.constant iseq.dup visit(node.constant) @@ -2007,8 +2007,8 @@ def visit_pattern(node, end_label) iseq.topn(2) iseq.branchunless(match_failure_label) - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. + # Since we have a valid cached value, we can skip past the part where + # we call #deconstruct on the object. iseq.pop iseq.topn(1) iseq.jump(length_label) @@ -2064,8 +2064,8 @@ def visit_pattern(node, end_label) end end - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. + # Set up the routine here to raise an error to indicate that the type + # of the deconstructed array was incorrect. iseq.push(match_error_label) iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.putobject(TypeError) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e8e30b3b..f20981df 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -20,6 +20,7 @@ def initialize(value, next_node = nil) end end + include Enumerable attr_reader :head_node, :tail_node def initialize @@ -109,6 +110,10 @@ def initialize(name = nil) def patch!(name) @name = name end + + def inspect + name.inspect + end end # The type of the instruction sequence. 
@@ -128,6 +133,9 @@ def patch!(name) attr_accessor :argument_size attr_reader :argument_options + # The catch table for this instruction sequence. + attr_reader :catch_table + # The list of instructions for this instruction sequence. attr_reader :insns @@ -162,6 +170,7 @@ def initialize( @argument_size = 0 @argument_options = {} + @catch_table = [] @local_table = LocalTable.new @inline_storages = {} @@ -229,20 +238,20 @@ def to_a # Next, set it up so that all of the labels get their correct name. length = 0 - insns.each_node do |node, value| - case value + insns.each do |insn| + case insn when Integer, Symbol # skip when Label - value.patch!(:"label_#{length}") + insn.patch!(:"label_#{length}") else - length += value.length + length += insn.length end end # Next, dump all of the instructions into a flat list. dumped = - insns.each.map do |insn| + insns.map do |insn| case insn when Integer, Symbol insn @@ -274,7 +283,7 @@ def to_a type, local_table.names, dumped_options, - [], + catch_table.map(&:to_a), dumped ] end @@ -324,7 +333,8 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -386,24 +396,24 @@ def specialize_instructions! end def peephole_optimize! - insns.each_node do |node, value| - case value - when Jump - # jump LABEL - # ... - # LABEL: - # leave - # => - # leave - # ... - # LABEL: - # leave - # case value.label.node.next_node&.value - # when Leave - # node.value = Leave.new - # end - end - end + # insns.each_node do |node, value| + # case value + # when Jump + # # jump LABEL + # # ... + # # LABEL: + # # leave + # # => + # # leave + # # ... 
+ # # LABEL: + # # leave + # # case value.label.node.next_node&.value + # # when Leave + # # node.value = Leave.new + # # end + # end + # end end ########################################################################## @@ -436,6 +446,77 @@ def singleton_class_child_iseq(location) child_iseq(:class, "singleton class", location) end + ########################################################################## + # Catch table methods + ########################################################################## + + class CatchEntry + attr_reader :iseq, :begin_label, :end_label, :exit_label + + def initialize(iseq, begin_label, end_label, exit_label) + @iseq = iseq + @begin_label = begin_label + @end_label = end_label + @exit_label = exit_label + end + end + + class CatchBreak < CatchEntry + def to_a + [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchNext < CatchEntry + def to_a + [:next, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRedo < CatchEntry + def to_a + [:redo, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRescue < CatchEntry + def to_a + [ + :rescue, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name + ] + end + end + + class CatchRetry < CatchEntry + def to_a + [:retry, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + def catch_break(iseq, begin_label, end_label, exit_label) + catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) + end + + def catch_next(begin_label, end_label, exit_label) + catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) + end + + def catch_redo(begin_label, end_label, exit_label) + catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) + end + + def catch_rescue(iseq, begin_label, end_label, exit_label) + catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) + end + + def catch_retry(begin_label, 
end_label, exit_label) + catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) + end + ########################################################################## # Instruction push methods ########################################################################## @@ -837,6 +918,46 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.argument_options[:opt].map! { |opt| labels[opt] } end + # set up the catch table + source[12].each do |entry| + case entry[0] + when :break + iseq.catch_break( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :next + iseq.catch_next( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :rescue + iseq.catch_rescue( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :redo + iseq.catch_redo( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :retry + iseq.catch_retry( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + else + raise "unknown catch type: #{entry[0]}" + end + end + # set up all of the instructions source[13].each do |insn| # skip line numbers @@ -969,7 +1090,12 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_aset_with iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) when :opt_case_dispatch - iseq.opt_case_dispatch(opnds[0], labels[opnds[1]]) + hash = + opnds[0] + .each_slice(2) + .to_h + .transform_values { |value| labels[value] } + iseq.opt_case_dispatch(hash, labels[opnds[1]]) when :opt_getconstant_path iseq.opt_getconstant_path(opnds[0]) when :opt_getinlinecache diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 8ec1f068..0b60bd13 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -98,6 +98,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.pop(number) + end end # ### Summary @@ -134,6 
+142,20 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + original, value = vm.pop(2) + + if value.is_a?(String) + vm.push(value) + else + vm.push("#<#{original.class.name}:0000>") + end + end end # ### Summary @@ -174,6 +196,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop + end end # ### Summary @@ -215,6 +245,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop.nil? + end end # ### Summary @@ -255,6 +293,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) unless vm.pop + end end # ### Summary @@ -303,6 +349,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) + end end # ### Summary @@ -343,6 +397,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "checkmatch" + end end # ### Summary @@ -406,6 +468,61 @@ def pushes # can investigate further. 2 end + + def canonical + self + end + + def call(vm) + object = vm.pop + result = + case type + when TYPE_OBJECT + raise NotImplementedError, "checktype TYPE_OBJECT" + when TYPE_CLASS + object.is_a?(Class) + when TYPE_MODULE + object.is_a?(Module) + when TYPE_FLOAT + object.is_a?(Float) + when TYPE_STRING + object.is_a?(String) + when TYPE_REGEXP + object.is_a?(Regexp) + when TYPE_ARRAY + object.is_a?(Array) + when TYPE_HASH + object.is_a?(Hash) + when TYPE_STRUCT + object.is_a?(Struct) + when TYPE_BIGNUM + raise NotImplementedError, "checktype TYPE_BIGNUM" + when TYPE_FILE + object.is_a?(File) + when TYPE_DATA + raise NotImplementedError, "checktype TYPE_DATA" + when TYPE_MATCH + raise NotImplementedError, "checktype TYPE_MATCH" + when TYPE_COMPLEX + object.is_a?(Complex) + when TYPE_RATIONAL + object.is_a?(Rational) + when TYPE_NIL + object.nil? 
+ when TYPE_TRUE + object == true + when TYPE_FALSE + object == false + when TYPE_SYMBOL + object.is_a?(Symbol) + when TYPE_FIXNUM + object.is_a?(Integer) + when TYPE_UNDEF + raise NotImplementedError, "checktype TYPE_UNDEF" + end + + vm.push(result) + end end # ### Summary @@ -438,6 +555,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + left, right = vm.pop(2) + vm.push([*left, *right]) + end end # ### Summary @@ -477,6 +603,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).join) + end end # ### Summary @@ -524,6 +658,20 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + object, superclass = vm.pop(2) + iseq = class_iseq + + clazz = Class.new(superclass || Object) + vm.push(vm.run_class_frame(iseq, clazz)) + + object.const_set(name, clazz) + end end # ### Summary @@ -579,6 +727,46 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + object = vm.pop + + result = + case type + when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR + message + when TYPE_IVAR + message if vm._self.instance_variable_defined?(name) + when TYPE_LVAR + raise NotImplementedError, "defined TYPE_LVAR" + when TYPE_GVAR + message if global_variables.include?(name) + when TYPE_CVAR + clazz = vm._self + clazz = clazz.singleton_class unless clazz.is_a?(Module) + message if clazz.class_variable_defined?(name) + when TYPE_CONST + raise NotImplementedError, "defined TYPE_CONST" + when TYPE_METHOD + raise NotImplementedError, "defined TYPE_METHOD" + when TYPE_YIELD + raise NotImplementedError, "defined TYPE_YIELD" + when TYPE_ZSUPER + raise NotImplementedError, "defined TYPE_ZSUPER" + when TYPE_REF + raise NotImplementedError, "defined TYPE_REF" + when TYPE_FUNC + message if object.respond_to?(name, true) + when TYPE_CONST_FROM + raise NotImplementedError, "defined TYPE_CONST_FROM" + end + + vm.push(result) + end end # ### Summary @@ -595,15 
+783,15 @@ def pushes # ~~~ # class DefineMethod - attr_reader :name, :method_iseq + attr_reader :method_name, :method_iseq - def initialize(name, method_iseq) - @name = name + def initialize(method_name, method_iseq) + @method_name = method_name @method_iseq = method_iseq end def to_a(_iseq) - [:definemethod, name, method_iseq.to_a] + [:definemethod, method_name, method_iseq.to_a] end def length @@ -617,6 +805,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + name = method_name + iseq = method_iseq + + vm + ._self + .__send__(:define_method, name) do |*args, **kwargs, &block| + vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + end + end end # ### Summary @@ -634,15 +837,15 @@ def pushes # ~~~ # class DefineSMethod - attr_reader :name, :method_iseq + attr_reader :method_name, :method_iseq - def initialize(name, method_iseq) - @name = name + def initialize(method_name, method_iseq) + @method_name = method_name @method_iseq = method_iseq end def to_a(_iseq) - [:definesmethod, name, method_iseq.to_a] + [:definesmethod, method_name, method_iseq.to_a] end def length @@ -656,6 +859,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + name = method_name + iseq = method_iseq + + vm + ._self + .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| + vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + end + end end # ### Summary @@ -684,6 +902,14 @@ def pops def pushes 2 end + + def canonical + self + end + + def call(vm) + vm.push(vm.stack.last) # re-push the same reference; YARV dup never copies the object + end end # ### Summary @@ -718,6 +944,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -752,6 +986,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -786,6 +1028,16 @@ def pops def pushes number end + + def canonical + self + end + + def call(vm) + values =
vm.pop(number) + vm.push(*values) + vm.push(*values) + end end # ### Summary @@ -823,6 +1075,14 @@ def pops def pushes number end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "expandarray" + end end # ### Summary @@ -867,6 +1127,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary @@ -909,6 +1177,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary @@ -946,6 +1222,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + clazz = vm._self + clazz = clazz.class unless clazz.is_a?(Module) # modules own class variables too + vm.push(clazz.class_variable_get(name)) + end end # ### Summary @@ -982,6 +1268,24 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + # const_base, allow_nil = + vm.pop(2) + + vm.frame.nesting.reverse_each do |clazz| + if clazz.const_defined?(name) + vm.push(clazz.const_get(name)) + return + end + end + + raise NameError, "uninitialized constant #{name}" + end end # ### Summary @@ -1016,6 +1320,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + # Evaluating the name of the global variable because there isn't a + # reflection API for global variables. + vm.push(eval(name.to_s, binding, __FILE__, __LINE__)) + end end # ### Summary @@ -1058,34 +1372,47 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + method = Object.instance_method(:instance_variable_get) + vm.push(method.bind(vm._self).call(name)) + end end # ### Summary # - # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the current frame determined by - # the index given as its only argument. + # `getlocal` fetches the value of a local variable from a frame determined + # by the level and index arguments.
The level is the number of frames back + # to look and the index is the index in the local table. It pushes the value + # it finds onto the stack. # # ### Usage # # ~~~ruby # value = 5 - # value + # tap { tap { value } } # ~~~ # - class GetLocalWC0 - attr_reader :index + class GetLocal + attr_reader :index, :level - def initialize(index) + def initialize(index, level) @index = index + @level = level end def to_a(iseq) - [:getlocal_WC_0, iseq.local_table.offset(index)] + current = iseq + level.times { current = current.parent_iseq } + [:getlocal, current.local_table.offset(index), level] end def length - 2 + 3 end def pops @@ -1095,22 +1422,30 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary # - # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the parent frame determined by + # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the current frame determined by # the index given as its only argument. # # ### Usage # # ~~~ruby # value = 5 - # self.then { value } + # value # ~~~ # - class GetLocalWC1 + class GetLocalWC0 attr_reader :index def initialize(index) @@ -1118,7 +1453,7 @@ def initialize(index) end def to_a(iseq) - [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + [:getlocal_WC_0, iseq.local_table.offset(index)] end def length @@ -1132,38 +1467,42 @@ def pops def pushes 1 end + + def canonical + GetLocal.new(index, 0) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary # - # `getlocal` fetches the value of a local variable from a frame determined - # by the level and index arguments. The level is the number of frames back - # to look and the index is the index in the local table. It pushes the value - # it finds onto the stack. 
+ # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the parent frame determined by + # the index given as its only argument. # # ### Usage # # ~~~ruby # value = 5 - # tap { tap { value } } + # self.then { value } # ~~~ # - class GetLocal - attr_reader :index, :level + class GetLocalWC1 + attr_reader :index - def initialize(index, level) + def initialize(index) @index = index - @level = level end def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:getlocal, current.local_table.offset(index), level] + [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end def length - 3 + 2 end def pops @@ -1173,6 +1512,14 @@ def pops def pushes 1 end + + def canonical + GetLocal.new(index, 1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1212,6 +1559,21 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + case key + when SVAR_LASTLINE + raise NotImplementedError, "getspecial SVAR_LASTLINE" + when SVAR_BACKREF + raise NotImplementedError, "getspecial SVAR_BACKREF" + when SVAR_FLIPFLOP_START + vm.push(vm.frame_svar.svars[SVAR_FLIPFLOP_START]) # pushes is 1, so the value must land on the stack + end + end end # ### Summary @@ -1241,6 +1603,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop.to_sym) + end end # ### Summary @@ -1279,6 +1649,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) + end end # ### Summary @@ -1319,6 +1697,32 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + block = + if (iseq = block_iseq) + ->(*args, **kwargs, &blk) do + vm.run_block_frame(iseq, *args, **kwargs, &blk) + end + end + + keywords = + if calldata.kw_arg + calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h + else + {} + end + + arguments = vm.pop(calldata.argc) + receiver = vm.pop + + method =
receiver.method(vm.frame.name).super_method + vm.push(method.call(*arguments, **keywords, &block)) + end end # ### Summary @@ -1358,6 +1762,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) + end end # ### Summary @@ -1388,6 +1800,14 @@ def pushes # otherwise the stack size is incorrectly calculated. 0 end + + def canonical + self + end + + def call(vm) + vm.leave + end end # ### Summary @@ -1424,6 +1844,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number)) + end end # ### Summary @@ -1460,6 +1888,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number)) + end end # ### Summary @@ -1498,6 +1934,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).each_slice(2).to_h) + end end # ### Summary @@ -1537,6 +1981,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(Range.new(*vm.pop(2), exclude_end == 1)) + end end # ### Summary @@ -1566,6 +2018,13 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + end end # ### Summary @@ -1604,6 +2063,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop.to_s) + end end # ### Summary @@ -1642,6 +2109,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + @value = vm.run_block_frame(iseq) unless @executed # run the body on first execution only + @executed = true + vm.push(@value) # always push the cached result, since pushes is 1 + end end # ### Summary @@ -1679,6 +2156,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1715,6 +2200,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1753,6 +2246,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop[object]) + end end # ###
Summary @@ -1790,6 +2291,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1827,6 +2336,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + hash, value = vm.pop(2) + vm.push(hash[object] = value) + end end # ### Summary @@ -1861,7 +2379,11 @@ def initialize(case_dispatch_hash, else_label) end def to_a(_iseq) - [:opt_case_dispatch, case_dispatch_hash, else_label] + [ + :opt_case_dispatch, + case_dispatch_hash.flat_map { |key, value| [key, value.name] }, + else_label.name # dump the label by name, like the hash values above + ] end def length @@ -1875,6 +2397,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + end end # ### Summary @@ -1912,6 +2442,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1948,6 +2486,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1985,6 +2531,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2022,6 +2576,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2058,6 +2620,21 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + current = vm._self + current = current.class unless current.is_a?(Class) + + names.each do |name| + current = name == :"" ?
Object : current.const_get(name) + end + + vm.push(current) + end end # ### Summary @@ -2095,6 +2672,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2132,6 +2717,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2169,6 +2762,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2206,6 +2807,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2243,6 +2852,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2281,6 +2898,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2318,6 +2943,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2355,6 +2988,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2395,6 +3036,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + receiver, argument = vm.pop(2) + vm.push(receiver != argument) + end end # ### Summary @@ -2431,6 +3081,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).max) + end end # ### Summary @@ -2467,6 +3125,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).min) + end end # ### Summary @@ -2504,6 +3170,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + 
end end # ### Summary @@ -2539,6 +3213,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2576,6 +3258,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2613,6 +3303,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2649,6 +3347,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2685,6 +3391,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2722,6 +3436,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2759,6 +3481,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.freeze) + end end # ### Summary @@ -2796,6 +3526,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(-object) + end end # ### Summary @@ -2833,6 +3571,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2861,6 +3607,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.pop + end end # ### Summary @@ -2889,6 +3643,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2923,6 +3685,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object) + end end # ### Summary @@ -2953,6 +3723,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(0) + end + + def call(vm) + canonical.call(vm) + end end 
# ### Summary @@ -2983,6 +3761,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3011,6 +3797,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm._self) + end end # ### Summary @@ -3051,6 +3845,23 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + case object + when OBJECT_VMCORE + vm.push(vm.frozen_core) + when OBJECT_CBASE + value = vm._self + value = value.singleton_class unless value.is_a?(Class) + vm.push(value) + when OBJECT_CONST_BASE + vm.push(vm.const_base) + end + end end # ### Summary @@ -3085,6 +3896,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -3124,6 +3943,33 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + block = + if (iseq = block_iseq) + ->(*args, **kwargs, &blk) do + vm.run_block_frame(iseq, *args, **kwargs, &blk) + end + end + + keywords = + if calldata.kw_arg + calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h + else + {} + end + + arguments = vm.pop(calldata.argc) + receiver = vm.pop + + vm.push( + receiver.__send__(calldata.method, *arguments, **keywords, &block) + ) + end end # ### Summary @@ -3166,6 +4012,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.local_set(index, level, vm.pop) + end end # ### Summary @@ -3204,6 +4058,16 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + clazz = vm._self + clazz = clazz.class unless clazz.is_a?(Class) + clazz.class_variable_set(name, vm.pop) + end end # ### Summary @@ -3239,6 +4103,15 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + value, parent = vm.pop(2) + parent.const_set(name, value) + end end # ### Summary @@ -3274,6 +4147,16 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + # Evaluating the name 
of the global variable because there isn't a + # reflection API for global variables. + eval("#{name} = vm.pop", binding, __FILE__, __LINE__) + end end # ### Summary @@ -3315,6 +4198,15 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + method = Object.instance_method(:instance_variable_set) + method.bind(vm._self).call(name, vm.pop) + end end # ### Summary @@ -3356,6 +4248,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.local_set(index, level, vm.pop) + end end # ### Summary @@ -3393,6 +4293,14 @@ def pops def pushes 0 end + + def canonical + SetLocal.new(index, 0) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3430,6 +4338,14 @@ def pops def pushes 0 end + + def canonical + SetLocal.new(index, 1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3465,6 +4381,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.stack[-number - 1] = vm.stack.last + end end # ### Summary @@ -3501,6 +4425,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + case key + when GetSpecial::SVAR_LASTLINE + raise NotImplementedError, "svar SVAR_LASTLINE" + when GetSpecial::SVAR_BACKREF + raise NotImplementedError, "setspecial SVAR_BACKREF" + when GetSpecial::SVAR_FLIPFLOP_START + vm.frame_svar.svars[GetSpecial::SVAR_FLIPFLOP_START] + end + end end # ### Summary @@ -3537,6 +4476,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(*vm.pop) + end end # ### Summary @@ -3569,6 +4516,15 @@ def pops def pushes 2 end + + def canonical + self + end + + def call(vm) + left, right = vm.pop(2) + vm.push(right, left) + end end # ### Summary @@ -3584,6 +4540,16 @@ def pushes # ~~~ # class Throw + TAG_NONE = 0x0 + TAG_RETURN = 0x1 + TAG_BREAK = 0x2 + TAG_NEXT = 0x3 + TAG_RETRY = 0x4 + TAG_REDO = 0x5 + TAG_RAISE = 0x6 + TAG_THROW = 0x7 + TAG_FATAL = 0x8 + attr_reader :type def initialize(type) @@ 
-3605,6 +4571,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "throw" + end end # ### Summary @@ -3643,6 +4617,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.stack[-number - 1]) + end end # ### Summary @@ -3675,6 +4657,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(Regexp.new(vm.pop(length).join, options)) + end end end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 82f7560d..93c4e4c3 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -82,6 +82,10 @@ def pops def pushes 1 end + + def call(vm) + vm.push(nil) + end end # ### Summary @@ -121,6 +125,10 @@ def pops def pushes 1 end + + def call(vm) + vm.push(vm.pop) + end end # ### Summary From 70064564221d38748366abc264368cbb5f8042b3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 29 Nov 2022 11:02:04 -0500 Subject: [PATCH 11/12] Add an entire compile! step --- lib/syntax_tree/yarv.rb | 1 - lib/syntax_tree/yarv/bf.rb | 1 + lib/syntax_tree/yarv/compiler.rb | 4 + lib/syntax_tree/yarv/disassembler.rb | 3 +- lib/syntax_tree/yarv/instruction_sequence.rb | 103 +++++++++++-------- lib/syntax_tree/yarv/instructions.rb | 43 ++++++++ 6 files changed, 111 insertions(+), 44 deletions(-) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 74f2598e..97592d4d 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -281,7 +281,6 @@ def self.compile(source, options = Compiler::Options.new) def self.interpret(source, options = Compiler::Options.new) iseq = RubyVM::InstructionSequence.compile(source, **options) iseq = InstructionSequence.from(iseq.to_a) - iseq.specialize_instructions! 
VM.new.run_top_frame(iseq) end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 78c01af5..f642fb2f 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -74,6 +74,7 @@ def compile end iseq.leave + iseq.compile! iseq end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 194b758b..3ea6d22a 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1359,6 +1359,7 @@ def visit_program(node) node.location, options ) + with_child_iseq(top_iseq) do visit_all(preexes) @@ -1372,6 +1373,9 @@ def visit_program(node) iseq.leave end + + top_iseq.compile! + top_iseq end def visit_qsymbols(node) diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 757b8b40..af325c31 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -54,7 +54,6 @@ def disassemble(iseq) clauses = {} clause = [] - iseq.to_a iseq.insns.each do |insn| case insn when InstructionSequence::Label @@ -192,7 +191,7 @@ def disassemble(iseq) Assign(VarField(target), value) end else - raise "Unknown instruction #{insn[0]}" + raise "Unknown instruction #{insn}" end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index f20981df..e3d0c2fc 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -232,24 +232,7 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, handle any compilation options that we need to. - specialize_instructions! if options.specialized_instruction? - peephole_optimize! if options.peephole_optimization? - - # Next, set it up so that all of the labels get their correct name. 
- length = 0 - insns.each do |insn| - case insn - when Integer, Symbol - # skip - when Label - insn.patch!(:"label_#{length}") - else - length += insn.length - end - end - - # Next, dump all of the instructions into a flat list. + # Dump all of the instructions into a flat list. dumped = insns.map do |insn| case insn @@ -288,6 +271,65 @@ def to_a ] end + def disasm + output = StringIO.new + output << "== disasm: #:1 (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})> (catch: FALSE)\n" + + length = 0 + events = [] + + insns.each do |insn| + case insn + when Integer + # skip + when Symbol + events << insn + when Label + # skip + else + output << "%04d " % length + output << insn.disasm(self) + output << "\n" + end + + length += insn.length + end + + output.string + end + + # This method converts our linked list of instructions into a final array + # and performs any other compilation steps necessary. + def compile! + specialize_instructions! if options.specialized_instruction? + + length = 0 + insns.each do |insn| + case insn + when Integer, Symbol + # skip + when Label + insn.patch!(:"label_#{length}") + when DefineClass + insn.class_iseq.compile! + length += insn.length + when DefineMethod, DefineSMethod + insn.method_iseq.compile! + length += insn.length + when InvokeSuper, Send + insn.block_iseq.compile! if insn.block_iseq + length += insn.length + when Once + insn.iseq.compile! + length += insn.length + else + length += insn.length + end + end + + @insns = insns.to_a + end + def specialize_instructions! insns.each_node do |node, value| case value @@ -333,8 +375,7 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -395,27 +436,6 @@ def specialize_instructions! end end - def peephole_optimize! - # insns.each_node do |node, value| - # case value - # when Jump - # # jump LABEL - # # ... - # # LABEL: - # # leave - # # => - # # leave - # # ... - # # LABEL: - # # leave - # # case value.label.node.next_node&.value - # # when Leave - # # node.value = Leave.new - # # end - # end - # end - end - ########################################################################## # Child instruction sequence methods ########################################################################## @@ -1164,6 +1184,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) end end + iseq.compile! if iseq.type == :top iseq end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 0b60bd13..c146bdbf 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -33,6 +33,25 @@ def initialize( @kw_arg = kw_arg end + def disasm + flag_names = [] + flag_names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + flag_names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + flag_names << :FCALL if flag?(CALL_FCALL) + flag_names << :VCALL if flag?(CALL_VCALL) + flag_names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) + flag_names << :KWARG if flag?(CALL_KWARG) + flag_names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + flag_names << :TAILCALL if flag?(CALL_TAILCALL) + flag_names << :SUPER if flag?(CALL_SUPER) + flag_names << :ZSUPER if flag?(CALL_ZSUPER) + flag_names << :OPT_SEND if flag?(CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + "" + end + def flag?(mask) (flags & mask) > 0 end @@ -1783,6 +1802,10 @@ def call(vm) # ~~~ # class Leave + def disasm(_iseq) + "leave" + end + def to_a(_iseq) [:leave] 
end @@ -2973,6 +2996,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(_iseq) + "%-38s %s" % ["opt_mult", calldata.disasm] + end + def to_a(_iseq) [:opt_mult, calldata.to_h] end @@ -3288,6 +3315,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(iseq) + "%-38s %s" % ["opt_plus", calldata.disasm] + end + def to_a(_iseq) [:opt_plus, calldata.to_h] end @@ -3670,6 +3701,10 @@ def initialize(object) @object = object end + def disasm(_iseq) + "%-38s %s" % ["putobject", object.inspect] + end + def to_a(_iseq) [:putobject, object] end @@ -3708,6 +3743,10 @@ def call(vm) # ~~~ # class PutObjectInt2Fix0 + def disasm(_iseq) + "putobject_INT2FIX_0_" + end + def to_a(_iseq) [:putobject_INT2FIX_0_] end @@ -3746,6 +3785,10 @@ def call(vm) # ~~~ # class PutObjectInt2Fix1 + def disasm(_iseq) + "putobject_INT2FIX_1_" + end + def to_a(_iseq) [:putobject_INT2FIX_1_] end From 46ab8292ef0f88f5969e4dece3c45a2c8c968d74 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 29 Nov 2022 12:58:33 -0500 Subject: [PATCH 12/12] Allow calling disasm on instructions --- .rubocop.yml | 9 + lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/disasm_formatter.rb | 211 +++++++ lib/syntax_tree/yarv/instruction_sequence.rb | 37 +- lib/syntax_tree/yarv/instructions.rb | 558 +++++++++++++++++-- lib/syntax_tree/yarv/legacy.rb | 19 + lib/syntax_tree/yarv/local_table.rb | 8 + test/compiler_test.rb | 11 + 8 files changed, 793 insertions(+), 61 deletions(-) create mode 100644 lib/syntax_tree/yarv/disasm_formatter.rb diff --git a/.rubocop.yml b/.rubocop.yml index c81fdb59..daf5a824 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -16,6 +16,12 @@ Layout/LineLength: Lint/AmbiguousBlockAssociation: Enabled: false +Lint/AmbiguousOperatorPrecedence: + Enabled: false + +Lint/AmbiguousRange: + Enabled: false + Lint/BooleanSymbol: Enabled: false @@ -91,6 +97,9 @@ Style/ExplicitBlockArgument: Style/FormatString: Enabled: false +Style/FormatStringToken: + Enabled: false + Style/GuardClause: 
Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index b2ff8414..eadb485d 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,6 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/disasm_formatter" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" diff --git a/lib/syntax_tree/yarv/disasm_formatter.rb b/lib/syntax_tree/yarv/disasm_formatter.rb new file mode 100644 index 00000000..566bc8fd --- /dev/null +++ b/lib/syntax_tree/yarv/disasm_formatter.rb @@ -0,0 +1,211 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class DisasmFormatter + attr_reader :output, :queue + attr_reader :current_prefix, :current_iseq + + def initialize + @output = StringIO.new + @queue = [] + + @current_prefix = "" + @current_iseq = nil + end + + ######################################################################## + # Helpers for various instructions + ######################################################################## + + def calldata(value) + flag_names = [] + flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) + if value.flag?(CallData::CALL_ARGS_BLOCKARG) + flag_names << :ARGS_BLOCKARG + end + flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) + flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) + flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) + flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) + flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) + flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) + flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) + flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) + flag_names << :OPT_SEND if 
value.flag?(CallData::CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{value.method}" if value.method + parts << "argc:#{value.argc}" + parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg + parts << flag_names.join("|") if flag_names.any? + + "" + end + + def enqueue(iseq) + queue << iseq + end + + def event(name) + case name + when :RUBY_EVENT_B_CALL + "Bc" + when :RUBY_EVENT_B_RETURN + "Br" + when :RUBY_EVENT_CALL + "Ca" + when :RUBY_EVENT_CLASS + "Cl" + when :RUBY_EVENT_END + "En" + when :RUBY_EVENT_LINE + "Li" + when :RUBY_EVENT_RETURN + "Re" + else + raise "Unknown event: #{name}" + end + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] + end + + def label(value) + value.name["label_".length..] + end + + def local(index, explicit: nil, implicit: nil) + current = current_iseq + (explicit || implicit).times { current = current.parent_iseq } + + value = "#{current.local_table.name_at(index)}@#{index}" + value << ", #{explicit}" if explicit + value + end + + def object(value) + value.inspect + end + + ######################################################################## + # Main entrypoint + ######################################################################## + + def format! + while (@current_iseq = queue.shift) + output << "\n" if output.pos > 0 + format_iseq(@current_iseq) + end + + output.string + end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: " + output << "#:1 " + + location = iseq.location + output << "(#{location.start_line},#{location.start_column})-" + output << "(#{location.end_line},#{location.end_column})" + output << "> " + + if iseq.catch_table.any? 
+ output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + length = 0 + events = [] + lines = [] + + iseq.insns.each do |insn| + case insn + when Integer + lines << insn + when Symbol + events << event(insn) + when InstructionSequence::Label + # skip + else + output << "#{current_prefix}%04d " % length + + disasm = insn.disasm(self) + output << disasm + + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 + end + + if lines.any? + output << "(%4d)" % lines.last + lines.clear + end + + if events.any? 
+ output << "[#{events.join}]" + events.clear + end + + output << "\n" + length += insn.length + end + end + end + + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e3d0c2fc..ee5390a1 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -272,30 +272,9 @@ def to_a end def disasm - output = StringIO.new - output << "== disasm: #:1 (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})> (catch: FALSE)\n" - - length = 0 - events = [] - - insns.each do |insn| - case insn - when Integer - # skip - when Symbol - events << insn - when Label - # skip - else - output << "%04d " % length - output << insn.disasm(self) - output << "\n" - end - - length += insn.length - end - - output.string + formatter = DisasmFormatter.new + formatter.enqueue(self) + formatter.format! end # This method converts our linked list of instructions into a final array @@ -375,7 +354,8 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -980,8 +960,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up all of the instructions source[13].each do |insn| - # skip line numbers - next if insn.is_a?(Integer) + # add line numbers + if insn.is_a?(Integer) + iseq.push(insn) + next + end # add events and labels if insn.is_a?(Symbol) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c146bdbf..772f1bb3 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -33,25 +33,6 @@ def initialize( @kw_arg = kw_arg end - def disasm - flag_names = [] - flag_names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) - flag_names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) - flag_names << :FCALL if flag?(CALL_FCALL) - flag_names << :VCALL if flag?(CALL_VCALL) - flag_names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) - flag_names << :KWARG if flag?(CALL_KWARG) - flag_names << :KW_SPLAT if flag?(CALL_KW_SPLAT) - flag_names << :TAILCALL if flag?(CALL_TAILCALL) - flag_names << :SUPER if flag?(CALL_SUPER) - flag_names << :ZSUPER if flag?(CALL_ZSUPER) - flag_names << :OPT_SEND if flag?(CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) - - "" - end - def flag?(mask) (flags & mask) > 0 end @@ -102,6 +83,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("adjuststack", [fmt.object(number)]) + end + def to_a(_iseq) [:adjuststack, number] end @@ -146,6 +131,10 @@ def call(vm) # ~~~ # class AnyToString + def disasm(fmt) + fmt.instruction("anytostring") + end + def to_a(_iseq) [:anytostring] end @@ -200,6 +189,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchif", [fmt.label(label)]) + end + def to_a(_iseq) [:branchif, 
label.name] end @@ -249,6 +242,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchnil", [fmt.label(label)]) + end + def to_a(_iseq) [:branchnil, label.name] end @@ -297,6 +294,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchunless", [fmt.label(label)]) + end + def to_a(_iseq) [:branchunless, label.name] end @@ -349,6 +350,13 @@ def initialize(keyword_bits_index, keyword_index) @keyword_index = keyword_index end + def disasm(fmt) + fmt.instruction( + "checkkeyword", + [fmt.object(keyword_bits_index), fmt.object(keyword_index)] + ) + end + def to_a(iseq) [ :checkkeyword, @@ -401,6 +409,10 @@ def initialize(type) @type = type end + def disasm(fmt) + fmt.instruction("checkmatch", [fmt.object(type)]) + end + def to_a(_iseq) [:checkmatch, type] end @@ -468,6 +480,56 @@ def initialize(type) @type = type end + def disasm(fmt) + name = + case type + when TYPE_OBJECT + "T_OBJECT" + when TYPE_CLASS + "T_CLASS" + when TYPE_MODULE + "T_MODULE" + when TYPE_FLOAT + "T_FLOAT" + when TYPE_STRING + "T_STRING" + when TYPE_REGEXP + "T_REGEXP" + when TYPE_ARRAY + "T_ARRAY" + when TYPE_HASH + "T_HASH" + when TYPE_STRUCT + "T_STRUCT" + when TYPE_BIGNUM + "T_BIGNUM" + when TYPE_FILE + "T_FILE" + when TYPE_DATA + "T_DATA" + when TYPE_MATCH + "T_MATCH" + when TYPE_COMPLEX + "T_COMPLEX" + when TYPE_RATIONAL + "T_RATIONAL" + when TYPE_NIL + "T_NIL" + when TYPE_TRUE + "T_TRUE" + when TYPE_FALSE + "T_FALSE" + when TYPE_SYMBOL + "T_SYMBOL" + when TYPE_FIXNUM + "T_FIXNUM" + when TYPE_UNDEF + "T_UNDEF" + end + + fmt.instruction("checktype", [name]) + end + def to_a(_iseq) [:checktype, type] end @@ -559,6 +621,10 @@ def call(vm) # ~~~ # class ConcatArray + def disasm(fmt) + fmt.instruction("concatarray") + end + def to_a(_iseq) [:concatarray] end @@ -607,6 +673,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("concatstrings", [fmt.object(number)]) + end + def to_a(_iseq) 
[:concatstrings, number] end @@ -662,6 +732,14 @@ def initialize(name, class_iseq, flags) @flags = flags end + def disasm(fmt) + fmt.enqueue(class_iseq) + fmt.instruction( + "defineclass", + [fmt.object(name), class_iseq.name, fmt.object(flags)] + ) + end + def to_a(_iseq) [:defineclass, name, class_iseq.to_a, flags] end @@ -731,6 +809,51 @@ def initialize(type, name, message) @message = message end + def disasm(fmt) + type_name = + case type + when TYPE_NIL + "nil" + when TYPE_IVAR + "ivar" + when TYPE_LVAR + "lvar" + when TYPE_GVAR + "gvar" + when TYPE_CVAR + "cvar" + when TYPE_CONST + "const" + when TYPE_METHOD + "method" + when TYPE_YIELD + "yield" + when TYPE_ZSUPER + "zsuper" + when TYPE_SELF + "self" + when TYPE_TRUE + "true" + when TYPE_FALSE + "false" + when TYPE_ASGN + "asgn" + when TYPE_EXPR + "expr" + when TYPE_REF + "ref" + when TYPE_FUNC + "func" + when TYPE_CONST_FROM + "constant-from" + end + + fmt.instruction( + "defined", + [type_name, fmt.object(name), fmt.object(message)] + ) + end + def to_a(_iseq) [:defined, type, name, message] end @@ -809,6 +932,14 @@ def initialize(method_name, method_iseq) @method_iseq = method_iseq end + def disasm(fmt) + fmt.enqueue(method_iseq) + fmt.instruction( + "definemethod", + [fmt.object(method_name), method_iseq.name] + ) + end + def to_a(_iseq) [:definemethod, method_name, method_iseq.to_a] end @@ -863,6 +994,14 @@ def initialize(method_name, method_iseq) @method_iseq = method_iseq end + def disasm(fmt) + fmt.enqueue(method_iseq) + fmt.instruction( + "definesmethod", + [fmt.object(method_name), method_iseq.name] + ) + end + def to_a(_iseq) [:definesmethod, method_name, method_iseq.to_a] end @@ -906,6 +1045,10 @@ def call(vm) # ~~~ # class Dup + def disasm(fmt) + fmt.instruction("dup") + end + def to_a(_iseq) [:dup] end @@ -948,6 +1091,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("duparray", [fmt.object(object)]) + end + def to_a(_iseq) [:duparray, object] end @@ -990,6 
+1137,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("duphash", [fmt.object(object)]) + end + def to_a(_iseq) [:duphash, object] end @@ -1032,6 +1183,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("dupn", [fmt.object(number)]) + end + def to_a(_iseq) [:dupn, number] end @@ -1079,6 +1234,10 @@ def initialize(number, flags) @flags = flags end + def disasm(fmt) + fmt.instruction("expandarray", [fmt.object(number), fmt.object(flags)]) + end + def to_a(_iseq) [:expandarray, number, flags] end @@ -1129,6 +1288,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("getblockparam", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = iseq.parent_iseq } @@ -1179,6 +1342,13 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction( + "getblockparamproxy", + [fmt.local(index, explicit: level)] + ) + end + def to_a(iseq) current = iseq level.times { current = iseq.parent_iseq } @@ -1226,6 +1396,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "getclassvariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:getclassvariable, name, cache] end @@ -1272,6 +1449,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getconstant", [fmt.object(name)]) + end + def to_a(_iseq) [:getconstant, name] end @@ -1324,6 +1505,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getglobal", [fmt.object(name)]) + end + def to_a(_iseq) [:getglobal, name] end @@ -1376,6 +1561,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "getinstancevariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:getinstancevariable, name, cache] end @@ -1424,6 +1616,10 @@ def initialize(index, level) @level = level end + def 
disasm(fmt) + fmt.instruction("getlocal", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -1471,6 +1667,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("getlocal_WC_0", [fmt.local(index, implicit: 0)]) + end + def to_a(iseq) [:getlocal_WC_0, iseq.local_table.offset(index)] end @@ -1516,6 +1716,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("getlocal_WC_1", [fmt.local(index, implicit: 1)]) + end + def to_a(iseq) [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end @@ -1548,7 +1752,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [true] + # 1 if (a == 1) .. (b == 2) # ~~~ # class GetSpecial @@ -1563,6 +1767,10 @@ def initialize(key, type) @type = type end + def disasm(fmt) + fmt.instruction("getspecial", [fmt.object(key), fmt.object(type)]) + end + def to_a(_iseq) [:getspecial, key, type] end @@ -1607,6 +1815,10 @@ def call(vm) # ~~~ # class Intern + def disasm(fmt) + fmt.instruction("intern") + end + def to_a(_iseq) [:intern] end @@ -1653,6 +1865,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("invokeblock", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:invokeblock, calldata.to_h] end @@ -1700,6 +1916,14 @@ def initialize(calldata, block_iseq) @block_iseq = block_iseq end + def disasm(fmt) + fmt.enqueue(block_iseq) if block_iseq + fmt.instruction( + "invokesuper", + [fmt.calldata(calldata), block_iseq&.name || "nil"] + ) + end + def to_a(_iseq) [:invokesuper, calldata.to_h, block_iseq&.to_a] end @@ -1766,6 +1990,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("jump", [fmt.label(label)]) + end + def to_a(_iseq) [:jump, label.name] end @@ -1802,8 +2030,8 @@ def call(vm) # ~~~ # class Leave - def disasm(_iseq) - "leave" + def disasm(fmt) + fmt.instruction("leave") end def to_a(_iseq) @@ -1852,6 +2080,10 @@ def initialize(number) 
@number = number end + def disasm(fmt) + fmt.instruction("newarray", [fmt.object(number)]) + end + def to_a(_iseq) [:newarray, number] end @@ -1896,6 +2128,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("newarraykwsplat", [fmt.object(number)]) + end + def to_a(_iseq) [:newarraykwsplat, number] end @@ -1942,6 +2178,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("newhash", [fmt.object(number)]) + end + def to_a(_iseq) [:newhash, number] end @@ -1989,6 +2229,10 @@ def initialize(exclude_end) @exclude_end = exclude_end end + def disasm(fmt) + fmt.instruction("newrange", [fmt.object(exclude_end)]) + end + def to_a(_iseq) [:newrange, exclude_end] end @@ -2026,6 +2270,10 @@ def call(vm) # ~~~ # class Nop + def disasm(fmt) + fmt.instruction("nop") + end + def to_a(_iseq) [:nop] end @@ -2071,6 +2319,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("objtostring", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:objtostring, calldata.to_h] end @@ -2117,6 +2369,11 @@ def initialize(iseq, cache) @cache = cache end + def disasm(fmt) + fmt.enqueue(iseq) + fmt.instruction("once", [iseq.name, fmt.inline_storage(cache)]) + end + def to_a(_iseq) [:once, iseq.to_a, cache] end @@ -2164,6 +2421,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_and", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_and, calldata.to_h] end @@ -2208,6 +2469,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_aref", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_aref, calldata.to_h] end @@ -2254,6 +2519,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_aref_with", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_aref_with, object, calldata.to_h] end @@ -2299,6 +2571,10 @@ def 
initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_aset", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_aset, calldata.to_h] end @@ -2344,6 +2620,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_aset_with", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_aset_with, object, calldata.to_h] end @@ -2401,6 +2684,13 @@ def initialize(case_dispatch_hash, else_label) @else_label = else_label end + def disasm(fmt) + fmt.instruction( + "opt_case_dispatch", + ["<cdhash>", fmt.label(else_label)] + ) + end + def to_a(_iseq) [ :opt_case_dispatch, @@ -2450,6 +2740,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_div", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_div, calldata.to_h] end @@ -2494,6 +2788,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_empty_p", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_empty_p, calldata.to_h] end @@ -2539,6 +2837,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_eq", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_eq, calldata.to_h] end @@ -2584,6 +2886,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_ge", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_ge, calldata.to_h] end @@ -2628,6 +2934,11 @@ def initialize(names) @names = names end + def disasm(fmt) + cache = "<ic:0 #{names.join("::")}>" + fmt.instruction("opt_getconstant_path", [cache]) + end + def to_a(_iseq) [:opt_getconstant_path, names] end @@ -2680,6 +2991,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_gt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_gt, calldata.to_h] end @@ -2725,6 +3040,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_le",
[fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_le, calldata.to_h] end @@ -2770,6 +3089,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_length", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_length, calldata.to_h] end @@ -2815,6 +3138,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_lt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_lt, calldata.to_h] end @@ -2860,6 +3187,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_ltlt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_ltlt, calldata.to_h] end @@ -2906,6 +3237,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_minus", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_minus, calldata.to_h] end @@ -2951,6 +3286,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_mod", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_mod, calldata.to_h] end @@ -2996,8 +3335,8 @@ def initialize(calldata) @calldata = calldata end - def disasm(_iseq) - "%-38s %s" % ["opt_mult", calldata.disasm] + def disasm(fmt) + fmt.instruction("opt_mult", [fmt.calldata(calldata)]) end def to_a(_iseq) @@ -3048,6 +3387,13 @@ def initialize(eq_calldata, neq_calldata) @neq_calldata = neq_calldata end + def disasm(fmt) + fmt.instruction( + "opt_neq", + [fmt.calldata(eq_calldata), fmt.calldata(neq_calldata)] + ) + end + def to_a(_iseq) [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] end @@ -3083,7 +3429,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [1, 2, 3].max + # [a, b, c].max # ~~~ # class OptNewArrayMax @@ -3093,6 +3439,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("opt_newarray_max", [fmt.object(number)]) + end + def to_a(_iseq) [:opt_newarray_max, number] end @@ -3127,7 +3477,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [1, 
2, 3].min + # [a, b, c].min # ~~~ # class OptNewArrayMin @@ -3137,6 +3487,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("opt_newarray_min", [fmt.object(number)]) + end + def to_a(_iseq) [:opt_newarray_min, number] end @@ -3182,6 +3536,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_nil_p", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_nil_p, calldata.to_h] end @@ -3225,6 +3583,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_not", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_not, calldata.to_h] end @@ -3270,6 +3632,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_or", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_or, calldata.to_h] end @@ -3315,8 +3681,8 @@ def initialize(calldata) @calldata = calldata end - def disasm(iseq) - "%-38s %s" % ["opt_plus", calldata.disasm] + def disasm(fmt) + fmt.instruction("opt_plus", [fmt.calldata(calldata)]) end def to_a(_iseq) @@ -3363,6 +3729,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_regexpmatch2", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_regexpmatch2, calldata.to_h] end @@ -3407,6 +3777,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_send_without_block", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_send_without_block, calldata.to_h] end @@ -3452,6 +3826,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_size", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_size, calldata.to_h] end @@ -3497,6 +3875,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_str_freeze", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_str_freeze, object, calldata.to_h] end 
@@ -3542,6 +3927,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_str_uminus", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_str_uminus, object, calldata.to_h] end @@ -3587,6 +3979,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_succ", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_succ, calldata.to_h] end @@ -3623,6 +4019,10 @@ def call(vm) # ~~~ # class Pop + def disasm(fmt) + fmt.instruction("pop") + end + def to_a(_iseq) [:pop] end @@ -3659,6 +4059,10 @@ def call(vm) # ~~~ # class PutNil + def disasm(fmt) + fmt.instruction("putnil") + end + def to_a(_iseq) [:putnil] end @@ -3701,8 +4105,8 @@ def initialize(object) @object = object end - def disasm(_iseq) - "%-38s %s" % ["putobject", object.inspect] + def disasm(fmt) + fmt.instruction("putobject", [fmt.object(object)]) end def to_a(_iseq) @@ -3743,8 +4147,8 @@ def call(vm) # ~~~ # class PutObjectInt2Fix0 - def disasm(_iseq) - "putobject_INT2FIX_0_" + def disasm(fmt) + fmt.instruction("putobject_INT2FIX_0_") end def to_a(_iseq) @@ -3785,8 +4189,8 @@ def call(vm) # ~~~ # class PutObjectInt2Fix1 - def disasm(_iseq) - "putobject_INT2FIX_1_" + def disasm(fmt) + fmt.instruction("putobject_INT2FIX_1_") end def to_a(_iseq) @@ -3825,6 +4229,10 @@ def call(vm) # ~~~ # class PutSelf + def disasm(fmt) + fmt.instruction("putself") + end + def to_a(_iseq) [:putself] end @@ -3873,6 +4281,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("putspecialobject", [fmt.object(object)]) + end + def to_a(_iseq) [:putspecialobject, object] end @@ -3924,6 +4336,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("putstring", [fmt.object(object)]) + end + def to_a(_iseq) [:putstring, object] end @@ -3970,6 +4386,14 @@ def initialize(calldata, block_iseq) @block_iseq = block_iseq end + def disasm(fmt) + 
fmt.enqueue(block_iseq) if block_iseq + fmt.instruction( + "send", + [fmt.calldata(calldata), block_iseq&.name || "nil"] + ) + end + def to_a(_iseq) [:send, calldata.to_h, block_iseq&.to_a] end @@ -4038,6 +4462,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("setblockparam", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -4086,6 +4514,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "setclassvariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:setclassvariable, name, cache] end @@ -4131,6 +4566,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setconstant", [fmt.object(name)]) + end + def to_a(_iseq) [:setconstant, name] end @@ -4175,6 +4614,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setglobal", [fmt.object(name)]) + end + def to_a(_iseq) [:setglobal, name] end @@ -4226,6 +4669,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "setinstancevariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:setinstancevariable, name, cache] end @@ -4274,6 +4724,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("setlocal", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -4321,6 +4775,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("setlocal_WC_0", [fmt.local(index, implicit: 0)]) + end + def to_a(iseq) [:setlocal_WC_0, iseq.local_table.offset(index)] end @@ -4366,6 +4824,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("setlocal_WC_1", [fmt.local(index, implicit: 1)]) + end + def to_a(iseq) [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end @@ 
-4409,6 +4871,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("setn", [fmt.object(number)]) + end + def to_a(_iseq) [:setn, number] end @@ -4453,6 +4919,10 @@ def initialize(key) @key = key end + def disasm(fmt) + fmt.instruction("setspecial", [fmt.object(key)]) + end + def to_a(_iseq) [:setspecial, key] end @@ -4504,6 +4974,10 @@ def initialize(flag) @flag = flag end + def disasm(fmt) + fmt.instruction("splatarray", [fmt.object(flag)]) + end + def to_a(_iseq) [:splatarray, flag] end @@ -4544,6 +5018,10 @@ def call(vm) # ~~~ # class Swap + def disasm(fmt) + fmt.instruction("swap") + end + def to_a(_iseq) [:swap] end @@ -4599,6 +5077,10 @@ def initialize(type) @type = type end + def disasm(fmt) + fmt.instruction("throw", [fmt.object(type)]) + end + def to_a(_iseq) [:throw, type] end @@ -4645,6 +5127,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("topn", [fmt.object(number)]) + end + def to_a(_iseq) [:topn, number] end @@ -4689,6 +5175,10 @@ def initialize(options, length) @length = length end + def disasm(fmt) + fmt.instruction("toregexp", [fmt.object(options), fmt.object(length)]) + end + def to_a(_iseq) [:toregexp, options, length] end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 93c4e4c3..30a95437 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -26,6 +26,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getclassvariable", [fmt.object(name)]) + end + def to_a(_iseq) [:getclassvariable, name] end @@ -67,6 +71,13 @@ def initialize(label, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "opt_getinlinecache", + [fmt.label(label), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:opt_getinlinecache, label.name, cache] end @@ -110,6 +121,10 @@ def initialize(cache) @cache = cache end + def disasm(fmt) + fmt.instruction("opt_setinlinecache", [fmt.inline_storage(cache)]) 
+ end + def to_a(_iseq) [:opt_setinlinecache, cache] end @@ -152,6 +167,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setclassvariable", [fmt.object(name)]) + end + def to_a(_iseq) [:setclassvariable, name] end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb index 5eac346c..54cc55ad 100644 --- a/lib/syntax_tree/yarv/local_table.rb +++ b/lib/syntax_tree/yarv/local_table.rb @@ -44,6 +44,10 @@ def initialize @locals = [] end + def empty? + locals.empty? + end + def find(name, level = 0) index = locals.index { |local| local.name == name } Lookup.new(locals[index], index, level) if index @@ -57,6 +61,10 @@ def names locals.map(&:name) end + def name_at(index) + locals[index].name + end + def size locals.length end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 1f4a5299..1922f8c6 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -449,6 +449,10 @@ class CompilerTest < Minitest::Test define_method(:"test_loads_#{source}_(#{suffix})") do assert_loads(source, options) end + + define_method(:"test_disasms_#{source}_(#{suffix})") do + assert_disasms(source, options) + end end end @@ -507,6 +511,13 @@ def assert_loads(source, options) ) end + # Check that we can successfully disasm the compiled instruction sequence. + def assert_disasms(source, options) + compiled = RubyVM::InstructionSequence.compile(source, **options) + yarv = YARV::InstructionSequence.from(compiled.to_a, options) + assert_kind_of String, yarv.disasm + end + def assert_evaluates(expected, source) assert_equal expected, YARV.compile(source).eval end