22
33module SyntaxTree
44 module YARV
5- # Constructs a control-flow-graph of a YARV instruction sequence. We use
6- # conventional basic-blocks.
5+ # This class represents a control flow graph of a YARV instruction sequence.
6+ # It constructs a graph of basic blocks that hold subsets of the list of
7+ # instructions from the instruction sequence.
8+ #
9+ # You can use this class by calling the ::compile method and passing it a
10+ # YARV instruction sequence. It will return a control flow graph object.
11+ #
12+ # iseq = RubyVM::InstructionSequence.compile("1 + 2")
13+ # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
14+ # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
15+ #
716 class ControlFlowGraph
817 # This object represents a single basic block, wherein all contained
918 # instructions do not branch except for the last one.
1019 class BasicBlock
20+ # This is the unique identifier for this basic block.
21+ attr_reader :id
22+
1123 # This is the index into the list of instructions where this block
1224 # starts.
1325 attr_reader :block_start
@@ -22,110 +34,129 @@ class BasicBlock
2234 attr_reader :succs
2335
# Creates a basic block that starts at index block_start of the instruction
# list and holds the given instructions. The predecessor and successor lists
# start out empty.
#
# block_start - index of the first instruction of this block
# insns       - the instructions that belong to this block
def initialize(block_start, insns)
  # The identifier is derived from the index of the first instruction.
  @id = "block_#{block_start}"

  @block_start = block_start
  @insns = insns

  @preds = []
  @succs = []
end
3145
# This method is used to verify that the basic block is well formed. It
# checks that the only instruction in this basic block that branches is
# the last instruction. A RuntimeError naming the block is raised when an
# earlier instruction branches.
def verify
  # Every instruction except the final one must not branch.
  insns[0...-1].each do |insn|
    next unless insn.branches?

    raise "#{id} contains a branch before its last instruction"
  end
end
3552
# Returns the final instruction of this basic block, which is the only
# instruction in the block that is allowed to branch.
def last
  insns.last
end
3956 end
4057
# This class is responsible for creating a control flow graph from the
# given instruction sequence.
class Compiler
  # iseq is the instruction sequence being compiled. labels maps each label
  # to the index (into insns) of the instruction that immediately follows
  # it. insns is the list of Instruction objects from the instruction
  # sequence, with every other kind of entry left out.
  attr_reader :iseq, :labels, :insns

  def initialize(iseq)
    @iseq = iseq

    # We need to find all of the instructions that immediately follow
    # labels so that when we are looking at instructions that branch we
    # know where they branch to.
    @labels = {}
    @insns = []

    iseq.insns.each do |insn|
      case insn
      when Instruction
        @insns << insn
      when InstructionSequence::Label
        @labels[insn] = @insns.length
      end
    end
  end

  # This method is used to compile the instruction sequence into a control
  # flow graph. It returns an instance of ControlFlowGraph, which is
  # verified before it is handed back.
  def compile
    blocks = connect_basic_blocks(build_basic_blocks)
    ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
  end

  private

  # Finds the indices of the instructions that start a basic block because
  # they're either:
  #
  # * the start of an instruction sequence
  # * the target of a branch
  # * fallen through to from a branch
  #
  # Returns the indices as a sorted array.
  def find_basic_block_starts
    block_starts = Set.new([0])

    insns.each_with_index do |insn, index|
      next unless insn.branches?

      block_starts.add(labels[insn.label]) if insn.respond_to?(:label)
      block_starts.add(index + 1) if insn.falls_through?
    end

    block_starts.to_a.sort
  end

  # Builds up a set of basic blocks by iterating over the starts of each
  # block. They are keyed by the index of their first instruction.
  def build_basic_blocks
    block_starts = find_basic_block_starts

    block_starts.each_with_index.to_h do |block_start, block_index|
      # The starts are sorted, so a block runs up to the next start, or to
      # the end of the instruction list when it is the final block.
      block_end = block_starts.fetch(block_index + 1, insns.length)
      block_insns = insns[block_start...block_end]

      [block_start, BasicBlock.new(block_start, block_insns)]
    end
  end

  # Connects the blocks by letting them know which blocks precede them and
  # which blocks follow them. Returns the same hash of blocks it was given.
  def connect_basic_blocks(blocks)
    blocks.each do |block_start, block|
      insn = block.last

      # An explicit branch target is a successor.
      if insn.branches? && insn.respond_to?(:label)
        block.succs << blocks.fetch(labels[insn.label])
      end

      # The block that immediately follows is a successor when control can
      # run off the end of this block.
      if (!insn.branches? && !insn.leaves?) || insn.falls_through?
        block.succs << blocks.fetch(block_start + block.insns.length)
      end

      block.succs.each { |succ| succ.preds << block }
    end
  end
end
118143
# This is the instruction sequence that this control flow graph
# corresponds to.
attr_reader :iseq

# This is the list of instructions that this control flow graph contains.
# It is effectively the same as the list of instructions in the
# instruction sequence but with line numbers and events filtered out.
attr_reader :insns

# This is the set of basic blocks that this control flow graph contains.
attr_reader :blocks
125155
# Creates a control flow graph for the given instruction sequence from its
# list of instructions and the basic blocks built over that list.
def initialize(iseq, insns, blocks)
  @iseq = iseq
  @insns = insns
  @blocks = blocks
end
130161
131162 def disasm
@@ -156,6 +187,17 @@ def disasm
156187
157188 output . string
158189 end
190+
# This method is used to verify that the control flow graph is well
# formed. It does this by checking that each basic block is itself well
# formed. Nothing is rescued here, so an error raised by a block's own
# verify reaches the caller.
def verify
  blocks.each(&:verify)
end
197+
# Compiles the given instruction sequence into a control flow graph by
# handing it to a new Compiler and returning what its compile method
# returns.
def self.compile(iseq)
  Compiler.new(iseq).compile
end
159201 end
160202 end
161203end
0 commit comments