@@ -14,93 +14,6 @@ module YARV
1414 # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
1515 #
1616 class ControlFlowGraph
17- # This is the instruction sequence that this control flow graph
18- # corresponds to.
19- attr_reader :iseq
20-
21- # This is the list of instructions that this control flow graph contains.
22- # It is effectively the same as the list of instructions in the
23- # instruction sequence but with line numbers and events filtered out.
24- attr_reader :insns
25-
26- # This is the set of basic blocks that this control-flow graph contains.
27- attr_reader :blocks
28-
29- def initialize ( iseq , insns , blocks )
30- @iseq = iseq
31- @insns = insns
32- @blocks = blocks
33- end
34-
35- def disasm
36- fmt = Disassembler . new ( iseq )
37- fmt . puts ( "== cfg: #{ iseq . inspect } " )
38-
39- blocks . each do |block |
40- fmt . puts ( block . id )
41- fmt . with_prefix ( " " ) do |prefix |
42- unless block . incoming_blocks . empty?
43- from = block . incoming_blocks . map ( &:id )
44- fmt . puts ( "#{ prefix } == from: #{ from . join ( ", " ) } " )
45- end
46-
47- fmt . format_insns! ( block . insns , block . block_start )
48-
49- to = block . outgoing_blocks . map ( &:id )
50- to << "leaves" if block . insns . last . leaves?
51- fmt . puts ( "#{ prefix } == to: #{ to . join ( ", " ) } " )
52- end
53- end
54-
55- fmt . string
56- end
57-
58- def to_mermaid
59- output = StringIO . new
60- output . puts ( "flowchart TD" )
61-
62- fmt = Disassembler ::Mermaid . new
63- blocks . each do |block |
64- output . puts ( " subgraph #{ block . id } " )
65- previous = nil
66-
67- block . each_with_length do |insn , length |
68- node_id = "node_#{ length } "
69- label = "%04d %s" % [ length , insn . disasm ( fmt ) ]
70-
71- output . puts ( " #{ node_id } (\" #{ CGI . escapeHTML ( label ) } \" )" )
72- output . puts ( " #{ previous } --> #{ node_id } " ) if previous
73-
74- previous = node_id
75- end
76-
77- output . puts ( " end" )
78- end
79-
80- blocks . each do |block |
81- block . outgoing_blocks . each do |outgoing |
82- offset =
83- block . block_start + block . insns . sum ( &:length ) -
84- block . insns . last . length
85-
86- output . puts ( " node_#{ offset } --> node_#{ outgoing . block_start } " )
87- end
88- end
89-
90- output . string
91- end
92-
93- # This method is used to verify that the control flow graph is well
94- # formed. It does this by checking that each basic block is itself well
95- # formed.
96- def verify
97- blocks . each ( &:verify )
98- end
99-
100- def self . compile ( iseq )
101- Compiler . new ( iseq ) . compile
102- end
103-
10417 # This class is responsible for creating a control flow graph from the
10518 # given instruction sequence.
10619 class Compiler
@@ -139,7 +52,11 @@ def initialize(iseq)
# Compiles the instruction sequence into a control flow graph and returns
# the resulting ControlFlowGraph instance.
#
# The basic blocks are built first, then connected to one another, and
# finally pruned. The graph is verified before it is handed back to the
# caller.
def compile
  basic_blocks = build_basic_blocks

  # Both helpers operate on the same hash of basic blocks; their return
  # values are not used here.
  connect_basic_blocks(basic_blocks)
  prune_basic_blocks(basic_blocks)

  cfg = ControlFlowGraph.new(iseq, insns, basic_blocks.values)
  cfg.verify
  cfg
end
14562
@@ -187,7 +104,16 @@ def build_basic_blocks
187104
188105 block_starts
189106 . zip ( blocks )
190- . to_h do |block_start , block_insns |
107+ . to_h do |block_start , insns |
108+ # It's possible that we have not detected a block start but still
109+ # have branching instructions inside of a basic block. This can
110+ # happen if you have an unconditional jump which is followed by
111+ # instructions that are unreachable. As of Ruby 3.2, this is
112+ # possible with something as simple as "1 => a". In this case we
113+ # can discard all instructions that follow branching instructions.
114+ block_insns =
115+ insns . slice_after { |insn | insn . branch_targets . any? } . first
116+
191117 [ block_start , BasicBlock . new ( block_start , block_insns ) ]
192118 end
193119 end
@@ -213,6 +139,114 @@ def connect_basic_blocks(blocks)
213139 end
214140 end
215141 end
142+
# Removes every basic block that cannot be reached from the block stored
# under key 0 by following outgoing_blocks edges.
#
# The given hash (block start => basic block) is modified in place. The
# return value is whatever Hash#select! returns, which is nil when nothing
# was removed. Raises KeyError if the hash has no entry for key 0.
def prune_basic_blocks(blocks)
  reachable = Set.new
  pending = [blocks.fetch(0)]

  until pending.empty?
    candidate = pending.shift

    # Set#add? returns nil when the block has already been recorded, so
    # each block's successors are enqueued only once.
    pending.concat(candidate.outgoing_blocks) if reachable.add?(candidate)
  end

  blocks.select! { |_start, basic_block| reachable.include?(basic_block) }
end
159+ end
160+
161+ # This is the instruction sequence that this control flow graph
162+ # corresponds to.
163+ attr_reader :iseq
164+
165+ # This is the list of instructions that this control flow graph contains.
166+ # It is effectively the same as the list of instructions in the
167+ # instruction sequence but with line numbers and events filtered out.
168+ attr_reader :insns
169+
170+ # This is the set of basic blocks that this control-flow graph contains.
171+ attr_reader :blocks
172+
# Stores the instruction sequence, its list of instructions, and the basic
# blocks that make up this control flow graph.
def initialize(iseq, insns, blocks)
  @iseq, @insns, @blocks = iseq, insns, blocks
end
178+
# Returns a textual disassembly of this control flow graph as a string.
#
# A Disassembler is created for the instruction sequence and a header line
# is printed. Each basic block is then printed as its id followed by, under
# the prefix supplied by with_prefix, an optional "from" line, the block's
# instructions, and a "to" line.
#
# NOTE(review): the whitespace inside the string literals below (padding
# around interpolations, the width of the prefix literal) appears to have
# been altered when this text was extracted. Confirm against the upstream
# file before relying on the exact output.
179+ def disasm
180+ fmt = Disassembler . new ( iseq )
181+ fmt . puts ( "== cfg: #{ iseq . inspect } " )
182+
183+ blocks . each do |block |
184+ fmt . puts ( block . id )
185+ fmt . with_prefix ( " " ) do |prefix |
# The "from" line is only printed for blocks that have predecessors.
186+ unless block . incoming_blocks . empty?
187+ from = block . incoming_blocks . map ( &:id )
188+ fmt . puts ( "#{ prefix } == from: #{ from . join ( ", " ) } " )
189+ end
190+
191+ fmt . format_insns! ( block . insns , block . block_start )
192+
# The "to" line lists the ids of the successor blocks, plus the marker
# "leaves" when the block's last instruction answers true to leaves?.
193+ to = block . outgoing_blocks . map ( &:id )
194+ to << "leaves" if block . insns . last . leaves?
195+ fmt . puts ( "#{ prefix } == to: #{ to . join ( ", " ) } " )
196+ end
197+ end
198+
199+ fmt . string
200+ end
201+
# Hands this control flow graph to DataFlowGraph.compile and returns the
# result.
def to_dfg
  control_flow_graph = self
  DataFlowGraph.compile(control_flow_graph)
end
205+
# Returns a string describing this control flow graph as a Mermaid
# "flowchart TD" diagram.
#
# The first pass emits one subgraph per basic block, with one node per
# instruction and an edge between consecutive instructions. The second pass
# emits the edges between blocks.
#
# NOTE(review): the whitespace inside the string literals below (leading
# indentation, padding around interpolations and around the escaped quotes)
# appears to have been altered when this text was extracted. Confirm
# against the upstream file before relying on the exact output.
206+ def to_mermaid
207+ output = StringIO . new
208+ output . puts ( "flowchart TD" )
209+
210+ fmt = Disassembler ::Mermaid . new
211+ blocks . each do |block |
212+ output . puts ( " subgraph #{ block . id } " )
213+ previous = nil
214+
# Nodes are named after the second value yielded by each_with_length
# (presumably the instruction's offset; confirm against BasicBlock). The
# label is HTML-escaped before it is written.
215+ block . each_with_length do |insn , length |
216+ node_id = "node_#{ length } "
217+ label = "%04d %s" % [ length , insn . disasm ( fmt ) ]
218+
219+ output . puts ( " #{ node_id } (\" #{ CGI . escapeHTML ( label ) } \" )" )
220+ output . puts ( " #{ previous } --> #{ node_id } " ) if previous
221+
222+ previous = node_id
223+ end
224+
225+ output . puts ( " end" )
226+ end
227+
228+ blocks . each do |block |
229+ block . outgoing_blocks . each do |outgoing |
# block_start plus the summed lengths of all instructions, minus the length
# of the last one, is the position at which the last instruction starts.
# The edge runs from that node to the node at the successor's block_start.
230+ offset =
231+ block . block_start + block . insns . sum ( &:length ) -
232+ block . insns . last . length
233+
234+ output . puts ( " node_#{ offset } --> node_#{ outgoing . block_start } " )
235+ end
236+ end
237+
238+ output . string
239+ end
240+
# Checks that the control flow graph is well formed by asking every basic
# block to verify itself. Returns the list of blocks.
def verify
  blocks.each { |basic_block| basic_block.verify }
end
247+
# Builds a Compiler for the given instruction sequence and returns the
# result of compiling it.
def self.compile(iseq)
  compiler = Compiler.new(iseq)
  compiler.compile
end
217251 end
218252 end
0 commit comments