Skip to content

Commit 907cf23

Browse files
committed
More documentation
1 parent 7e6e4d1 commit 907cf23

File tree

2 files changed

+174
-69
lines changed

2 files changed

+174
-69
lines changed

lib/syntax_tree/yarv/control_flow_graph.rb

Lines changed: 111 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,24 @@
22

33
module SyntaxTree
44
module YARV
5-
# Constructs a control-flow-graph of a YARV instruction sequence. We use
6-
# conventional basic-blocks.
5+
# This class represents a control flow graph of a YARV instruction sequence.
6+
# It constructs a graph of basic blocks that hold subsets of the list of
7+
# instructions from the instruction sequence.
8+
#
9+
# You can use this class by calling the ::compile method and passing it a
10+
# YARV instruction sequence. It will return a control flow graph object.
11+
#
12+
#   iseq = RubyVM::InstructionSequence.compile("1 + 2")
13+
#   iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
14+
#   cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
15+
#
716
class ControlFlowGraph
817
# This object represents a single basic block, wherein all contained
918
# instructions do not branch except for the last one.
1019
class BasicBlock
20+
# This is the unique identifier for this basic block.
21+
attr_reader :id
22+
1123
# This is the index into the list of instructions where this block
1224
# starts.
1325
attr_reader :block_start
@@ -22,110 +34,129 @@ class BasicBlock
2234
attr_reader :succs
2335

2436
def initialize(block_start, insns)
37+
@id = "block_#{block_start}"
38+
2539
@block_start = block_start
2640
@insns = insns
2741

2842
@preds = []
2943
@succs = []
3044
end
3145

32-
def id
33-
"block_#{block_start}"
46+
# This method is used to verify that the basic block is well formed. It
47+
# checks that the only instruction in this basic block that branches is
48+
# the last instruction.
49+
def verify
50+
insns[0...-1].each { |insn| raise if insn.branches? }
3451
end
3552

3653
def last
3754
insns.last
3855
end
3956
end
4057

41-
# This is the instruction sequence that this control flow graph
42-
# corresponds to.
43-
attr_reader :iseq
44-
45-
# This is the list of instructions that this control flow graph contains.
46-
# It is effectively the same as the list of instructions in the
47-
# instruction sequence but with line numbers and events filtered out.
48-
attr_reader :insns
49-
50-
# This is the set of basic blocks that this control-flow graph contains.
51-
attr_reader :blocks
52-
53-
def initialize(iseq, insns, blocks)
54-
@iseq = iseq
55-
@insns = insns
56-
@blocks = blocks
57-
end
58-
59-
def self.compile(iseq)
60-
# First, we need to find all of the instructions that immediately follow
61-
# labels so that when we are looking at instructions that branch we know
62-
# where they branch to.
63-
labels = {}
64-
insns = []
65-
66-
iseq.insns.each do |insn|
67-
case insn
68-
when Instruction
69-
insns << insn
70-
when InstructionSequence::Label
71-
labels[insn] = insns.length
58+
# This class is responsible for creating a control flow graph from the
59+
# given instruction sequence.
60+
class Compiler
61+
attr_reader :iseq, :labels, :insns
62+
63+
def initialize(iseq)
64+
@iseq = iseq
65+
66+
# We need to find all of the instructions that immediately follow
67+
# labels so that when we are looking at instructions that branch we
68+
# know where they branch to.
69+
@labels = {}
70+
@insns = []
71+
72+
iseq.insns.each do |insn|
73+
case insn
74+
when Instruction
75+
@insns << insn
76+
when InstructionSequence::Label
77+
@labels[insn] = @insns.length
78+
end
7279
end
7380
end
7481

75-
# Now we need to find the indices of the instructions that start a basic
76-
# block because they're either:
82+
# This method is used to compile the instruction sequence into a control
83+
# flow graph. It returns an instance of ControlFlowGraph.
84+
def compile
85+
blocks = connect_basic_blocks(build_basic_blocks)
86+
ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
87+
end
88+
89+
private
90+
91+
# Finds the indices of the instructions that start a basic block because
92+
# they're either:
7793
#
7894
# * the start of an instruction sequence
7995
# * the target of a branch
8096
# * fallen through to from a branch
8197
#
82-
block_starts = Set.new([0])
83-
84-
insns.each_with_index do |insn, index|
85-
if insn.branches?
86-
block_starts.add(labels[insn.label]) if insn.respond_to?(:label)
87-
block_starts.add(index + 1) if insn.falls_through?
98+
def find_basic_block_starts
99+
block_starts = Set.new([0])
100+
101+
insns.each_with_index do |insn, index|
102+
if insn.branches?
103+
block_starts.add(labels[insn.label]) if insn.respond_to?(:label)
104+
block_starts.add(index + 1) if insn.falls_through?
105+
end
88106
end
107+
108+
block_starts.to_a.sort
89109
end
90110

91-
block_starts = block_starts.to_a.sort
111+
# Builds up a set of basic blocks by iterating over the starts of each
112+
# block. They are keyed by the index of their first instruction.
113+
def build_basic_blocks
114+
block_starts = find_basic_block_starts
115+
blocks = {}
92116

93-
# Now we can build up a set of basic blocks by iterating over the starts
94-
# of each block. They are keyed by the index of their first instruction.
95-
blocks = {}
96-
block_starts.each_with_index do |block_start, block_index|
97-
block_stop = (block_starts[(block_index + 1)..] + [insns.length]).min
117+
block_starts.each_with_index.to_h do |block_start, block_index|
118+
block_end = (block_starts[(block_index + 1)..] + [insns.length]).min
119+
block_insns = insns[block_start...block_end]
98120

99-
blocks[block_start] =
100-
BasicBlock.new(block_start, insns[block_start...block_stop])
121+
[block_start, BasicBlock.new(block_start, block_insns)]
122+
end
101123
end
102124

103125
# Now we need to connect the blocks by letting them know which blocks
104126
# precede them and which blocks follow them.
105-
blocks.each do |block_start, block|
106-
insn = block.last
127+
def connect_basic_blocks(blocks)
128+
blocks.each do |block_start, block|
129+
insn = block.last
107130

108-
if insn.branches? && insn.respond_to?(:label)
109-
block.succs << blocks.fetch(labels[insn.label])
110-
end
131+
if insn.branches? && insn.respond_to?(:label)
132+
block.succs << blocks.fetch(labels[insn.label])
133+
end
111134

112-
if (!insn.branches? && !insn.leaves?) || insn.falls_through?
113-
block.succs << blocks.fetch(block_start + block.insns.length)
114-
end
135+
if (!insn.branches? && !insn.leaves?) || insn.falls_through?
136+
block.succs << blocks.fetch(block_start + block.insns.length)
137+
end
115138

116-
block.succs.each { |succ| succ.preds << block }
139+
block.succs.each { |succ| succ.preds << block }
140+
end
117141
end
142+
end
118143

119-
# Here we're going to verify that we set up the control flow graph
120-
# correctly. To do so we will assert that the only instruction in any
121-
# given block that branches is the last instruction in the block.
122-
blocks.each_value do |block|
123-
block.insns[0...-1].each { |insn| raise if insn.branches? }
124-
end
144+
# This is the instruction sequence that this control flow graph
145+
# corresponds to.
146+
attr_reader :iseq
147+
148+
# This is the list of instructions that this control flow graph contains.
149+
# It is effectively the same as the list of instructions in the
150+
# instruction sequence but with line numbers and events filtered out.
151+
attr_reader :insns
152+
153+
# This is the set of basic blocks that this control-flow graph contains.
154+
attr_reader :blocks
125155

126-
# Finally we can return a new control flow graph with the given
127-
# instruction sequence and our set of basic blocks.
128-
new(iseq, insns, blocks.values)
156+
def initialize(iseq, insns, blocks)
157+
@iseq = iseq
158+
@insns = insns
159+
@blocks = blocks
129160
end
130161

131162
def disasm
@@ -156,6 +187,17 @@ def disasm
156187

157188
output.string
158189
end
190+
191+
# This method is used to verify that the control flow graph is well
192+
# formed. It does this by checking that each basic block is itself well
193+
# formed.
194+
def verify
195+
blocks.each(&:verify)
196+
end
197+
198+
def self.compile(iseq)
199+
Compiler.new(iseq).compile
200+
end
159201
end
160202
end
161203
end

test/yarv_test.rb

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,69 @@ def value
297297
end
298298
end
299299

300+
def test_cfg
301+
iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)")
302+
iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
303+
cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
304+
305+
assert_equal(<<~CFG, cfg.disasm)
306+
== cfg <compiled>
307+
block_0
308+
putobject 100
309+
putobject 14
310+
putobject_INT2FIX_0_
311+
opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>
312+
branchunless 13
313+
# to: block_7, block_5
314+
block_5 # from: block_0
315+
putobject -1
316+
jump 14
317+
# to: block_8
318+
block_7 # from: block_0
319+
putobject_INT2FIX_1_
320+
# to: block_8
321+
block_8 # from: block_5, block_7
322+
opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>
323+
leave
324+
# to: leaves
325+
CFG
326+
end
327+
328+
def test_dfg
329+
iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)")
330+
iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
331+
cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
332+
dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg)
333+
334+
assert_equal(<<~DFG, dfg.disasm)
335+
== dfg <compiled>
336+
block_0
337+
putobject 100 # out: out_0
338+
putobject 14 # out: 3
339+
putobject_INT2FIX_0_ # out: 3
340+
opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE> # in: 1, 2; out: 4
341+
branchunless 13 # in: 3
342+
# to: block_7, block_5
343+
# out: 0
344+
block_5 # from: block_0
345+
# in: pass_0
346+
putobject -1 # out: out_0
347+
jump 14
348+
# to: block_8
349+
# out: pass_0, 5
350+
block_7 # from: block_0
351+
# in: pass_0
352+
putobject_INT2FIX_1_ # out: out_0
353+
# to: block_8
354+
# out: pass_0, 7
355+
block_8 # from: block_5, block_7
356+
# in: in_0, in_1
357+
opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE> # in: in_0, in_1; out: 9
358+
leave # in: 8
359+
# to: leaves
360+
DFG
361+
end
362+
300363
private
301364

302365
def assert_decompiles(expected, source)

0 commit comments

Comments
 (0)