Skip to content

Commit 7e6e4d1

Browse files
committed
Build a data flow graph
1 parent 33d36ed commit 7e6e4d1

File tree

3 files changed

+215
-1
lines changed

3 files changed

+215
-1
lines changed

lib/syntax_tree.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
require_relative "syntax_tree/yarv/bf"
3333
require_relative "syntax_tree/yarv/compiler"
3434
require_relative "syntax_tree/yarv/control_flow_graph"
35+
require_relative "syntax_tree/yarv/data_flow_graph"
3536
require_relative "syntax_tree/yarv/decompiler"
3637
require_relative "syntax_tree/yarv/disassembler"
3738
require_relative "syntax_tree/yarv/instruction_sequence"

lib/syntax_tree/yarv/control_flow_graph.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,6 @@ def self.compile(iseq)
130130

131131
def disasm
132132
fmt = Disassembler.new
133-
134133
output = StringIO.new
135134
output.puts "== cfg #{iseq.name}"
136135

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
# frozen_string_literal: true

module SyntaxTree
  module YARV
    # Constructs a data-flow-graph of a YARV instruction sequence, via a
    # control-flow-graph. Data flow is discovered locally and then globally. The
    # graph only considers data flow through the stack - local variables and
    # objects are considered fully escaped in this analysis.
    class DataFlowGraph
      # This object represents the flow of data between instructions.
      # For an instruction, +in+ holds the producers of the values it pops
      # (instruction indices, or symbols naming basic-block arguments) and
      # +out+ holds the consumers of the values it pushes. For a basic block,
      # +in+/+out+ hold the block's named input/output arguments.
      class DataFlow
        attr_reader :in
        attr_reader :out

        def initialize
          @in = []
          @out = []
        end
      end

      # cfg:         the ControlFlowGraph this data flow graph was built from
      # insn_flows:  Hash of instruction index => DataFlow
      # block_flows: Hash of block start index => DataFlow
      attr_reader :cfg, :insn_flows, :block_flows

      def initialize(cfg, insn_flows, block_flows)
        @cfg = cfg
        @insn_flows = insn_flows
        @block_flows = block_flows
      end

      # Builds a DataFlowGraph from the given control flow graph. First wires
      # up local (intra-block) flow with an abstract stack, then propagates
      # block arguments globally with a worklist, and finally sanity-checks
      # the result (raises on an inconsistency).
      def self.compile(cfg)
        # First, create a data structure to encode data flow between
        # instructions.
        insn_flows = {}
        cfg.insns.each_with_index do |insn, index|
          insn_flows[index] = DataFlow.new
        end

        # Next, create a data structure to encode data flow between basic
        # blocks.
        block_flows = {}
        cfg.blocks.each do |block|
          block_flows[block.block_start] = DataFlow.new
        end

        # Now, discover the data flow within each basic block. Using an abstract
        # stack, connect from consumers of data to the producers of that data.
        cfg.blocks.each do |block|
          block_flow = block_flows.fetch(block.block_start)

          stack = []
          # NOTE(review): stack_initial_depth is incremented below but never
          # read in this method — presumably kept for later use; confirm.
          stack_initial_depth = 0

          # Go through each instruction in the block...
          block.insns.each.with_index(block.block_start) do |insn, index|
            insn_flow = insn_flows[index]

            # How many values will be missing from the local stack to run this
            # instruction?
            missing_stack_values = insn.pops - stack.size

            # For every value the instruction pops off the stack...
            insn.pops.times do
              # Was the value it pops off from another basic block?
              if stack.empty?
                # This is a basic block argument.
                name = :"in_#{missing_stack_values - 1}"

                insn_flow.in.unshift(name)
                block_flow.in.unshift(name)

                stack_initial_depth += 1
                missing_stack_values -= 1
              else
                # Connect this consumer to the producer of the value.
                insn_flow.in.unshift(stack.pop)
              end
            end

            # Record on our abstract stack that this instruction pushed
            # this value onto the stack.
            insn.pushes.times { stack << index }
          end

          # Values that are left on the stack after going through all
          # instructions are arguments to the basic block that we jump to.
          stack.reverse_each.with_index do |producer, index|
            block_flow.out << producer
            insn_flows[producer].out << :"out_#{index}"
          end
        end

        # Go backwards and connect from producers to consumers.
        cfg.insns.each_with_index do |insn, index|
          # For every instruction that produced a value used in this
          # instruction...
          insn_flows[index].in.each do |producer|
            # If it's actually another instruction and not a basic block
            # argument...
            if producer.is_a?(Integer)
              # Record in the producing instruction that it produces a value
              # used by this instruction.
              insn_flows[producer].out << index
            end
          end
        end

        # Now, discover the data flow between basic blocks.
        stack = [*cfg.blocks]
        until stack.empty?
          succ = stack.pop
          succ_flow = block_flows.fetch(succ.block_start)
          succ.preds.each do |pred|
            pred_flow = block_flows.fetch(pred.block_start)

            # Does a predecessor block have fewer outputs than the successor
            # has inputs?
            if pred_flow.out.size < succ_flow.in.size
              # If so then add arguments to pass data through from the
              # predecessor's predecessors.
              (succ_flow.in.size - pred_flow.out.size).times do |index|
                name = :"pass_#{index}"
                pred_flow.in.unshift(name)
                pred_flow.out.unshift(name)
              end

              # Since we modified the predecessor, add it back to the worklist
              # so it'll be considered as a successor again, and propagate the
              # global data flow back up the control flow graph.
              stack << pred
            end
          end
        end

        # Verify that we constructed the data flow graph correctly. Check that
        # the first block has no arguments.
        raise unless block_flows.fetch(cfg.blocks.first.block_start).in.empty?

        # Check all control flow edges between blocks pass the right number of
        # arguments.
        cfg.blocks.each do |pred|
          pred_flow = block_flows.fetch(pred.block_start)

          if pred.succs.empty?
            # With no successors, there should be no output arguments.
            raise unless pred_flow.out.empty?
          else
            # Check each successor...
            pred.succs.each do |succ|
              succ_flow = block_flows.fetch(succ.block_start)

              # The predecessor should have as many output arguments as the
              # successor has input arguments.
              raise unless pred_flow.out.size == succ_flow.in.size
            end
          end
        end

        # Finally we can return the data flow graph.
        new(cfg, insn_flows, block_flows)
      end

      # Returns a string disassembly of the data flow graph: each block with
      # its predecessors, in/out arguments, and per-instruction data flow.
      def disasm
        fmt = Disassembler.new
        output = StringIO.new
        output.puts "== dfg #{cfg.iseq.name}"

        cfg.blocks.each do |block|
          output.print(block.id)
          unless block.preds.empty?
            output.print(" # from: #{block.preds.map(&:id).join(", ")}")
          end
          output.puts

          block_flow = block_flows.fetch(block.block_start)
          unless block_flow.in.empty?
            output.puts " # in: #{block_flow.in.join(", ")}"
          end

          block.insns.each.with_index(block.block_start) do |insn, index|
            output.print(" ")
            output.print(insn.disasm(fmt))

            insn_flow = insn_flows[index]
            # Instructions with no data flow get a bare line.
            if insn_flow.in.empty? && insn_flow.out.empty?
              output.puts
              next
            end

            output.print(" # ")
            unless insn_flow.in.empty?
              output.print("in: #{insn_flow.in.join(", ")}")
              output.print("; ") unless insn_flow.out.empty?
            end

            unless insn_flow.out.empty?
              output.print("out: #{insn_flow.out.join(", ")}")
            end

            output.puts
          end

          succs = block.succs.map(&:id)
          succs << "leaves" if block.last.leaves?
          output.puts(" # to: #{succs.join(", ")}") unless succs.empty?

          unless block_flow.out.empty?
            output.puts " # out: #{block_flow.out.join(", ")}"
          end
        end

        output.string
      end
    end
  end
end

0 commit comments

Comments
 (0)