1+ # frozen_string_literal: true
2+
3+ module SyntaxTree
4+ module YARV
# Constructs a data-flow-graph of a YARV instruction sequence, via a
# control-flow-graph. Data flow is discovered locally (within each basic
# block) and then globally (across control flow edges). The graph only
# considers data flow through the stack - local variables and objects are
# considered fully escaped in this analysis.
class DataFlowGraph
  # This object represents the flow of data into and out of a single
  # instruction or a single basic block.
  #
  # For an instruction, +in+ lists the producers of the values it pops
  # (Integer instruction indices, or Symbol names for values that arrive
  # from outside the block) and +out+ lists the consumers of the values it
  # pushes (Integer instruction indices, or Symbol names for values that
  # are still on the stack when the block ends).
  #
  # For a basic block, +in+ lists the names of the values the block expects
  # on the stack on entry and +out+ lists what is left on the stack on exit
  # (producing instruction indices, or names of values passed through).
  class DataFlow
    attr_reader :in, :out

    def initialize
      @in = []
      @out = []
    end
  end

  # cfg:: the control flow graph this data flow graph was built from
  # insn_flows:: Hash of instruction index => DataFlow
  # block_flows:: Hash of block start index => DataFlow
  attr_reader :cfg, :insn_flows, :block_flows

  def initialize(cfg, insn_flows, block_flows)
    @cfg = cfg
    @insn_flows = insn_flows
    @block_flows = block_flows
  end

  # Builds a DataFlowGraph from the given control flow graph.
  #
  # The cfg must respond to +insns+ and +blocks+. Every block must respond
  # to +block_start+, +insns+, +preds+ and +succs+, and every instruction to
  # +pops+ and +pushes+. Instructions are keyed by their position within the
  # block offset by +block_start+.
  #
  # Returns a new graph. Raises RuntimeError if the computed flows are
  # inconsistent (see verify_flows). A cfg without any blocks produces a
  # graph with empty flows.
  def self.compile(cfg)
    # First, create a data structure to encode data flow between
    # instructions.
    insn_flows = {}
    cfg.insns.each_with_index do |_insn, index|
      insn_flows[index] = DataFlow.new
    end

    # Next, create a data structure to encode data flow between basic
    # blocks.
    block_flows = {}
    cfg.blocks.each do |block|
      block_flows[block.block_start] = DataFlow.new
    end

    find_local_flows(cfg, insn_flows, block_flows)
    connect_producers_to_consumers(cfg, insn_flows)
    find_global_flows(cfg, block_flows)
    verify_flows(cfg, block_flows)

    # Finally we can return the data flow graph.
    new(cfg, insn_flows, block_flows)
  end

  # Discovers the data flow within each basic block. Using an abstract
  # stack, connects from consumers of data to the producers of that data.
  def self.find_local_flows(cfg, insn_flows, block_flows)
    cfg.blocks.each do |block|
      block_flow = block_flows.fetch(block.block_start)
      stack = []

      # Go through each instruction in the block...
      block.insns.each.with_index(block.block_start) do |insn, index|
        insn_flow = insn_flows[index]

        # How many values will be missing from the local stack to run this
        # instruction?
        missing_stack_values = insn.pops - stack.size

        # For every value the instruction pops off the stack...
        insn.pops.times do
          if stack.empty?
            # The value comes from another basic block, so it is a basic
            # block argument. Both lists are built with unshift because
            # values are popped top-of-stack first.
            # NOTE(review): the space after the interpolation in the
            # in_/out_ names is kept exactly as found - confirm it is
            # intentional and not an extraction artifact.
            name = :"in_#{missing_stack_values - 1} "

            insn_flow.in.unshift(name)
            block_flow.in.unshift(name)

            missing_stack_values -= 1
          else
            # Connect this consumer to the producer of the value.
            insn_flow.in.unshift(stack.pop)
          end
        end

        # Record on our abstract stack that this instruction pushed
        # this value onto the stack.
        insn.pushes.times { stack << index }
      end

      # Values that are left on the stack after going through all
      # instructions are arguments to the basic block that we jump to.
      stack.reverse_each.with_index do |producer, position|
        block_flow.out << producer
        insn_flows[producer].out << :"out_#{position} "
      end
    end
  end

  # Goes backwards and connects from producers to consumers: every
  # instruction index found in an instruction's inputs gets this
  # instruction recorded as one of its outputs.
  def self.connect_producers_to_consumers(cfg, insn_flows)
    cfg.insns.each_with_index do |_insn, index|
      insn_flows[index].in.each do |producer|
        # Symbols are basic block arguments, not instructions, so only
        # Integer producers are linked back.
        insn_flows[producer].out << index if producer.is_a?(Integer)
      end
    end
  end

  # Discovers the data flow between basic blocks using a worklist.
  def self.find_global_flows(cfg, block_flows)
    worklist = [*cfg.blocks]

    until worklist.empty?
      succ = worklist.pop
      succ_flow = block_flows.fetch(succ.block_start)

      succ.preds.each do |pred|
        pred_flow = block_flows.fetch(pred.block_start)

        # Does a predecessor block have fewer outputs than the successor
        # has inputs?
        shortfall = succ_flow.in.size - pred_flow.out.size
        next unless shortfall.positive?

        # If so then add arguments to pass data through from the
        # predecessor's predecessors.
        # NOTE(review): the space after the interpolation is kept exactly
        # as found - confirm it is intentional.
        shortfall.times do |index|
          name = :"pass_#{index} "
          pred_flow.in.unshift(name)
          pred_flow.out.unshift(name)
        end

        # Since we modified the predecessor, add it back to the worklist
        # so it'll be considered as a successor again, and propagate the
        # global data flow back up the control flow graph.
        worklist << pred
      end
    end
  end

  # Verifies that the data flow graph was constructed correctly. Raises
  # RuntimeError on the first inconsistency found.
  def self.verify_flows(cfg, block_flows)
    # Without any blocks there is nothing to check.
    entry = cfg.blocks.first
    return unless entry

    # Check that the first block has no arguments.
    unless block_flows.fetch(entry.block_start).in.empty?
      raise "data flow graph: the first block must not take any arguments"
    end

    # Check all control flow edges between blocks pass the right number of
    # arguments.
    cfg.blocks.each do |pred|
      pred_flow = block_flows.fetch(pred.block_start)

      if pred.succs.empty?
        # With no successors, there should be no output arguments.
        unless pred_flow.out.empty?
          raise "data flow graph: block at #{pred.block_start} has no " \
                  "successors but leaves values on the stack"
        end
      else
        pred.succs.each do |succ|
          succ_flow = block_flows.fetch(succ.block_start)

          # The predecessor should have as many output arguments as the
          # successor has input arguments.
          unless pred_flow.out.size == succ_flow.in.size
            raise "data flow graph: block at #{pred.block_start} passes " \
                    "#{pred_flow.out.size} values but block at " \
                    "#{succ.block_start} expects #{succ_flow.in.size}"
          end
        end
      end
    end
  end

  private_class_method :find_local_flows,
                       :connect_producers_to_consumers,
                       :find_global_flows,
                       :verify_flows

  # Returns a String listing every basic block with its predecessors, its
  # input arguments, each instruction annotated with its data flow, its
  # successors and its output arguments.
  #
  # NOTE(review): the whitespace inside the string literals below is
  # reproduced exactly as found; the original padding may have been altered
  # by extraction - confirm against the expected output.
  def disasm
    fmt = Disassembler.new
    output = StringIO.new
    output.puts "== dfg #{cfg.iseq.name} "

    cfg.blocks.each do |block|
      output.print(block.id)
      unless block.preds.empty?
        output.print(" # from: #{block.preds.map(&:id).join(", ")} ")
      end
      output.puts

      block_flow = block_flows.fetch(block.block_start)
      unless block_flow.in.empty?
        output.puts " # in: #{block_flow.in.join(", ")} "
      end

      block.insns.each.with_index(block.block_start) do |insn, index|
        output.print(" ")
        output.print(insn.disasm(fmt))

        # Only annotate instructions that take part in some data flow.
        insn_flow = insn_flows[index]
        annotations = []
        unless insn_flow.in.empty?
          annotations << "in: #{insn_flow.in.join(", ")} "
        end
        unless insn_flow.out.empty?
          annotations << "out: #{insn_flow.out.join(", ")} "
        end

        output.print(" # #{annotations.join("; ")}") unless annotations.empty?
        output.puts
      end

      succs = block.succs.map(&:id)
      succs << "leaves" if block.last.leaves?
      output.puts(" # to: #{succs.join(", ")} ") unless succs.empty?

      unless block_flow.out.empty?
        output.puts " # out: #{block_flow.out.join(", ")} "
      end
    end

    output.string
  end
end
213+ end
214+ end
0 commit comments