Skip to content

Commit 01e64c9

Browse files
author
Whitney Tsang
committed
[LoopFusion] Update second loop guard non-loop successor phis' incoming
blocks. Summary: LoopFusion currently forgets to update the incoming block of the phis in the second loop guard's non-loop successor from the second loop guard block to the first loop guard block. A test case is provided to better illustrate the problem. Reviewed By: jdoerfert Subscribers: hiraditya, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D81421
1 parent a27d385 commit 01e64c9

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,7 @@ struct LoopFuser {
13851385
// Thus, one path from the guard goes to the preheader for FC0 (and thus
13861386
// executes the new fused loop) and the other path goes to the NonLoopBlock
13871387
// for FC1 (where FC1 guard would have gone if FC1 was not executed).
1388+
FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
13881389
FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
13891390
FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
13901391
FC1.Header);

llvm/test/Transforms/LoopFusion/guarded.ll

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,3 +232,62 @@ for.second.exit:
232232
for.end:
233233
ret void
234234
}
235+
236+
; Test that the incoming block of `%j.lcssa` is updated correctly
237+
; from for.second.guard to for.first.guard, and the two loops for.first and
238+
; for.second are fused.
239+
240+
; CHECK: i64 @updatephi_guardnonloopblock
241+
; CHECK-LABEL: for.first.guard:
242+
; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
243+
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
244+
; CHECK-LABEL: for.first.preheader:
245+
; CHECK-NEXT: br label %for.first
246+
; CHECK-LABEL: for.first:
247+
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
248+
; CHECK-LABEL: for.second.exit:
249+
; CHECK-NEXT: br label %for.end
250+
; CHECK-LABEL: for.end:
251+
; CHECK-NEXT: %j.lcssa = phi i64 [ 0, %for.first.guard ], [ %j.02, %for.second.exit ]
252+
; CHECK-NEXT: ret i64 %j.lcssa
253+
254+
define i64 @updatephi_guardnonloopblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
255+
for.first.guard:
256+
%cmp.guard = icmp slt i64 0, %N
257+
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
258+
259+
for.first.preheader:
260+
br label %for.first
261+
262+
for.first:
263+
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
264+
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
265+
store i32 0, i32* %arrayidx, align 4
266+
%inc = add nsw i64 %i.04, 1
267+
%cmp = icmp slt i64 %inc, %N
268+
br i1 %cmp, label %for.first, label %for.first.exit
269+
270+
for.first.exit:
271+
br label %for.second.guard
272+
273+
for.second.guard:
274+
br i1 %cmp.guard, label %for.second.preheader, label %for.end
275+
276+
for.second.preheader:
277+
br label %for.second
278+
279+
for.second:
280+
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
281+
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
282+
store i32 0, i32* %arrayidx4, align 4
283+
%inc6 = add nsw i64 %j.02, 1
284+
%cmp.j = icmp slt i64 %inc6, %N
285+
br i1 %cmp.j, label %for.second, label %for.second.exit
286+
287+
for.second.exit:
288+
br label %for.end
289+
290+
for.end:
291+
%j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]
292+
ret i64 %j.lcssa
293+
}

0 commit comments

Comments
 (0)