Skip to content

Commit 9417f7f

Browse files
committed
[X86] - Avoid SFB pass - fix bug in updating the offsets for newly created copies
Change-Id: I169ab6fe7e187727c0298c2a1e2868a683f3e688
llvm-svn: 332849
1 parent 1b14a3a commit 9417f7f

File tree

2 files changed

+109
-2
lines changed

2 files changed

+109
-2
lines changed

llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -568,8 +568,8 @@ void X86AvoidSFBPass::breakBlockedCopies(
568568
const DisplacementSizeMap &BlockingStoresDispSizeMap) {
569569
int64_t LdDispImm = getDispOperand(LoadInst).getImm();
570570
int64_t StDispImm = getDispOperand(StoreInst).getImm();
571-
int64_t LMMOffset = (*LoadInst->memoperands_begin())->getOffset();
572-
int64_t SMMOffset = (*StoreInst->memoperands_begin())->getOffset();
571+
int64_t LMMOffset = 0;
572+
int64_t SMMOffset = 0;
573573

574574
int64_t LdDisp1 = LdDispImm;
575575
int64_t LdDisp2 = 0;
+107
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,107 @@
1+
# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=x86-avoid-SFB | FileCheck %s
2+
--- |
3+
; ModuleID = '../test50419-2.ll'
4+
source_filename = "nice.c"
5+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
@.str = private unnamed_addr constant [3 x i8] c"%u\00", align 1
9+
10+
define i32 @test_offset() #0 {
11+
entry:
12+
%a = alloca [36 x i32], align 16
13+
%z = alloca [36 x i32], align 16
14+
%0 = bitcast [36 x i32]* %z to i8*
15+
%scevgep = getelementptr inbounds [36 x i32], [36 x i32]* %a, i64 0, i64 1
16+
%scevgep40 = bitcast i32* %scevgep to i8*
17+
%arrayidx.9 = getelementptr inbounds [36 x i32], [36 x i32]* %a, i64 0, i64 9
18+
%1 = load i32, i32* %arrayidx.9, align 4
19+
%add.9 = add i32 %1, 9
20+
store i32 %add.9, i32* %arrayidx.9, align 4
21+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 %0, i8* nonnull align 4 %scevgep40, i64 136, i1 false)
22+
ret i32 %1
23+
}
24+
25+
; Function Attrs: argmemonly nounwind
26+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
27+
28+
; Function Attrs: nounwind
29+
declare void @llvm.stackprotector(i8*, i8**) #2
30+
31+
attributes #0 = { "target-cpu"="core-avx2" }
32+
attributes #1 = { argmemonly nounwind "target-cpu"="core-avx2" }
33+
attributes #2 = { nounwind }
34+
35+
...
36+
---
37+
name: test_offset
38+
alignment: 4
39+
exposesReturnsTwice: false
40+
legalized: false
41+
regBankSelected: false
42+
selected: false
43+
failedISel: false
44+
tracksRegLiveness: true
45+
registers:
46+
- { id: 0, class: gr32, preferred-register: '' }
47+
- { id: 1, class: gr32, preferred-register: '' }
48+
- { id: 2, class: vr256, preferred-register: '' }
49+
- { id: 3, class: vr256, preferred-register: '' }
50+
- { id: 4, class: vr256, preferred-register: '' }
51+
- { id: 5, class: gr64, preferred-register: '' }
52+
- { id: 6, class: vr256, preferred-register: '' }
53+
liveins:
54+
frameInfo:
55+
isFrameAddressTaken: false
56+
isReturnAddressTaken: false
57+
hasStackMap: false
58+
hasPatchPoint: false
59+
stackSize: 0
60+
offsetAdjustment: 0
61+
maxAlignment: 16
62+
adjustsStack: false
63+
hasCalls: false
64+
stackProtector: ''
65+
maxCallFrameSize: 4294967295
66+
hasOpaqueSPAdjustment: false
67+
hasVAStart: false
68+
hasMustTailInVarArgFunc: false
69+
localFrameSize: 0
70+
savePoint: ''
71+
restorePoint: ''
72+
fixedStack:
73+
stack:
74+
- { id: 0, name: a, type: default, offset: 0, size: 144, alignment: 16,
75+
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
76+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
77+
- { id: 1, name: z, type: default, offset: 0, size: 144, alignment: 16,
78+
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
79+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
80+
constants:
81+
body: |
82+
bb.0.entry:
83+
%0:gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 4 from %ir.arrayidx.9)
84+
%1:gr32 = ADD32ri8 %0, 9, implicit-def dead $eflags
85+
MOV32mr %stack.0.a, 1, $noreg, 36, $noreg, killed %1 :: (store 4 into %ir.arrayidx.9)
86+
%2:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 4, $noreg :: (dereferenceable load 32 from %ir.scevgep40, align 4)
87+
VMOVUPSYmr %stack.1.z, 1, $noreg, 0, $noreg, killed %2 :: (store 32 into %ir.0, align 16)
88+
%3:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 68, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 64, align 4)
89+
VMOVUPSYmr %stack.1.z, 1, $noreg, 64, $noreg, killed %3 :: (store 32 into %ir.0 + 64, align 16)
90+
%4:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 100, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 96, align 4)
91+
VMOVUPSYmr %stack.1.z, 1, $noreg, 96, $noreg, killed %4 :: (store 32 into %ir.0 + 96, align 16)
92+
%5:gr64 = MOV64rm %stack.0.a, 1, $noreg, 132, $noreg :: (dereferenceable load 8 from %ir.scevgep40 + 128, align 4)
93+
MOV64mr %stack.1.z, 1, $noreg, 128, $noreg, killed %5 :: (store 8 into %ir.0 + 128, align 16)
94+
; CHECK: gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 4 from %ir.scevgep40 + 32)
95+
; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 32, $noreg, killed %7 :: (store 4 into %ir.0 + 32, align 16)
96+
; CHECK-NEXT: %8:vr128 = VMOVUPSrm %stack.0.a, 1, $noreg, 40, $noreg :: (dereferenceable load 16 from %ir.scevgep40 + 36, align 4)
97+
; CHECK-NEXT: VMOVUPSmr %stack.1.z, 1, $noreg, 36, $noreg, killed %8 :: (store 16 into %ir.0 + 36)
98+
; CHECK-NEXT: %9:gr64 = MOV64rm %stack.0.a, 1, $noreg, 56, $noreg :: (dereferenceable load 8 from %ir.scevgep40 + 52, align 4)
99+
; CHECK-NEXT: MOV64mr %stack.1.z, 1, $noreg, 52, $noreg, killed %9 :: (store 8 into %ir.0 + 52, align 16)
100+
; CHECK-NEXT: %10:gr32 = MOV32rm %stack.0.a, 1, $noreg, 64, $noreg :: (dereferenceable load 4 from %ir.scevgep40 + 60)
101+
; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 60, $noreg, killed %10 :: (store 4 into %ir.0 + 60, align 16)
102+
%6:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 32, align 4)
103+
VMOVUPSYmr %stack.1.z, 1, $noreg, 32, $noreg, killed %6 :: (store 32 into %ir.0 + 32, align 16)
104+
$eax = COPY %0
105+
RET 0, $eax
106+
107+
...

0 commit comments

Comments
 (0)