Skip to content

Commit 13eb890

Browse files
author
Pierre-vh
committed
[Target][ARM] Fix VPT Block Pass miscompilation
The pass was incorrectly reverting to a "T" (then) predicate when an instruction wrote to VPR inside an "E" (else) block. This is not the correct behaviour: the predicate should stay the same. Differential Revision: https://reviews.llvm.org/D77798
1 parent 4563024 commit 13eb890

File tree

3 files changed

+76
-38
lines changed

3 files changed

+76
-38
lines changed

llvm/lib/Target/ARM/MVEVPTBlockPass.cpp

+7-16
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
194194

195195
// Remove VPNOTs while there's still room in the block, so we can make the
196196
// largest block possible.
197-
ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then;
197+
ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
198198
while (BlockSize < 4 && Iter != EndIter &&
199199
Iter->getOpcode() == ARM::MVE_VPNOT) {
200200

@@ -222,28 +222,19 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
222222
DeadInstructions.push_back(&*Iter);
223223
++Iter;
224224

225-
// Replace "then" by "elses" in the block until we find an instruction that
226-
// defines VPR, then after that leave everything to "t".
225+
// Replace the predicates of the instructions we're adding.
227226
// Note that we are using "Iter" to iterate over the block so we can update
228227
// it at the same time.
229-
bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
230228
for (; Iter != VPNOTBlockEndIter; ++Iter) {
231229
// Find the register in which the predicate is
232230
int OpIdx = findFirstVPTPredOperandIdx(*Iter);
233231
assert(OpIdx != -1);
234232

235-
// Update the mask + change the predicate to an else if needed.
236-
if (ChangeToElse) {
237-
// Change the predicate and update the mask
238-
Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
239-
BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
240-
// Reset back to a "then" predicate if this instruction defines VPR.
241-
if (Iter->definesRegister(ARM::VPR))
242-
ChangeToElse = false;
243-
} else
244-
BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
245-
246-
LLVM_DEBUG(dbgs() << " adding: "; Iter->dump());
233+
// Change the predicate and update the mask
234+
Iter->getOperand(OpIdx).setImm(CurrentPredicate);
235+
BlockMask = ExpandBlockMask(BlockMask, CurrentPredicate);
236+
237+
LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
247238
}
248239

249240
CurrentPredicate =

llvm/test/CodeGen/Thumb2/mve-pred-not.ll

+43-13
Original file line number | Diff line number | Diff line change
@@ -405,12 +405,42 @@ declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>,
405405
define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
406406
; CHECK-LABEL: vpttet_v4i1:
407407
; CHECK: @ %bb.0: @ %entry
408+
; CHECK-NEXT: .pad #4
409+
; CHECK-NEXT: sub sp, #4
410+
; CHECK-NEXT: vcmp.s32 ge, q0, q2
411+
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
412+
; CHECK-NEXT: vpstt
413+
; CHECK-NEXT: vmovt q0, q2
414+
; CHECK-NEXT: vmovt q0, q2
415+
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
416+
; CHECK-NEXT: vpnot
417+
; CHECK-NEXT: vpst
418+
; CHECK-NEXT: vmovt q0, q2
419+
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
420+
; CHECK-NEXT: vpst
421+
; CHECK-NEXT: vmovt q0, q2
422+
; CHECK-NEXT: add sp, #4
423+
; CHECK-NEXT: bx lr
424+
entry:
425+
%0 = icmp sge <4 x i32> %x, %z
426+
%1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %x)
427+
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %1)
428+
%3 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
429+
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %2)
430+
%5 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
431+
%6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4)
432+
ret <4 x i32> %6
433+
}
434+
435+
define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
436+
; CHECK-LABEL: vpttee_v4i1:
437+
; CHECK: @ %bb.0: @ %entry
408438
; CHECK-NEXT: vmov q3, q2
409-
; CHECK-NEXT: vpttet.s32 ge, q0, q2
439+
; CHECK-NEXT: vpttee.s32 ge, q0, q2
410440
; CHECK-NEXT: vmaxt.s32 q3, q0, q1
411441
; CHECK-NEXT: vcmpt.s32 gt, q0, zr
412-
; CHECK-NEXT: vcmpe.s32 gt, q1, zr
413-
; CHECK-NEXT: vmovt q3, q2
442+
; CHECK-NEXT: vmove q3, q2
443+
; CHECK-NEXT: vmove q3, q2
414444
; CHECK-NEXT: vmov q0, q3
415445
; CHECK-NEXT: bx lr
416446
entry:
@@ -419,20 +449,19 @@ entry:
419449
%2 = icmp sgt <4 x i32> %x, zeroinitializer
420450
%3 = and <4 x i1> %0, %2
421451
%4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
422-
%5 = icmp sgt <4 x i32> %y, zeroinitializer
423-
%6 = and <4 x i1> %5, %4
424-
%7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
425-
ret <4 x i32> %7
452+
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
453+
%6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
454+
ret <4 x i32> %6
426455
}
427456

428-
define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
429-
; CHECK-LABEL: vpttee_v4i1:
457+
define arm_aapcs_vfpcc <4 x i32> @vpttee2_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
458+
; CHECK-LABEL: vpttee2_v4i1:
430459
; CHECK: @ %bb.0: @ %entry
431460
; CHECK-NEXT: vmov q3, q2
432461
; CHECK-NEXT: vpttee.s32 ge, q0, q2
433462
; CHECK-NEXT: vmaxt.s32 q3, q0, q1
434463
; CHECK-NEXT: vcmpt.s32 gt, q0, zr
435-
; CHECK-NEXT: vmove q3, q2
464+
; CHECK-NEXT: vcmpe.s32 gt, q1, zr
436465
; CHECK-NEXT: vmove q3, q2
437466
; CHECK-NEXT: vmov q0, q3
438467
; CHECK-NEXT: bx lr
@@ -442,9 +471,10 @@ entry:
442471
%2 = icmp sgt <4 x i32> %x, zeroinitializer
443472
%3 = and <4 x i1> %0, %2
444473
%4 = xor <4 x i1> %3, <i1 true, i1 true, i1 true, i1 true>
445-
%5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1)
446-
%6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5)
447-
ret <4 x i32> %6
474+
%5 = icmp sgt <4 x i32> %y, zeroinitializer
475+
%6 = and <4 x i1> %5, %4
476+
%7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1)
477+
ret <4 x i32> %7
448478
}
449479

450480
define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {

llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir

+26-9
Original file line number | Diff line number | Diff line change
@@ -69,11 +69,11 @@ body: |
6969
; CHECK: liveins: $q0, $q1, $q2
7070
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
7171
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
72-
; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr
72+
; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr
7373
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
7474
; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr
7575
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
76-
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3
76+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3
7777
; CHECK: }
7878
; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
7979
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -87,20 +87,20 @@ body: |
8787
; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0
8888
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
8989
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
90-
; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
90+
; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
9191
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
9292
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
9393
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
94-
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
94+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
9595
; CHECK: }
9696
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
9797
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
9898
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
99-
; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr
99+
; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr
100100
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
101101
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
102-
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3
103-
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
102+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3
103+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
104104
; CHECK: }
105105
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
106106
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -122,10 +122,10 @@ body: |
122122
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
123123
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
124124
; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr {
125-
; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr
125+
; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr
126126
; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3
127127
; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr
128-
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3
128+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3
129129
; CHECK: }
130130
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
131131
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -145,6 +145,14 @@ body: |
145145
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
146146
; CHECK: }
147147
; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
148+
; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
149+
; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 {
150+
; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr
151+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3
152+
; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 2, internal killed renamable $vpr
153+
; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3
154+
; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, internal killed renamable $vpr
155+
; CHECK: }
148156
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
149157
renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
150158
$q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
@@ -226,6 +234,15 @@ body: |
226234
renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
227235
$q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
228236
237+
$q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3
238+
renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg
239+
renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
240+
renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
241+
renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
242+
renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
243+
renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
244+
renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr
245+
229246
tBX_RET 14, $noreg, implicit $q0
230247
231248
...

0 commit comments

Comments
 (0)