Skip to content

Commit 0cfb78e

Browse files
committed
[ARM] MVE i1 splat
We needn't BFI each lane individually into a predicate register when each lane is the same. A simple sign extend and a vmsr will do. Differential Revision: https://reviews.llvm.org/D67653 llvm-svn: 372313
1 parent ec841cf commit 0cfb78e

File tree

2 files changed

+16
-35
lines changed

2 files changed

+16
-35
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -6945,6 +6945,19 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
69456945
} else
69466946
return SDValue();
69476947

6948+
// If this is a single value copied into all lanes (a splat), we can just sign
6949+
// extend that single value
6950+
SDValue FirstOp = Op.getOperand(0);
6951+
if (!isa<ConstantSDNode>(FirstOp) &&
6952+
std::all_of(std::next(Op->op_begin()), Op->op_end(),
6953+
[&FirstOp](SDUse &U) {
6954+
return U.get().isUndef() || U.get() == FirstOp;
6955+
})) {
6956+
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
6957+
DAG.getValueType(MVT::i1));
6958+
return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
6959+
}
6960+
69486961
// First create base with bits set where known
69496962
unsigned Bits32 = 0;
69506963
for (unsigned i = 0; i < NumElts; ++i) {
@@ -6957,7 +6970,6 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
69576970
}
69586971

69596972
// Add in unknown nodes
6960-
// FIXME: Handle splats of the same value better.
69616973
SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
69626974
DAG.getConstant(Bits32, dl, MVT::i32));
69636975
for (unsigned i = 0; i < NumElts; ++i) {

llvm/test/CodeGen/Thumb2/mve-pred-build-var.ll

+3-34
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,10 @@ define arm_aapcs_vfpcc <4 x i32> @build_varN_v4i1(i32 %s, i32 %t, <4 x i32> %a,
4444
; CHECK-LABEL: build_varN_v4i1:
4545
; CHECK: @ %bb.0: @ %entry
4646
; CHECK-NEXT: cmp r0, r1
47-
; CHECK-NEXT: mov.w r1, #0
4847
; CHECK-NEXT: cset r0, lo
4948
; CHECK-NEXT: and r0, r0, #1
5049
; CHECK-NEXT: rsbs r0, r0, #0
51-
; CHECK-NEXT: bfi r1, r0, #0, #4
52-
; CHECK-NEXT: bfi r1, r0, #4, #4
53-
; CHECK-NEXT: bfi r1, r0, #8, #4
54-
; CHECK-NEXT: bfi r1, r0, #12, #4
55-
; CHECK-NEXT: vmsr p0, r1
50+
; CHECK-NEXT: vmsr p0, r0
5651
; CHECK-NEXT: vpsel q0, q0, q1
5752
; CHECK-NEXT: bx lr
5853
entry:
@@ -106,19 +101,10 @@ define arm_aapcs_vfpcc <8 x i16> @build_varN_v8i1(i32 %s, i32 %t, <8 x i16> %a,
106101
; CHECK-LABEL: build_varN_v8i1:
107102
; CHECK: @ %bb.0: @ %entry
108103
; CHECK-NEXT: cmp r0, r1
109-
; CHECK-NEXT: mov.w r1, #0
110104
; CHECK-NEXT: cset r0, lo
111105
; CHECK-NEXT: and r0, r0, #1
112106
; CHECK-NEXT: rsbs r0, r0, #0
113-
; CHECK-NEXT: bfi r1, r0, #0, #2
114-
; CHECK-NEXT: bfi r1, r0, #2, #2
115-
; CHECK-NEXT: bfi r1, r0, #4, #2
116-
; CHECK-NEXT: bfi r1, r0, #6, #2
117-
; CHECK-NEXT: bfi r1, r0, #8, #2
118-
; CHECK-NEXT: bfi r1, r0, #10, #2
119-
; CHECK-NEXT: bfi r1, r0, #12, #2
120-
; CHECK-NEXT: bfi r1, r0, #14, #2
121-
; CHECK-NEXT: vmsr p0, r1
107+
; CHECK-NEXT: vmsr p0, r0
122108
; CHECK-NEXT: vpsel q0, q0, q1
123109
; CHECK-NEXT: bx lr
124110
entry:
@@ -172,27 +158,10 @@ define arm_aapcs_vfpcc <16 x i8> @build_varN_v16i1(i32 %s, i32 %t, <16 x i8> %a,
172158
; CHECK-LABEL: build_varN_v16i1:
173159
; CHECK: @ %bb.0: @ %entry
174160
; CHECK-NEXT: cmp r0, r1
175-
; CHECK-NEXT: mov.w r1, #0
176161
; CHECK-NEXT: cset r0, lo
177162
; CHECK-NEXT: and r0, r0, #1
178163
; CHECK-NEXT: rsbs r0, r0, #0
179-
; CHECK-NEXT: bfi r1, r0, #0, #1
180-
; CHECK-NEXT: bfi r1, r0, #1, #1
181-
; CHECK-NEXT: bfi r1, r0, #2, #1
182-
; CHECK-NEXT: bfi r1, r0, #3, #1
183-
; CHECK-NEXT: bfi r1, r0, #4, #1
184-
; CHECK-NEXT: bfi r1, r0, #5, #1
185-
; CHECK-NEXT: bfi r1, r0, #6, #1
186-
; CHECK-NEXT: bfi r1, r0, #7, #1
187-
; CHECK-NEXT: bfi r1, r0, #8, #1
188-
; CHECK-NEXT: bfi r1, r0, #9, #1
189-
; CHECK-NEXT: bfi r1, r0, #10, #1
190-
; CHECK-NEXT: bfi r1, r0, #11, #1
191-
; CHECK-NEXT: bfi r1, r0, #12, #1
192-
; CHECK-NEXT: bfi r1, r0, #13, #1
193-
; CHECK-NEXT: bfi r1, r0, #14, #1
194-
; CHECK-NEXT: bfi r1, r0, #15, #1
195-
; CHECK-NEXT: vmsr p0, r1
164+
; CHECK-NEXT: vmsr p0, r0
196165
; CHECK-NEXT: vpsel q0, q0, q1
197166
; CHECK-NEXT: bx lr
198167
entry:

0 commit comments

Comments
 (0)