Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit cdbdfa2

Browse files
committed
[x86,SDAG] Introduce any- and sign-extend-vector-inreg nodes analogous
to the zero-extend-vector-inreg node introduced previously for the same purpose: manage the type legalization of widened extend operations, especially to support the experimental widening mode for x86. I'm adding both because sign-extend is expanded in terms of any-extend with shifts to propagate the sign bit. This removes the last fundamental scalarization from vec_cast2.ll (a test case that hit many really bad edge cases for widening legalization), although the trunc tests in that file still appear scalarized because the shuffle legalization is scalarizing. Funny thing, I've been working on that. Some initial experiments with this and SSE2 scenarios are showing moderately good behavior already for sign extension. Still some work to do on the shuffle combining on X86 before we're generating optimal sequences, but avoiding scalarization is a huge step forward. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212714 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 35dda8a commit cdbdfa2

File tree

9 files changed

+165
-9
lines changed

9 files changed

+165
-9
lines changed

include/llvm/CodeGen/ISDOpcodes.h

+22
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,28 @@ namespace ISD {
379379
/// operand, a ValueType node.
380380
SIGN_EXTEND_INREG,
381381

382+
/// ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an
383+
/// in-register any-extension of the low lanes of an integer vector. The
384+
/// result type must have fewer elements than the operand type, and those
385+
/// elements must be larger integer types such that the total size of the
386+
/// operand type and the result type match. Each of the low operand
387+
/// elements is any-extended into the corresponding, wider result
388+
/// elements with the high bits becoming undef.
389+
ANY_EXTEND_VECTOR_INREG,
390+
391+
/// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an
392+
/// in-register sign-extension of the low lanes of an integer vector. The
393+
/// result type must have fewer elements than the operand type, and those
394+
/// elements must be larger integer types such that the total size of the
395+
/// operand type and the result type match. Each of the low operand
396+
/// elements is sign-extended into the corresponding, wider result
397+
/// elements.
398+
// FIXME: The SIGN_EXTEND_INREG node isn't specifically limited to
399+
// scalars, but it also doesn't handle vectors well. Either it should be
400+
// restricted to scalars or this node (and its handling) should be merged
401+
// into it.
402+
SIGN_EXTEND_VECTOR_INREG,
403+
382404
/// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
383405
/// in-register zero-extension of the low lanes of an integer vector. The
384406
/// result type must have fewer elements than the operand type, and those

include/llvm/CodeGen/SelectionDAG.h

+12
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,18 @@ class SelectionDAG {
562562
/// value assuming it was the smaller SrcTy value.
563563
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy);
564564

565+
/// getAnyExtendVectorInReg - Return an operation which will any-extend the
566+
/// low lanes of the operand into the specified vector type. For example,
567+
/// this can convert a v16i8 into a v4i32 by any-extending the low four
568+
/// lanes of the operand from i8 to i32.
569+
SDValue getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
570+
571+
/// getSignExtendVectorInReg - Return an operation which will sign extend the
572+
/// low lanes of the operand into the specified vector type. For example,
573+
/// this can convert a v16i8 into a v4i32 by sign extending the low four
574+
/// lanes of the operand from i8 to i32.
575+
SDValue getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
576+
565577
/// getZeroExtendVectorInReg - Return an operation which will zero extend the
566578
/// low lanes of the operand into the specified vector type. For example,
567579
/// this can convert a v16i8 into a v4i32 by zero extending the low four

lib/CodeGen/SelectionDAG/LegalizeTypes.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -645,11 +645,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
645645
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
646646
SDValue WidenVecOp_BITCAST(SDNode *N);
647647
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
648+
SDValue WidenVecOp_EXTEND(SDNode *N);
648649
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
649650
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
650651
SDValue WidenVecOp_STORE(SDNode* N);
651652
SDValue WidenVecOp_SETCC(SDNode* N);
652-
SDValue WidenVecOp_ZERO_EXTEND(SDNode *N);
653653

654654
SDValue WidenVecOp_Convert(SDNode *N);
655655

lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,20 @@ class VectorLegalizer {
7575
/// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
7676
SDValue ExpandSEXTINREG(SDValue Op);
7777

78+
/// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
79+
///
80+
/// Shuffles the low lanes of the operand into place and bitcasts to the proper
81+
/// type. The contents of the bits in the extended part of each element are
82+
/// undef.
83+
SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
84+
85+
/// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
86+
///
87+
/// Shuffles the low lanes of the operand into place, bitcasts to the proper
88+
/// type, then shifts left and arithmetic shifts right to introduce a sign
89+
/// extension.
90+
SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
91+
7892
/// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
7993
///
8094
/// Shuffles the low lanes of the operand into place and blends zeros into
@@ -280,6 +294,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
280294
case ISD::FP_EXTEND:
281295
case ISD::FMA:
282296
case ISD::SIGN_EXTEND_INREG:
297+
case ISD::ANY_EXTEND_VECTOR_INREG:
298+
case ISD::SIGN_EXTEND_VECTOR_INREG:
283299
case ISD::ZERO_EXTEND_VECTOR_INREG:
284300
QueryType = Node->getValueType(0);
285301
break;
@@ -621,6 +637,10 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
621637
switch (Op->getOpcode()) {
622638
case ISD::SIGN_EXTEND_INREG:
623639
return ExpandSEXTINREG(Op);
640+
case ISD::ANY_EXTEND_VECTOR_INREG:
641+
return ExpandANY_EXTEND_VECTOR_INREG(Op);
642+
case ISD::SIGN_EXTEND_VECTOR_INREG:
643+
return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
624644
case ISD::ZERO_EXTEND_VECTOR_INREG:
625645
return ExpandZERO_EXTEND_VECTOR_INREG(Op);
626646
case ISD::BSWAP:
@@ -717,6 +737,52 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
717737
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
718738
}
719739

740+
// Generically expand a vector anyext in register to a shuffle of the relevant
741+
// lanes into the appropriate locations, with other lanes left undef.
742+
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
743+
SDLoc DL(Op);
744+
EVT VT = Op.getValueType();
745+
int NumElements = VT.getVectorNumElements();
746+
SDValue Src = Op.getOperand(0);
747+
EVT SrcVT = Src.getValueType();
748+
int NumSrcElements = SrcVT.getVectorNumElements();
749+
750+
// Build a base mask of undef shuffles.
751+
SmallVector<int, 16> ShuffleMask;
752+
ShuffleMask.resize(NumSrcElements, -1);
753+
754+
// Place the extended lanes into the correct locations.
755+
int ExtLaneScale = NumSrcElements / NumElements;
756+
int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
757+
for (int i = 0; i < NumElements; ++i)
758+
ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
759+
760+
return DAG.getNode(
761+
ISD::BITCAST, DL, VT,
762+
DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
763+
}
764+
765+
// Expand a vector sign-extend in register: build an any-extend-in-register
// node from the source operand, then shift every element left and arithmetic
// shift it right by the difference between the result and source element
// widths.
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
766+
SDLoc DL(Op);
767+
EVT VT = Op.getValueType();
768+
SDValue Src = Op.getOperand(0);
769+
EVT SrcVT = Src.getValueType();
770+
771+
// First build an any-extend node which can be legalized above when we
772+
// recurse through it.
773+
Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
774+
775+
// Now we need sign extend. Do this by shifting the elements. Even if these
776+
// aren't legal operations, they have a better chance of being legalized
777+
// without full scalarization than the sign extension does.
778+
unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
779+
unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
780+
// The same constant (result element width minus source element width) is
// used as the amount for both the SHL and the SRA below.
SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
781+
return DAG.getNode(ISD::SRA, DL, VT,
782+
DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
783+
ShiftAmount);
784+
}
785+
720786
// Generically expand a vector zext in register to a shuffle of the relevant
721787
// lanes into the appropriate locations, a blend of zero into the high bits,
722788
// and a bitcast to the wider element type.

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+20-8
Original file line numberDiff line numberDiff line change
@@ -2398,16 +2398,19 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
23982398
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
23992399
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
24002400
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
2401-
case ISD::ZERO_EXTEND: Res = WidenVecOp_ZERO_EXTEND(N); break;
2401+
2402+
case ISD::ANY_EXTEND:
2403+
case ISD::SIGN_EXTEND:
2404+
case ISD::ZERO_EXTEND:
2405+
Res = WidenVecOp_EXTEND(N);
2406+
break;
24022407

24032408
case ISD::FP_EXTEND:
24042409
case ISD::FP_TO_SINT:
24052410
case ISD::FP_TO_UINT:
24062411
case ISD::SINT_TO_FP:
24072412
case ISD::UINT_TO_FP:
24082413
case ISD::TRUNCATE:
2409-
case ISD::SIGN_EXTEND:
2410-
case ISD::ANY_EXTEND:
24112414
Res = WidenVecOp_Convert(N);
24122415
break;
24132416
}
@@ -2428,14 +2431,14 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
24282431
return false;
24292432
}
24302433

2431-
SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) {
2434+
SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
24322435
SDLoc DL(N);
24332436
EVT VT = N->getValueType(0);
24342437

24352438
SDValue InOp = N->getOperand(0);
24362439
// If some legalization strategy other than widening is used on the operand,
2437-
// we can't safely assume that just zero-extending the low lanes is the
2438-
// correct transformation.
2440+
// we can't safely assume that just extending the low lanes is the correct
2441+
// transformation.
24392442
if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
24402443
return WidenVecOp_Convert(N);
24412444
InOp = GetWidenedVector(InOp);
@@ -2476,9 +2479,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) {
24762479
return WidenVecOp_Convert(N);
24772480
}
24782481

2479-
// Use a special DAG node to represent the operation of zero extending the
2482+
// Use special DAG nodes to represent the operation of extending the
24802483
// low lanes.
2481-
return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
2484+
switch (N->getOpcode()) {
2485+
default:
2486+
llvm_unreachable("Extend legalization on on extend operation!");
2487+
case ISD::ANY_EXTEND:
2488+
return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
2489+
case ISD::SIGN_EXTEND:
2490+
return DAG.getSignExtendVectorInReg(InOp, DL, VT);
2491+
case ISD::ZERO_EXTEND:
2492+
return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
2493+
}
24822494
}
24832495

24842496
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,26 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
10331033
getConstant(Imm, Op.getValueType()));
10341034
}
10351035

1036+
// Build an ANY_EXTEND_VECTOR_INREG node producing VT from Op. Asserts that VT
// is a vector type, that VT and Op's type have the same total size in bits,
// and that VT has fewer elements than Op's type.
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
1037+
assert(VT.isVector() && "This DAG node is restricted to vector types.");
1038+
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
1039+
"The sizes of the input and result must match in order to perform the "
1040+
"extend in-register.");
1041+
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
1042+
"The destination vector type must have fewer lanes than the input.");
1043+
return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
1044+
}
1045+
1046+
// Build a SIGN_EXTEND_VECTOR_INREG node producing VT from Op. Asserts that VT
// is a vector type, that VT and Op's type have the same total size in bits,
// and that VT has fewer elements than Op's type.
SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
1047+
assert(VT.isVector() && "This DAG node is restricted to vector types.");
1048+
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
1049+
"The sizes of the input and result must match in order to perform the "
1050+
"extend in-register.");
1051+
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
1052+
"The destination vector type must have fewer lanes than the input.");
1053+
return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
1054+
}
1055+
10361056
SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
10371057
assert(VT.isVector() && "This DAG node is restricted to vector types.");
10381058
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&

lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
221221
case ISD::ZERO_EXTEND: return "zero_extend";
222222
case ISD::ANY_EXTEND: return "any_extend";
223223
case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
224+
case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
225+
case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
224226
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
225227
case ISD::TRUNCATE: return "truncate";
226228
case ISD::FP_ROUND: return "fp_round";

lib/CodeGen/TargetLoweringBase.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,10 @@ void TargetLoweringBase::initActions() {
746746
if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
747747
VT <= MVT::LAST_VECTOR_VALUETYPE) {
748748
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
749+
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
750+
(MVT::SimpleValueType)VT, Expand);
751+
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
752+
(MVT::SimpleValueType)VT, Expand);
749753
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
750754
(MVT::SimpleValueType)VT, Expand);
751755
}

test/CodeGen/X86/vec_cast2.ll

+18
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@
44
;CHECK-LABEL: foo1_8:
55
;CHECK: vcvtdq2ps
66
;CHECK: ret
7+
;
8+
;CHECK-WIDE-LABEL: foo1_8:
9+
;CHECK-WIDE: vpmovzxbd %xmm0, %xmm1
10+
;CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1
11+
;CHECK-WIDE-NEXT: vpsrad $24, %xmm1, %xmm1
12+
;CHECK-WIDE-NEXT: vpshufb {{.*}}, %xmm0, %xmm0
13+
;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
14+
;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
15+
;CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
16+
;CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
17+
;CHECK-WIDE-NEXT: ret
718
define <8 x float> @foo1_8(<8 x i8> %src) {
819
%res = sitofp <8 x i8> %src to <8 x float>
920
ret <8 x float> %res
@@ -12,6 +23,13 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
1223
;CHECK-LABEL: foo1_4:
1324
;CHECK: vcvtdq2ps
1425
;CHECK: ret
26+
;
27+
;CHECK-WIDE-LABEL: foo1_4:
28+
;CHECK-WIDE: vpmovzxbd %xmm0, %xmm0
29+
;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
30+
;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
31+
;CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
32+
;CHECK-WIDE-NEXT: ret
1533
define <4 x float> @foo1_4(<4 x i8> %src) {
1634
%res = sitofp <4 x i8> %src to <4 x float>
1735
ret <4 x float> %res

0 commit comments

Comments
 (0)