Skip to content

Commit d83a47d

Browse files
author
git apple-llvm automerger
committed
Merge commit '411ea8e9dd69' from llvm.org/main into next
2 parents 1dabe1d + 411ea8e commit d83a47d

File tree

5 files changed

+280
-0
lines changed

5 files changed

+280
-0
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
657657
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
658658
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
659659

660+
if (Subtarget.isISA3_0() && isPPC64) {
661+
setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
662+
setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
663+
setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
664+
setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
665+
setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
666+
setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
667+
setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
668+
setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
669+
}
670+
660671
// We want to custom lower some of our intrinsics.
661672
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
662673
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11917,6 +11928,62 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
1191711928
return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
1191811929
}
1191911930

11931+
// Adjust the length value for a load/store with length to account for the
11932+
// instructions requiring a left justified length, and for non-byte element
11933+
// types requiring scaling by element size.
11934+
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11935+
SelectionDAG &DAG) {
11936+
SDLoc dl(Val);
11937+
EVT VT = Val->getValueType(0);
11938+
unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11939+
unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11940+
SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11941+
return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11942+
}
11943+
11944+
// Custom lowering for ISD::VP_LOAD with an all-ones mask: emit the lxvl
// intrinsic (lxvrl when targeting ISA Future) as a chained memory intrinsic
// node, reusing the original memory VT and memory operand.
//
// The explicit vector length is extended to pointer width and converted to a
// byte count by AdjustLength; it is left-justified only when not targeting
// ISA Future.
SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *Load = cast<VPLoadSDNode>(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Load->getMask().getNode(), true) &&
         "Mask predication not supported");

  const bool IsFuture = Subtarget.isISAFuture();
  const EVT ResultVT = Load->getValueType(0);
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // NOTE(review): the EVL is any-extended, so on the ISA Future path (no
  // left-justifying shift) the bits above the original EVL width are
  // unspecified -- confirm lxvrl ignores them.
  SDValue ByteLen = AdjustLength(
      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, Load->getVectorLength()),
      ResultVT.getScalarSizeInBits(), /*Left=*/!IsFuture, DAG);

  const unsigned IID =
      IsFuture ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
  SDValue Ops[] = {Load->getChain(), DAG.getConstant(IID, dl, MVT::i32),
                   Load->getBasePtr(), ByteLen};
  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                 DAG.getVTList(ResultVT, MVT::Other), Ops,
                                 Load->getMemoryVT(), Load->getMemOperand());
}
11963+
11964+
// Custom lowering for ISD::VP_STORE with an all-ones mask: emit the stxvl
// intrinsic (stxvrl when targeting ISA Future) as a void memory intrinsic
// node, reusing the original memory VT and memory operand.
//
// The stored vector is bitcast to v4i32 for the intrinsic, and the explicit
// vector length is extended to pointer width and converted to a byte count by
// AdjustLength; it is left-justified only when not targeting ISA Future.
SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *Store = cast<VPStoreSDNode>(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Store->getMask().getNode(), true) &&
         "Mask predication not supported");

  const bool IsFuture = Subtarget.isISAFuture();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned ElemBits = Store->getValue().getValueType().getScalarSizeInBits();

  // NOTE(review): the EVL is any-extended, so on the ISA Future path (no
  // left-justifying shift) the bits above the original EVL width are
  // unspecified -- confirm stxvrl ignores them.
  SDValue ByteLen = AdjustLength(
      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, Store->getVectorLength()),
      ElemBits, /*Left=*/!IsFuture, DAG);

  const unsigned IID =
      IsFuture ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
  SDValue IntrinsicID = DAG.getConstant(IID, dl, MVT::i32);
  SDValue Data = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Store->getValue());
  SDValue Ops[] = {Store->getChain(), IntrinsicID, Data, Store->getBasePtr(),
                   ByteLen};
  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl,
                                 DAG.getVTList(MVT::Other), Ops,
                                 Store->getMemoryVT(), Store->getMemOperand());
}
11986+
1192011987
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
1192111988
SelectionDAG &DAG) const {
1192211989
SDLoc dl(Op);
@@ -12771,6 +12838,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1277112838
if (Op->getFlags().hasNoFPExcept())
1277212839
return Op;
1277312840
return SDValue();
12841+
case ISD::VP_LOAD:
12842+
return LowerVP_LOAD(Op, DAG);
12843+
case ISD::VP_STORE:
12844+
return LowerVP_STORE(Op, DAG);
1277412845
}
1277512846
}
1277612847

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,9 @@ namespace llvm {
13451345
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
13461346
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
13471347

1348+
// Custom lowerings for ISD::VP_LOAD and ISD::VP_STORE nodes; LowerOperation
// dispatches to these. Both assert that the node's mask is an all-ones
// constant splat.
SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
1349+
SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
1350+
13481351
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
13491352
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
13501353
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ using namespace llvm;
2424

2525
#define DEBUG_TYPE "ppctti"
2626

27+
// Hidden, off-by-default switch. When set, getVPLegalizationStrategy also
// accepts the pwr9 CPU directive (in addition to pwr10 and future) when
// deciding whether vp.load/vp.store can be reported as legal.
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
28+
cl::desc("Allow vp.load and vp.store for pwr9"),
29+
cl::init(false), cl::Hidden);
30+
2731
static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
2832
cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
2933

@@ -1031,3 +1035,42 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
10311035
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
10321036
return TLI->supportsTailCallFor(CB);
10331037
}
1038+
1039+
// Target hook used by CodeGen to decide whether to expand vector predication
1040+
// intrinsics into scalar operations or to use special ISD nodes to represent
1041+
// them. The Target will not see the intrinsics.
1042+
TargetTransformInfo::VPLegalization
1043+
PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
1044+
using VPLegalization = TargetTransformInfo::VPLegalization;
1045+
unsigned Directive = ST->getCPUDirective();
1046+
VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
1047+
if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&
1048+
(!Pwr9EVL || Directive != PPC::DIR_PWR9))
1049+
return DefaultLegalization;
1050+
1051+
if (!ST->isPPC64())
1052+
return DefaultLegalization;
1053+
1054+
unsigned IID = PI.getIntrinsicID();
1055+
if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
1056+
return DefaultLegalization;
1057+
1058+
bool IsLoad = IID == Intrinsic::vp_load;
1059+
Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
1060+
EVT VT = TLI->getValueType(DL, VecTy, true);
1061+
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
1062+
VT != MVT::v16i8)
1063+
return DefaultLegalization;
1064+
1065+
auto IsAllTrueMask = [](Value *MaskVal) {
1066+
if (Value *SplattedVal = getSplatValue(MaskVal))
1067+
if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
1068+
return ConstValue->isAllOnesValue();
1069+
return false;
1070+
};
1071+
unsigned MaskIx = IsLoad ? 1 : 2;
1072+
if (!IsAllTrueMask(PI.getOperand(MaskIx)))
1073+
return DefaultLegalization;
1074+
1075+
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
1076+
}

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
150150
ArrayRef<Type *> Types) const override;
151151
bool supportsTailCallFor(const CallBase *CB) const override;
152152

153+
TargetTransformInfo::VPLegalization
154+
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
155+
153156
private:
154157
// The following constant is used for estimating costs on power9.
155158
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
3+
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
4+
; RUN: llc -verify-machineinstrs -mcpu=future \
5+
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -check-prefix=FUTURE %s
6+
7+
; RUN: llc -verify-machineinstrs -mcpu=pwr10 \
8+
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
9+
; RUN: llc -verify-machineinstrs -mcpu=future \
10+
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck --check-prefix=FUTURE %s
11+
12+
; vp.store of <16 x i8> with an all-true mask: byte elements, so the length is not scaled by element size.
13+
define void @stxvl1(<16 x i8> %a, ptr %b, i64 %c) {
14+
; CHECK-LABEL: stxvl1:
15+
; CHECK: # %bb.0: # %entry
16+
; CHECK-NEXT: sldi 3, 6, 56
17+
; CHECK-NEXT: stxvl 34, 5, 3
18+
; CHECK-NEXT: blr
19+
;
20+
; FUTURE-LABEL: stxvl1:
21+
; FUTURE: # %bb.0: # %entry
22+
; FUTURE-NEXT: stxvrl 34, 5, 6
23+
; FUTURE-NEXT: blr
24+
entry:
25+
%cconv = trunc i64 %c to i32
26+
tail call void @llvm.vp.store.v16i8.p0(<16 x i8> %a, ptr %b, <16 x i1> splat (i1 true), i32 %cconv)
27+
ret void
28+
}
29+
30+
; vp.store of <8 x i16> with an all-true mask: the length is scaled by 2 bytes per element.
31+
define void @stxvl2(<8 x i16> %a, ptr %b, i64 %c) {
32+
; CHECK-LABEL: stxvl2:
33+
; CHECK: # %bb.0: # %entry
34+
; CHECK-NEXT: sldi 3, 6, 57
35+
; CHECK-NEXT: stxvl 34, 5, 3
36+
; CHECK-NEXT: blr
37+
;
38+
; FUTURE-LABEL: stxvl2:
39+
; FUTURE: # %bb.0: # %entry
40+
; FUTURE-NEXT: sldi 3, 6, 1
41+
; FUTURE-NEXT: stxvrl 34, 5, 3
42+
; FUTURE-NEXT: blr
43+
entry:
44+
%cconv = trunc i64 %c to i32
45+
tail call void @llvm.vp.store.v8i16.p0(<8 x i16> %a, ptr %b, <8 x i1> splat (i1 true), i32 %cconv)
46+
ret void
47+
}
48+
49+
; vp.store of <4 x i32> with an all-true mask: the length is scaled by 4 bytes per element.
50+
define void @stxvl4(<4 x i32> %a, ptr %b, i64 %c) {
51+
; CHECK-LABEL: stxvl4:
52+
; CHECK: # %bb.0: # %entry
53+
; CHECK-NEXT: sldi 3, 6, 58
54+
; CHECK-NEXT: stxvl 34, 5, 3
55+
; CHECK-NEXT: blr
56+
;
57+
; FUTURE-LABEL: stxvl4:
58+
; FUTURE: # %bb.0: # %entry
59+
; FUTURE-NEXT: sldi 3, 6, 2
60+
; FUTURE-NEXT: stxvrl 34, 5, 3
61+
; FUTURE-NEXT: blr
62+
entry:
63+
%cconv = trunc i64 %c to i32
64+
tail call void @llvm.vp.store.v4i32.p0(<4 x i32> %a, ptr %b, <4 x i1> splat (i1 true), i32 %cconv)
65+
ret void
66+
}
67+
68+
; vp.store of <2 x i64> with an all-true mask: the length is scaled by 8 bytes per element.
69+
define void @stxvl8(<2 x i64> %a, ptr %b, i64 %c) {
70+
; CHECK-LABEL: stxvl8:
71+
; CHECK: # %bb.0: # %entry
72+
; CHECK-NEXT: sldi 3, 6, 59
73+
; CHECK-NEXT: stxvl 34, 5, 3
74+
; CHECK-NEXT: blr
75+
;
76+
; FUTURE-LABEL: stxvl8:
77+
; FUTURE: # %bb.0: # %entry
78+
; FUTURE-NEXT: sldi 3, 6, 3
79+
; FUTURE-NEXT: stxvrl 34, 5, 3
80+
; FUTURE-NEXT: blr
81+
entry:
82+
%cconv = trunc i64 %c to i32
83+
tail call void @llvm.vp.store.v2i64.p0(<2 x i64> %a, ptr %b, <2 x i1> splat (i1 true), i32 %cconv)
84+
ret void
85+
}
86+
87+
; vp.load of <16 x i8> with an all-true mask: byte elements, so the length is not scaled by element size.
88+
define <16 x i8> @lxvl1(ptr %a, i64 %b) {
89+
; CHECK-LABEL: lxvl1:
90+
; CHECK: # %bb.0: # %entry
91+
; CHECK-NEXT: sldi 4, 4, 56
92+
; CHECK-NEXT: lxvl 34, 3, 4
93+
; CHECK-NEXT: blr
94+
;
95+
; FUTURE-LABEL: lxvl1:
96+
; FUTURE: # %bb.0: # %entry
97+
; FUTURE-NEXT: lxvrl 34, 3, 4
98+
; FUTURE-NEXT: blr
99+
entry:
100+
%bconv = trunc i64 %b to i32
101+
%0 = tail call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %a, <16 x i1> splat (i1 true), i32 %bconv)
102+
ret <16 x i8> %0
103+
}
104+
105+
; vp.load of <8 x i16> with an all-true mask: the length is scaled by 2 bytes per element.
106+
define <8 x i16> @lxvl2(ptr %a, i64 %b) {
107+
; CHECK-LABEL: lxvl2:
108+
; CHECK: # %bb.0: # %entry
109+
; CHECK-NEXT: sldi 4, 4, 57
110+
; CHECK-NEXT: lxvl 34, 3, 4
111+
; CHECK-NEXT: blr
112+
;
113+
; FUTURE-LABEL: lxvl2:
114+
; FUTURE: # %bb.0: # %entry
115+
; FUTURE-NEXT: sldi 4, 4, 1
116+
; FUTURE-NEXT: lxvrl 34, 3, 4
117+
; FUTURE-NEXT: blr
118+
entry:
119+
%bconv = trunc i64 %b to i32
120+
%0 = tail call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %a, <8 x i1> splat (i1 true), i32 %bconv)
121+
ret <8 x i16> %0
122+
}
123+
124+
; vp.load of <4 x i32> with an all-true mask: the length is scaled by 4 bytes per element.
125+
define <4 x i32> @lxvl4(ptr %a, i64 %b) {
126+
; CHECK-LABEL: lxvl4:
127+
; CHECK: # %bb.0: # %entry
128+
; CHECK-NEXT: sldi 4, 4, 58
129+
; CHECK-NEXT: lxvl 34, 3, 4
130+
; CHECK-NEXT: blr
131+
;
132+
; FUTURE-LABEL: lxvl4:
133+
; FUTURE: # %bb.0: # %entry
134+
; FUTURE-NEXT: sldi 4, 4, 2
135+
; FUTURE-NEXT: lxvrl 34, 3, 4
136+
; FUTURE-NEXT: blr
137+
entry:
138+
%bconv = trunc i64 %b to i32
139+
%0 = tail call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %a, <4 x i1> splat (i1 true), i32 %bconv)
140+
ret <4 x i32> %0
141+
}
142+
143+
; vp.load of <2 x i64> with an all-true mask: the length is scaled by 8 bytes per element.
144+
define <2 x i64> @lxvl8(ptr %a, i64 %b) {
145+
; CHECK-LABEL: lxvl8:
146+
; CHECK: # %bb.0: # %entry
147+
; CHECK-NEXT: sldi 4, 4, 59
148+
; CHECK-NEXT: lxvl 34, 3, 4
149+
; CHECK-NEXT: blr
150+
;
151+
; FUTURE-LABEL: lxvl8:
152+
; FUTURE: # %bb.0: # %entry
153+
; FUTURE-NEXT: sldi 4, 4, 3
154+
; FUTURE-NEXT: lxvrl 34, 3, 4
155+
; FUTURE-NEXT: blr
156+
entry:
157+
%bconv = trunc i64 %b to i32
158+
%0 = tail call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %a, <2 x i1> splat (i1 true), i32 %bconv)
159+
ret <2 x i64> %0
160+
}

0 commit comments

Comments
 (0)