Skip to content

Commit 9970273

Browse files
committed Feb 8, 2022
[AMDGPURewriteOutArguments] Don't use pointer element type
Instead of using the pointer element type, look at how the pointer is actually being used in store instructions, while looking through bitcasts. This makes the transform compatible with opaque pointers and a bit more general.

It's worth noting that I have dropped the 3-vector to 4-vector shufflevector special case, because this is now handled in a different way: If the value is actually used as a 4-vector, then we're directly going to use that type, instead of shuffling to a 3-vector in between.

Differential Revision: https://reviews.llvm.org/D119237
1 parent 0b00cd1 commit 9970273

File tree

3 files changed

+175
-177
lines changed

3 files changed

+175
-177
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp

+49-103
Original file line number | Diff line number | Diff line change
@@ -83,12 +83,8 @@ class AMDGPURewriteOutArguments : public FunctionPass {
8383
const DataLayout *DL = nullptr;
8484
MemoryDependenceResults *MDA = nullptr;
8585

86-
bool checkArgumentUses(Value &Arg) const;
87-
bool isOutArgumentCandidate(Argument &Arg) const;
88-
89-
#ifndef NDEBUG
90-
bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const;
91-
#endif
86+
Type *getStoredType(Value &Arg) const;
87+
Type *getOutArgumentType(Argument &Arg) const;
9288

9389
public:
9490
static char ID;
@@ -114,95 +110,68 @@ INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE,
114110

115111
char AMDGPURewriteOutArguments::ID = 0;
116112

117-
bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
113+
Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const {
118114
const int MaxUses = 10;
119115
int UseCount = 0;
120116

121-
for (Use &U : Arg.uses()) {
122-
StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
123-
if (UseCount > MaxUses)
124-
return false;
117+
SmallVector<Use *> Worklist;
118+
for (Use &U : Arg.uses())
119+
Worklist.push_back(&U);
125120

126-
if (!SI) {
127-
auto *BCI = dyn_cast<BitCastInst>(U.getUser());
128-
if (!BCI || !BCI->hasOneUse())
129-
return false;
130-
131-
// We don't handle multiple stores currently, so stores to aggregate
132-
// pointers aren't worth the trouble since they are canonically split up.
133-
Type *DestEltTy = BCI->getType()->getPointerElementType();
134-
if (DestEltTy->isAggregateType())
135-
return false;
136-
137-
// We could handle these if we had a convenient way to bitcast between
138-
// them.
139-
Type *SrcEltTy = Arg.getType()->getPointerElementType();
140-
if (SrcEltTy->isArrayTy())
141-
return false;
142-
143-
// Special case handle structs with single members. It is useful to handle
144-
// some casts between structs and non-structs, but we can't bitcast
145-
// directly between them. Blender uses some casts that look like
146-
// { <3 x float> }* to <4 x float>*
147-
if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
148-
return false;
149-
150-
// Clang emits OpenCL 3-vector type accesses with a bitcast to the
151-
// equivalent 4-element vector and accesses that, and we're looking for
152-
// this pointer cast.
153-
if (DL->getTypeAllocSize(SrcEltTy) != DL->getTypeAllocSize(DestEltTy))
154-
return false;
155-
156-
return checkArgumentUses(*BCI);
121+
Type *StoredType = nullptr;
122+
while (!Worklist.empty()) {
123+
Use *U = Worklist.pop_back_val();
124+
125+
if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
126+
for (Use &U : BCI->uses())
127+
Worklist.push_back(&U);
128+
continue;
157129
}
158130

159-
if (!SI->isSimple() ||
160-
U.getOperandNo() != StoreInst::getPointerOperandIndex())
161-
return false;
131+
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
132+
if (UseCount++ > MaxUses)
133+
return nullptr;
134+
135+
if (!SI->isSimple() ||
136+
U->getOperandNo() != StoreInst::getPointerOperandIndex())
137+
return nullptr;
162138

163-
++UseCount;
139+
if (StoredType && StoredType != SI->getValueOperand()->getType())
140+
return nullptr; // More than one type.
141+
StoredType = SI->getValueOperand()->getType();
142+
continue;
143+
}
144+
145+
// Unsupported user.
146+
return nullptr;
164147
}
165148

166-
// Skip unused arguments.
167-
return UseCount > 0;
149+
return StoredType;
168150
}
169151

170-
bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
152+
Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const {
171153
const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
172154
PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());
173155

174156
// TODO: It might be useful for any out arguments, not just privates.
175157
if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
176158
!AnyAddressSpace) ||
177-
Arg.hasByValAttr() || Arg.hasStructRetAttr() ||
178-
DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) {
179-
return false;
159+
Arg.hasByValAttr() || Arg.hasStructRetAttr()) {
160+
return nullptr;
180161
}
181162

182-
return checkArgumentUses(Arg);
163+
Type *StoredType = getStoredType(Arg);
164+
if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
165+
return nullptr;
166+
167+
return StoredType;
183168
}
184169

185170
bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
186171
DL = &M.getDataLayout();
187172
return false;
188173
}
189174

190-
#ifndef NDEBUG
191-
bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const {
192-
auto *VT0 = dyn_cast<FixedVectorType>(Ty0);
193-
auto *VT1 = dyn_cast<FixedVectorType>(Ty1);
194-
if (!VT0 || !VT1)
195-
return false;
196-
197-
if (VT0->getNumElements() != 3 ||
198-
VT1->getNumElements() != 4)
199-
return false;
200-
201-
return DL->getTypeSizeInBits(VT0->getElementType()) ==
202-
DL->getTypeSizeInBits(VT1->getElementType());
203-
}
204-
#endif
205-
206175
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
207176
if (skipFunction(F))
208177
return false;
@@ -215,7 +184,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
215184
MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
216185

217186
unsigned ReturnNumRegs = 0;
218-
SmallSet<int, 4> OutArgIndexes;
187+
SmallDenseMap<int, Type *, 4> OutArgIndexes;
219188
SmallVector<Type *, 4> ReturnTypes;
220189
Type *RetTy = F.getReturnType();
221190
if (!RetTy->isVoidTy()) {
@@ -227,12 +196,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
227196
ReturnTypes.push_back(RetTy);
228197
}
229198

230-
SmallVector<Argument *, 4> OutArgs;
199+
SmallVector<std::pair<Argument *, Type *>, 4> OutArgs;
231200
for (Argument &Arg : F.args()) {
232-
if (isOutArgumentCandidate(Arg)) {
201+
if (Type *Ty = getOutArgumentType(Arg)) {
233202
LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
234203
<< " in function " << F.getName() << '\n');
235-
OutArgs.push_back(&Arg);
204+
OutArgs.push_back({&Arg, Ty});
236205
}
237206
}
238207

@@ -264,11 +233,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
264233
// first. On the second iteration we've removed that out clobbering argument
265234
// (by effectively moving it into another function) and will find the second
266235
// argument is OK to move.
267-
for (Argument *OutArg : OutArgs) {
236+
for (const auto &Pair : OutArgs) {
268237
bool ThisReplaceable = true;
269238
SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;
270239

271-
Type *ArgTy = OutArg->getType()->getPointerElementType();
240+
Argument *OutArg = Pair.first;
241+
Type *ArgTy = Pair.second;
272242

273243
// Skip this argument if converting it will push us over the register
274244
// count to return limit.
@@ -324,7 +294,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
324294

325295
if (ThisReplaceable) {
326296
ReturnTypes.push_back(ArgTy);
327-
OutArgIndexes.insert(OutArg->getArgNo());
297+
OutArgIndexes.insert({OutArg->getArgNo(), ArgTy});
328298
++NumOutArgumentsReplaced;
329299
Changing = true;
330300
}
@@ -376,32 +346,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
376346
if (RetVal)
377347
NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
378348

379-
for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
380-
Argument *Arg = ReturnPoint.first;
381-
Value *Val = ReturnPoint.second;
382-
Type *EltTy = Arg->getType()->getPointerElementType();
383-
if (Val->getType() != EltTy) {
384-
Type *EffectiveEltTy = EltTy;
385-
if (StructType *CT = dyn_cast<StructType>(EltTy)) {
386-
assert(CT->getNumElements() == 1);
387-
EffectiveEltTy = CT->getElementType(0);
388-
}
389-
390-
if (DL->getTypeSizeInBits(EffectiveEltTy) !=
391-
DL->getTypeSizeInBits(Val->getType())) {
392-
assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
393-
Val = B.CreateShuffleVector(Val, ArrayRef<int>{0, 1, 2});
394-
}
395-
396-
Val = B.CreateBitCast(Val, EffectiveEltTy);
397-
398-
// Re-create single element composite.
399-
if (EltTy != EffectiveEltTy)
400-
Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
401-
}
402-
403-
NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
404-
}
349+
for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
350+
NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
405351

406352
if (RetVal)
407353
RI->setOperand(0, NewRetVal);
@@ -433,7 +379,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
433379

434380
PointerType *ArgType = cast<PointerType>(Arg.getType());
435381

436-
auto *EltTy = ArgType->getPointerElementType();
382+
Type *EltTy = OutArgIndexes[Arg.getArgNo()];
437383
const auto Align =
438384
DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
439385

‎llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll

+3-4
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-any-address-space-out-arguments -amdgpu-rewrite-out-arguments < %s | FileCheck %s
22

33
; CHECK: %void_one_out_non_private_arg_i32_1_use = type { i32 }
4-
; CHECK: %bitcast_pointer_as1 = type { <3 x i32> }
4+
; CHECK: %bitcast_pointer_as1 = type { <4 x i32> }
55

66
; CHECK-LABEL: define private %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(i32 addrspace(1)* %val) #0 {
77
; CHECK-NEXT: ret %void_one_out_non_private_arg_i32_1_use zeroinitializer
@@ -19,9 +19,8 @@ define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 {
1919
; CHECK-LABEL: define private %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* %out) #0 {
2020
; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
2121
; CHECK-NEXT: %bitcast = bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)*
22-
; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
23-
; CHECK-NEXT: %2 = insertvalue %bitcast_pointer_as1 undef, <3 x i32> %1, 0
24-
; CHECK-NEXT: ret %bitcast_pointer_as1 %2
22+
; CHECK-NEXT: %1 = insertvalue %bitcast_pointer_as1 undef, <4 x i32> %load, 0
23+
; CHECK-NEXT: ret %bitcast_pointer_as1 %1
2524

2625
; CHECK-LABEL: define void @bitcast_pointer_as1(<3 x i32> addrspace(1)* %0) #1 {
2726
; CHECK-NEXT: %2 = call %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* undef)

‎llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll

+123-70
Original file line number | Diff line number | Diff line change
@@ -814,16 +814,16 @@ attributes #2 = { alwaysinline nounwind }
814814
; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
815815
; CHECK-NEXT: [[FUNC:%.*]] = load i32 ()*, i32 ()** undef, align 8
816816
; CHECK-NEXT: [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()**
817-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 ()* [[FUNC]] to void ()*
818-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, void ()* [[TMP1]], 0
819-
; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]]
817+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, i32 ()* [[FUNC]], 0
818+
; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]]
820819
;
821820
;
822821
; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type
823822
; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
824823
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** undef)
825824
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0
826-
; CHECK-NEXT: store void ()* [[TMP3]], void ()** [[TMP0]], align 8
825+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast void ()** [[TMP0]] to i32 ()**
826+
; CHECK-NEXT: store i32 ()* [[TMP3]], i32 ()** [[TMP4]], align 8
827827
; CHECK-NEXT: ret void
828828
;
829829
;
@@ -925,201 +925,248 @@ attributes #2 = { alwaysinline nounwind }
925925
; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] {
926926
; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
927927
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>*
928-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
929-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <3 x i32> [[TMP1]], 0
930-
; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]]
928+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <4 x i32> [[LOAD]], 0
929+
; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]]
931930
;
932931
;
933932
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32
934933
; CHECK-SAME: (<3 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
935934
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* undef)
936935
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0
937-
; CHECK-NEXT: store <3 x i32> [[TMP3]], <3 x i32>* [[TMP0]], align 16
936+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x i32>* [[TMP0]] to <4 x i32>*
937+
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
938938
; CHECK-NEXT: ret void
939939
;
940940
;
941941
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32.body
942942
; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] {
943943
; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
944944
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>*
945-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
946-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
947-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
948-
; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP3]]
945+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <4 x i32> [[LOAD]], 0
946+
; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]]
949947
;
950948
;
951949
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32
952950
; CHECK-SAME: (<3 x float>* [[TMP0:%.*]]) #[[ATTR2]] {
953951
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* undef)
954952
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0
955-
; CHECK-NEXT: store <3 x float> [[TMP3]], <3 x float>* [[TMP0]], align 16
953+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x float>* [[TMP0]] to <4 x i32>*
954+
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
956955
; CHECK-NEXT: ret void
957956
;
958957
;
959958
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32.body
960959
; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] {
961960
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
962961
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32*
963-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[LOAD]] to float
964-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, float [[TMP1]], 0
965-
; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP2]]
962+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, i32 [[LOAD]], 0
963+
; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP1]]
966964
;
967965
;
968966
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32
969967
; CHECK-SAME: (float* [[TMP0:%.*]]) #[[ATTR2]] {
970968
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* undef)
971969
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0
972-
; CHECK-NEXT: store float [[TMP3]], float* [[TMP0]], align 4
970+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP0]] to i32*
971+
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4
973972
; CHECK-NEXT: ret void
974973
;
975974
;
976-
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
975+
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body
977976
; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] {
978977
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
979978
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32*
980-
; CHECK-NEXT: store i32 [[LOAD]], i32* [[BITCAST]], align 4
979+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] undef, i32 [[LOAD]], 0
980+
; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F16]] [[TMP1]]
981+
;
982+
;
983+
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
984+
; CHECK-SAME: (half* [[TMP0:%.*]]) #[[ATTR2]] {
985+
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* undef)
986+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0
987+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[TMP0]] to i32*
988+
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4
981989
; CHECK-NEXT: ret void
982990
;
983991
;
984-
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
992+
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body
985993
; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
986994
; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, half addrspace(1)* undef, align 2
987995
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half*
988-
; CHECK-NEXT: store half [[LOAD]], half* [[BITCAST]], align 2
996+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] undef, half [[LOAD]], 0
997+
; CHECK-NEXT: ret [[BITCAST_POINTER_F16_I32]] [[TMP1]]
998+
;
999+
;
1000+
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
1001+
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
1002+
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* undef)
1003+
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0
1004+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to half*
1005+
; CHECK-NEXT: store half [[TMP3]], half* [[TMP4]], align 2
9891006
; CHECK-NEXT: ret void
9901007
;
9911008
;
9921009
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32.body
9931010
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
9941011
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
9951012
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
996-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[EXTRACTVEC]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
997-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP1]], 0
998-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[TMP2]], 0
999-
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]]
1013+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
1014+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
10001015
;
10011016
;
10021017
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32
10031018
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
10041019
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* undef, <3 x float> [[TMP1]])
10051020
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0
1006-
; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
1021+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x float>*
1022+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
10071023
; CHECK-NEXT: ret void
10081024
;
10091025
;
10101026
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32.body
10111027
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
10121028
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
10131029
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
1014-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[EXTRACTVEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
1015-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
1016-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
1017-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, [[STRUCT_V3F32]] [[TMP3]], 0
1018-
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP4]]
1030+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, <4 x i32> [[EXTRACTVEC]], 0
1031+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]]
10191032
;
10201033
;
10211034
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32
10221035
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
10231036
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* undef, <3 x i32> [[TMP1]])
10241037
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0
1025-
; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
1038+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
1039+
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
10261040
; CHECK-NEXT: ret void
10271041
;
10281042
;
10291043
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body
10301044
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
10311045
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
1032-
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
1033-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, [[STRUCT_V4F32]] [[TMP1]], 0
1034-
; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP2]]
1046+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
1047+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]]
10351048
;
10361049
;
10371050
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32
10381051
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
10391052
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* undef, <4 x float> [[TMP1]])
10401053
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0
1041-
; CHECK-NEXT: store [[STRUCT_V4F32:%.*]] [[TMP4]], %struct.v4f32* [[TMP0]], align 16
1054+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
1055+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
10421056
; CHECK-NEXT: ret void
10431057
;
10441058
;
10451059
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body
10461060
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
10471061
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
1048-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VALUE]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
1049-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
1050-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
1051-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, [[STRUCT_V3F32]] [[TMP3]], 0
1052-
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP4]]
1062+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, <4 x i32> [[VALUE]], 0
1063+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]]
10531064
;
10541065
;
10551066
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32
10561067
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
10571068
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* undef, <4 x i32> [[TMP1]])
10581069
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0
1059-
; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
1070+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
1071+
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
10601072
; CHECK-NEXT: ret void
10611073
;
10621074
;
10631075
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32.body
10641076
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
10651077
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
10661078
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
1067-
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V4F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
1068-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, [[STRUCT_V4F32]] [[TMP1]], 0
1069-
; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP2]]
1079+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
1080+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]]
10701081
;
10711082
;
10721083
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32
10731084
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
10741085
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* undef, <3 x float> [[TMP1]])
10751086
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0
1076-
; CHECK-NEXT: store [[STRUCT_V4F32:%.*]] [[TMP4]], %struct.v4f32* [[TMP0]], align 16
1087+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
1088+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
10771089
; CHECK-NEXT: ret void
10781090
;
10791091
;
1080-
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
1092+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body
10811093
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
10821094
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>*
1083-
; CHECK-NEXT: store <2 x float> [[VALUE]], <2 x float>* [[CAST]], align 8
1095+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] undef, <2 x float> [[VALUE]], 0
1096+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]]
1097+
;
1098+
;
1099+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
1100+
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] {
1101+
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* undef, <2 x float> [[TMP1]])
1102+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0
1103+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <2 x float>*
1104+
; CHECK-NEXT: store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 8
10841105
; CHECK-NEXT: ret void
10851106
;
10861107
;
1087-
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
1108+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body
10881109
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
10891110
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
10901111
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
1091-
; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST]], align 16
1112+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
1113+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]]
1114+
;
1115+
;
1116+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
1117+
; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
1118+
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* undef, <3 x float> [[TMP1]])
1119+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0
1120+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
1121+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
10921122
; CHECK-NEXT: ret void
10931123
;
10941124
;
1095-
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
1125+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body
10961126
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
10971127
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
1098-
; CHECK-NEXT: store <4 x float> [[VALUE]], <4 x float>* [[CAST]], align 16
1128+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
1129+
; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]]
1130+
;
1131+
;
1132+
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
1133+
; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
1134+
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* undef, <4 x float> [[TMP1]])
1135+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0
1136+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
1137+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
10991138
; CHECK-NEXT: ret void
11001139
;
11011140
;
11021141
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body
11031142
; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
11041143
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>*
1105-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VALUE]] to i128
1106-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_I128:%.*]] undef, i128 [[TMP1]], 0
1107-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, [[STRUCT_I128]] [[TMP2]], 0
1108-
; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]]
1144+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
1145+
; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]]
11091146
;
11101147
;
11111148
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32
11121149
; CHECK-SAME: (%struct.i128* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
11131150
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* undef, <4 x float> [[TMP1]])
11141151
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0
1115-
; CHECK-NEXT: store [[STRUCT_I128:%.*]] [[TMP4]], %struct.i128* [[TMP0]], align 4
1152+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.i128* [[TMP0]] to <4 x float>*
1153+
; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
11161154
; CHECK-NEXT: ret void
11171155
;
11181156
;
1119-
; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
1157+
; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body
11201158
; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] {
11211159
; CHECK-NEXT: [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]*
1122-
; CHECK-NEXT: store [4 x float] [[VALUE]], [4 x float]* [[CAST]], align 4
1160+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] undef, [4 x float] [[VALUE]], 0
1161+
; CHECK-NEXT: ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]]
1162+
;
1163+
;
1164+
; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
1165+
; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] {
1166+
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* undef, [4 x float] [[TMP1]])
1167+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0
1168+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast [4 x i32]* [[TMP0]] to [4 x float]*
1169+
; CHECK-NEXT: store [4 x float] [[TMP4]], [4 x float]* [[TMP5]], align 4
11231170
; CHECK-NEXT: ret void
11241171
;
11251172
;
@@ -1130,30 +1177,36 @@ attributes #2 = { alwaysinline nounwind }
11301177
; CHECK: ret0:
11311178
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
11321179
; CHECK-NEXT: [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
1133-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[EXTRACTVEC]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1134-
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP0]], 0
1135-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[TMP1]], 0
1136-
; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP2]]
1180+
; CHECK-NEXT: [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
1181+
; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]]
11371182
; CHECK: ret1:
11381183
; CHECK-NEXT: [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
11391184
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
1140-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[LOAD]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
1141-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_V3F32]] undef, <3 x float> [[TMP3]], 0
1142-
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, [[STRUCT_V3F32]] [[TMP4]], 0
1143-
; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP5]]
1185+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, <4 x float> [[LOAD]], 0
1186+
; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
11441187
;
11451188
;
11461189
; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32
11471190
; CHECK-SAME: (i1 [[TMP0:%.*]], %struct.v3f32* [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] {
11481191
; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* undef, <3 x float> [[TMP2]])
11491192
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0
1150-
; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP5]], %struct.v3f32* [[TMP1]], align 16
1193+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.v3f32* [[TMP1]] to <4 x float>*
1194+
; CHECK-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 16
11511195
; CHECK-NEXT: ret void
11521196
;
11531197
;
1154-
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
1198+
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body
11551199
; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] {
11561200
; CHECK-NEXT: [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32*
1157-
; CHECK-NEXT: store [[STRUCT_V3F32]] [[VALUE]], %struct.v3f32* [[CAST]], align 4
1201+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[VALUE]], 0
1202+
; CHECK-NEXT: ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]]
1203+
;
1204+
;
1205+
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
1206+
; CHECK-SAME: (<3 x float>* [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
1207+
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* undef, [[STRUCT_V3F32]] [[TMP1]])
1208+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0
1209+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <3 x float>* [[TMP0]] to %struct.v3f32*
1210+
; CHECK-NEXT: store [[STRUCT_V3F32]] [[TMP4]], %struct.v3f32* [[TMP5]], align 16
11581211
; CHECK-NEXT: ret void
11591212
;

0 commit comments

Comments
 (0)
Please sign in to comment.