Skip to content

Commit 1194b9c

Browse files
committed
AMDGPU {NFC}: Add code object v5 support and generate metadata for implicit kernel args
Summary: Add code object v5 support (default is still v4). Generate metadata for implicit kernel args for the new ABI. Set the metadata version to be 1.2. Reviewers: t-tye, b-sumner, arsenm, and bcahoon. Fixes: SWDEV-307188, SWDEV-307189. Differential Revision: https://reviews.llvm.org/D118272
1 parent 4f71051 commit 1194b9c

18 files changed

+681
-74
lines changed

llvm/docs/AMDGPUUsage.rst

+181-43
Large diffs are not rendered by default.

llvm/include/llvm/BinaryFormat/ELF.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,8 @@ enum {
372372
// was never defined for V1.
373373
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
374374
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
375-
ELFABIVERSION_AMDGPU_HSA_V4 = 2
375+
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
376+
ELFABIVERSION_AMDGPU_HSA_V5 = 3
376377
};
377378

378379
#define ELF_RELOC(name, value) name = value,

llvm/include/llvm/Support/AMDGPUMetadata.h

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ constexpr uint32_t VersionMajorV4 = 1;
4444
/// HSA metadata minor version for code object V4.
4545
constexpr uint32_t VersionMinorV4 = 1;
4646

47+
/// HSA metadata major version for code object V5.
48+
constexpr uint32_t VersionMajorV5 = 1;
49+
/// HSA metadata minor version for code object V5.
50+
constexpr uint32_t VersionMinorV5 = 2;
51+
4752
/// HSA metadata beginning assembler directive.
4853
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
4954
/// HSA metadata ending assembler directive.

llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,28 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
117117
.Case("image", true)
118118
.Case("pipe", true)
119119
.Case("queue", true)
120+
.Case("hidden_block_count_x", true)
121+
.Case("hidden_block_count_y", true)
122+
.Case("hidden_block_count_z", true)
123+
.Case("hidden_group_size_x", true)
124+
.Case("hidden_group_size_y", true)
125+
.Case("hidden_group_size_z", true)
126+
.Case("hidden_remainder_x", true)
127+
.Case("hidden_remainder_y", true)
128+
.Case("hidden_remainder_z", true)
120129
.Case("hidden_global_offset_x", true)
121130
.Case("hidden_global_offset_y", true)
122131
.Case("hidden_global_offset_z", true)
132+
.Case("hidden_grid_dims", true)
123133
.Case("hidden_none", true)
124134
.Case("hidden_printf_buffer", true)
125135
.Case("hidden_hostcall_buffer", true)
126136
.Case("hidden_default_queue", true)
127137
.Case("hidden_completion_action", true)
128138
.Case("hidden_multigrid_sync_arg", true)
139+
.Case("hidden_private_base", true)
140+
.Case("hidden_shared_base", true)
141+
.Case("hidden_queue_ptr", true)
129142
.Default(false);
130143
}))
131144
return false;

llvm/lib/ObjectYAML/ELFYAML.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
579579
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
580580
break;
581581
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
582+
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
582583
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
583584
EF_AMDGPU_FEATURE_XNACK_V4);
584585
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
8888
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
8989
} else if (isHsaAbiVersion3(getGlobalSTI())) {
9090
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
91+
} else if (isHsaAbiVersion5(getGlobalSTI())) {
92+
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
9193
} else {
9294
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
9395
}
@@ -118,7 +120,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
118120
TM.getTargetTriple().getOS() != Triple::AMDPAL)
119121
return;
120122

121-
if (isHsaAbiVersion3Or4(getGlobalSTI()))
123+
if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
122124
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
123125

124126
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
@@ -127,7 +129,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
127129
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
128130
getTargetStreamer()->getPALMetadata()->readFromIR(M);
129131

130-
if (isHsaAbiVersion3Or4(getGlobalSTI()))
132+
if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
131133
return;
132134

133135
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -259,7 +261,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
259261

260262
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
261263
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
262-
isHsaAbiVersion3Or4(getGlobalSTI())) {
264+
isHsaAbiVersion3AndAbove(getGlobalSTI())) {
263265
AsmPrinter::emitFunctionEntryLabel();
264266
return;
265267
}

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

+99-7
Original file line numberDiff line numberDiff line change
@@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
672672
Kern[".kind"] = Kern.getDocument()->getNode("fini");
673673
}
674674

675-
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
676-
const GCNSubtarget &ST,
675+
void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
677676
msgpack::MapDocNode Kern) {
677+
auto &Func = MF.getFunction();
678678
unsigned Offset = 0;
679679
auto Args = HSAMetadataDoc->getArrayNode();
680680
for (auto &Arg : Func.args())
681681
emitKernelArg(Arg, Offset, Args);
682682

683-
emitHiddenKernelArgs(Func, ST, Offset, Args);
683+
emitHiddenKernelArgs(MF, Offset, Args);
684684

685685
Kern[".args"] = Args;
686686
}
@@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg(
789789
Args.push_back(Arg);
790790
}
791791

792-
void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
793-
const GCNSubtarget &ST,
792+
void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
794793
unsigned &Offset,
795794
msgpack::ArrayDocNode Args) {
795+
auto &Func = MF.getFunction();
796+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
797+
796798
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
797799
if (!HiddenArgNumBytes)
798800
return;
@@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
910912
const SIProgramInfo &ProgramInfo) {
911913
auto &Func = MF.getFunction();
912914
auto Kern = getHSAKernelProps(MF, ProgramInfo);
913-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
914915

915916
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
916917
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
924925
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
925926
emitKernelLanguage(Func, Kern);
926927
emitKernelAttrs(Func, Kern);
927-
emitKernelArgs(Func, ST, Kern);
928+
emitKernelArgs(MF, Kern);
928929
}
929930

930931
Kernels.push_back(Kern);
@@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod,
954955
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
955956
}
956957

958+
//===----------------------------------------------------------------------===//
959+
// HSAMetadataStreamerV5
960+
//===----------------------------------------------------------------------===//
961+
962+
void MetadataStreamerV5::emitVersion() {
963+
auto Version = HSAMetadataDoc->getArrayNode();
964+
Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
965+
Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
966+
getRootMetadata("amdhsa.version") = Version;
967+
}
968+
969+
void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
970+
unsigned &Offset,
971+
msgpack::ArrayDocNode Args) {
972+
auto &Func = MF.getFunction();
973+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
974+
const Module *M = Func.getParent();
975+
auto &DL = M->getDataLayout();
976+
977+
auto Int64Ty = Type::getInt64Ty(Func.getContext());
978+
auto Int32Ty = Type::getInt32Ty(Func.getContext());
979+
auto Int16Ty = Type::getInt16Ty(Func.getContext());
980+
981+
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
982+
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
983+
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
984+
985+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
986+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
987+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
988+
989+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
990+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
991+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
992+
993+
// Reserved for hidden_tool_correlation_id.
994+
Offset += 8;
995+
996+
Offset += 8; // Reserved.
997+
998+
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
999+
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1000+
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1001+
1002+
emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1003+
1004+
Offset += 6; // Reserved.
1005+
auto Int8PtrTy =
1006+
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1007+
1008+
if (M->getNamedMetadata("llvm.printf.fmts")) {
1009+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1010+
Args);
1011+
} else
1012+
Offset += 8; // Skipped.
1013+
1014+
if (M->getModuleFlag("amdgpu_hostcall")) {
1015+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1016+
Args);
1017+
} else
1018+
Offset += 8; // Skipped.
1019+
1020+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1021+
Args);
1022+
1023+
// Ignore temporarily until it is implemented.
1024+
// emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1025+
Offset += 8;
1026+
1027+
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
1028+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1029+
Args);
1030+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1031+
Args);
1032+
} else
1033+
Offset += 16; // Skipped.
1034+
1035+
Offset += 72; // Reserved.
1036+
1037+
// hidden_private_base and hidden_shared_base are only used by GFX8.
1038+
if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1039+
emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1040+
emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1041+
} else
1042+
Offset += 8; // Skipped.
1043+
1044+
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1045+
if (MFI.hasQueuePtr())
1046+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1047+
}
1048+
9571049
} // end namespace HSAMD
9581050
} // end namespace AMDGPU
9591051
} // end namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

+31-10
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ class MetadataStreamer {
5353

5454
virtual void emitKernel(const MachineFunction &MF,
5555
const SIProgramInfo &ProgramInfo) = 0;
56+
57+
protected:
58+
virtual void emitVersion() = 0;
59+
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
60+
msgpack::ArrayDocNode Args) = 0;
5661
};
5762

5863
// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3.
@@ -79,16 +84,15 @@ class MetadataStreamerV3 : public MetadataStreamer {
7984
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
8085
const SIProgramInfo &ProgramInfo) const;
8186

82-
void emitVersion();
87+
void emitVersion() override;
8388

8489
void emitPrintf(const Module &Mod);
8590

8691
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
8792

8893
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
8994

90-
void emitKernelArgs(const Function &Func, const GCNSubtarget &ST,
91-
msgpack::MapDocNode Kern);
95+
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
9296

9397
void emitKernelArg(const Argument &Arg, unsigned &Offset,
9498
msgpack::ArrayDocNode Args);
@@ -100,8 +104,8 @@ class MetadataStreamerV3 : public MetadataStreamer {
100104
StringRef BaseTypeName = "", StringRef AccQual = "",
101105
StringRef TypeQual = "");
102106

103-
void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST,
104-
unsigned &Offset, msgpack::ArrayDocNode Args);
107+
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
108+
msgpack::ArrayDocNode Args) override;
105109

106110
msgpack::DocNode &getRootMetadata(StringRef Key) {
107111
return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
@@ -127,9 +131,9 @@ class MetadataStreamerV3 : public MetadataStreamer {
127131
};
128132

129133
// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4.
130-
class MetadataStreamerV4 final : public MetadataStreamerV3 {
131-
void emitVersion();
132-
134+
class MetadataStreamerV4 : public MetadataStreamerV3 {
135+
protected:
136+
void emitVersion() override;
133137
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
134138

135139
public:
@@ -140,6 +144,18 @@ class MetadataStreamerV4 final : public MetadataStreamerV3 {
140144
const IsaInfo::AMDGPUTargetID &TargetID) override;
141145
};
142146

147+
// TODO: Rename MetadataStreamerV5 -> MetadataStreamerMsgPackV5.
148+
class MetadataStreamerV5 final : public MetadataStreamerV4 {
149+
protected:
150+
void emitVersion() override;
151+
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
152+
msgpack::ArrayDocNode Args) override;
153+
154+
public:
155+
MetadataStreamerV5() = default;
156+
~MetadataStreamerV5() = default;
157+
};
158+
143159
// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
144160
class MetadataStreamerV2 final : public MetadataStreamer {
145161
private:
@@ -167,8 +183,6 @@ class MetadataStreamerV2 final : public MetadataStreamer {
167183
const MachineFunction &MF,
168184
const SIProgramInfo &ProgramInfo) const;
169185

170-
void emitVersion();
171-
172186
void emitPrintf(const Module &Mod);
173187

174188
void emitKernelLanguage(const Function &Func);
@@ -191,6 +205,13 @@ class MetadataStreamerV2 final : public MetadataStreamer {
191205
return HSAMetadata;
192206
}
193207

208+
protected:
209+
void emitVersion() override;
210+
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
211+
msgpack::ArrayDocNode Args) override {
212+
llvm_unreachable("Dummy override should not be invoked!");
213+
}
214+
194215
public:
195216
MetadataStreamerV2() = default;
196217
~MetadataStreamerV2() = default;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -4778,6 +4778,7 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
47784778
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
47794779
return legalizeTrapHsaQueuePtr(MI, MRI, B);
47804780
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
4781+
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
47814782
return ST.supportsGetDoorbellID() ?
47824783
legalizeTrapHsa(MI, MRI, B) :
47834784
legalizeTrapHsaQueuePtr(MI, MRI, B);

0 commit comments

Comments
 (0)