@@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
672
672
Kern[" .kind" ] = Kern.getDocument ()->getNode (" fini" );
673
673
}
674
674
675
- void MetadataStreamerV3::emitKernelArgs (const Function &Func,
676
- const GCNSubtarget &ST,
675
+ void MetadataStreamerV3::emitKernelArgs (const MachineFunction &MF,
677
676
msgpack::MapDocNode Kern) {
677
+ auto &Func = MF.getFunction ();
678
678
unsigned Offset = 0 ;
679
679
auto Args = HSAMetadataDoc->getArrayNode ();
680
680
for (auto &Arg : Func.args ())
681
681
emitKernelArg (Arg, Offset, Args);
682
682
683
- emitHiddenKernelArgs (Func, ST , Offset, Args);
683
+ emitHiddenKernelArgs (MF , Offset, Args);
684
684
685
685
Kern[" .args" ] = Args;
686
686
}
@@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg(
789
789
Args.push_back (Arg);
790
790
}
791
791
792
- void MetadataStreamerV3::emitHiddenKernelArgs (const Function &Func,
793
- const GCNSubtarget &ST,
792
+ void MetadataStreamerV3::emitHiddenKernelArgs (const MachineFunction &MF,
794
793
unsigned &Offset,
795
794
msgpack::ArrayDocNode Args) {
795
+ auto &Func = MF.getFunction ();
796
+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
797
+
796
798
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes (Func);
797
799
if (!HiddenArgNumBytes)
798
800
return ;
@@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
910
912
const SIProgramInfo &ProgramInfo) {
911
913
auto &Func = MF.getFunction ();
912
914
auto Kern = getHSAKernelProps (MF, ProgramInfo);
913
- const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
914
915
915
916
assert (Func.getCallingConv () == CallingConv::AMDGPU_KERNEL ||
916
917
Func.getCallingConv () == CallingConv::SPIR_KERNEL);
@@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
924
925
(Twine (Func.getName ()) + Twine (" .kd" )).str (), /* Copy=*/ true );
925
926
emitKernelLanguage (Func, Kern);
926
927
emitKernelAttrs (Func, Kern);
927
- emitKernelArgs (Func, ST , Kern);
928
+ emitKernelArgs (MF , Kern);
928
929
}
929
930
930
931
Kernels.push_back (Kern);
@@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod,
954
955
getRootMetadata (" amdhsa.kernels" ) = HSAMetadataDoc->getArrayNode ();
955
956
}
956
957
958
+ // ===----------------------------------------------------------------------===//
959
+ // HSAMetadataStreamerV5
960
+ // ===----------------------------------------------------------------------===//
961
+
962
+ void MetadataStreamerV5::emitVersion () {
963
+ auto Version = HSAMetadataDoc->getArrayNode ();
964
+ Version.push_back (Version.getDocument ()->getNode (VersionMajorV5));
965
+ Version.push_back (Version.getDocument ()->getNode (VersionMinorV5));
966
+ getRootMetadata (" amdhsa.version" ) = Version;
967
+ }
968
+
969
+ void MetadataStreamerV5::emitHiddenKernelArgs (const MachineFunction &MF,
970
+ unsigned &Offset,
971
+ msgpack::ArrayDocNode Args) {
972
+ auto &Func = MF.getFunction ();
973
+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
974
+ const Module *M = Func.getParent ();
975
+ auto &DL = M->getDataLayout ();
976
+
977
+ auto Int64Ty = Type::getInt64Ty (Func.getContext ());
978
+ auto Int32Ty = Type::getInt32Ty (Func.getContext ());
979
+ auto Int16Ty = Type::getInt16Ty (Func.getContext ());
980
+
981
+ emitKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_x" , Offset, Args);
982
+ emitKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_y" , Offset, Args);
983
+ emitKernelArg (DL, Int32Ty, Align (4 ), " hidden_block_count_z" , Offset, Args);
984
+
985
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_x" , Offset, Args);
986
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_y" , Offset, Args);
987
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_group_size_z" , Offset, Args);
988
+
989
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_x" , Offset, Args);
990
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_y" , Offset, Args);
991
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_remainder_z" , Offset, Args);
992
+
993
+ // Reserved for hidden_tool_correlation_id.
994
+ Offset += 8 ;
995
+
996
+ Offset += 8 ; // Reserved.
997
+
998
+ emitKernelArg (DL, Int64Ty, Align (8 ), " hidden_global_offset_x" , Offset, Args);
999
+ emitKernelArg (DL, Int64Ty, Align (8 ), " hidden_global_offset_y" , Offset, Args);
1000
+ emitKernelArg (DL, Int64Ty, Align (8 ), " hidden_global_offset_z" , Offset, Args);
1001
+
1002
+ emitKernelArg (DL, Int16Ty, Align (2 ), " hidden_grid_dims" , Offset, Args);
1003
+
1004
+ Offset += 6 ; // Reserved.
1005
+ auto Int8PtrTy =
1006
+ Type::getInt8PtrTy (Func.getContext (), AMDGPUAS::GLOBAL_ADDRESS);
1007
+
1008
+ if (M->getNamedMetadata (" llvm.printf.fmts" )) {
1009
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_printf_buffer" , Offset,
1010
+ Args);
1011
+ } else
1012
+ Offset += 8 ; // Skipped.
1013
+
1014
+ if (M->getModuleFlag (" amdgpu_hostcall" )) {
1015
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_hostcall_buffer" , Offset,
1016
+ Args);
1017
+ } else
1018
+ Offset += 8 ; // Skipped.
1019
+
1020
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_multigrid_sync_arg" , Offset,
1021
+ Args);
1022
+
1023
+ // Ignore temporarily until it is implemented.
1024
+ // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1025
+ Offset += 8 ;
1026
+
1027
+ if (Func.hasFnAttribute (" calls-enqueue-kernel" )) {
1028
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_default_queue" , Offset,
1029
+ Args);
1030
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_completion_action" , Offset,
1031
+ Args);
1032
+ } else
1033
+ Offset += 16 ; // Skipped.
1034
+
1035
+ Offset += 72 ; // Reserved.
1036
+
1037
+ // hidden_private_base and hidden_shared_base are only used by GFX8.
1038
+ if (ST.getGeneration () == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1039
+ emitKernelArg (DL, Int32Ty, Align (4 ), " hidden_private_base" , Offset, Args);
1040
+ emitKernelArg (DL, Int32Ty, Align (4 ), " hidden_shared_base" , Offset, Args);
1041
+ } else
1042
+ Offset += 8 ; // Skipped.
1043
+
1044
+ const SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
1045
+ if (MFI.hasQueuePtr ())
1046
+ emitKernelArg (DL, Int8PtrTy, Align (8 ), " hidden_queue_ptr" , Offset, Args);
1047
+ }
1048
+
957
1049
} // end namespace HSAMD
958
1050
} // end namespace AMDGPU
959
1051
} // end namespace llvm
0 commit comments