Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit a7d03b5

Browse files
committed
[OpenCL] Add intel_reqd_sub_group_size attribute support
Summary: Add intel_reqd_sub_group_size attribute support as intel extension cl_intel_required_subgroup_size from https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt Reviewers: Anastasia, bader, hfinkel, pxli168 Reviewed By: Anastasia, bader, pxli168 Subscribers: cfe-commits, yaxunl Differential Revision: https://reviews.llvm.org/D30805 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@302125 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c7f0671 commit a7d03b5

File tree

8 files changed

+82
-23
lines changed

8 files changed

+82
-23
lines changed

Diff for: include/clang/Basic/Attr.td

+7
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,13 @@ def OpenCLUnrollHint : InheritableAttr {
864864
let Documentation = [OpenCLUnrollHintDocs];
865865
}
866866

867+
def OpenCLIntelReqdSubGroupSize: InheritableAttr {
868+
let Spellings = [GNU<"intel_reqd_sub_group_size">];
869+
let Args = [UnsignedArgument<"SubGroupSize">];
870+
let Subjects = SubjectList<[Function], ErrorDiag>;
871+
let Documentation = [OpenCLIntelReqdSubGroupSizeDocs];
872+
}
873+
867874
// This attribute is both a type attribute, and a declaration attribute (for
868875
// parameter variables).
869876
def OpenCLAccess : Attr {

Diff for: include/clang/Basic/AttrDocs.td

+15
Original file line numberDiff line numberDiff line change
@@ -2216,6 +2216,21 @@ s6.11.5 for details.
22162216
}];
22172217
}
22182218

2219+
def OpenCLIntelReqdSubGroupSizeDocs : Documentation {
2220+
let Category = DocCatStmt;
2221+
let Heading = "__attribute__((intel_reqd_sub_group_size))";
2222+
let Content = [{
2223+
The optional attribute intel_reqd_sub_group_size can be used to indicate that
2224+
the kernel must be compiled and executed with the specified subgroup size. When
2225+
this attribute is present, get_max_sub_group_size() is guaranteed to return the
2226+
specified integer value. This is important for the correctness of many subgroup
2227+
algorithms, and in some cases may be used by the compiler to generate more optimal
2228+
code. See `cl_intel_required_subgroup_size
2229+
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`
2230+
for details.
2231+
}];
2232+
}
2233+
22192234
def OpenCLAccessDocs : Documentation {
22202235
let Category = DocCatStmt;
22212236
let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";

Diff for: lib/CodeGen/CodeGenFunction.cpp

+20-12
Original file line numberDiff line numberDiff line change
@@ -658,34 +658,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
658658
GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
659659

660660
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
661-
QualType hintQTy = A->getTypeHint();
662-
const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
663-
bool isSignedInteger =
664-
hintQTy->isSignedIntegerType() ||
665-
(hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
666-
llvm::Metadata *attrMDArgs[] = {
661+
QualType HintQTy = A->getTypeHint();
662+
const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>();
663+
bool IsSignedInteger =
664+
HintQTy->isSignedIntegerType() ||
665+
(HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType());
666+
llvm::Metadata *AttrMDArgs[] = {
667667
llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
668668
CGM.getTypes().ConvertType(A->getTypeHint()))),
669669
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
670670
llvm::IntegerType::get(Context, 32),
671-
llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
672-
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
671+
llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))};
672+
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs));
673673
}
674674

675675
if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
676-
llvm::Metadata *attrMDArgs[] = {
676+
llvm::Metadata *AttrMDArgs[] = {
677677
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
678678
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
679679
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
680-
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
680+
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
681681
}
682682

683683
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
684-
llvm::Metadata *attrMDArgs[] = {
684+
llvm::Metadata *AttrMDArgs[] = {
685685
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
686686
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
687687
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
688-
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
688+
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
689+
}
690+
691+
if (const OpenCLIntelReqdSubGroupSizeAttr *A =
692+
FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
693+
llvm::Metadata *AttrMDArgs[] = {
694+
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
695+
Fn->setMetadata("intel_reqd_sub_group_size",
696+
llvm::MDNode::get(Context, AttrMDArgs));
689697
}
690698
}
691699

Diff for: lib/CodeGen/CodeGenFunction.h

+2-10
Original file line numberDiff line numberDiff line change
@@ -1413,16 +1413,8 @@ class CodeGenFunction : public CodeGenTypeCache {
14131413
/// True if we need emit the life-time markers.
14141414
const bool ShouldEmitLifetimeMarkers;
14151415

1416-
/// Add a kernel metadata node to the named metadata node 'opencl.kernels'.
1417-
/// In the kernel metadata node, reference the kernel function and metadata
1418-
/// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2):
1419-
/// - A node for the vec_type_hint(<type>) qualifier contains string
1420-
/// "vec_type_hint", an undefined value of the <type> data type,
1421-
/// and a Boolean that is true if the <type> is integer and signed.
1422-
/// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string
1423-
/// "work_group_size_hint", and three 32-bit integers X, Y and Z.
1424-
/// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string
1425-
/// "reqd_work_group_size", and three 32-bit integers X, Y and Z.
1416+
/// Add OpenCL kernel arg metadata and the kernel attribute metadata to
1417+
/// the function metadata.
14261418
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
14271419
llvm::Function *Fn);
14281420

Diff for: lib/Sema/SemaDeclAttr.cpp

+28
Original file line numberDiff line numberDiff line change
@@ -2891,6 +2891,28 @@ static void handleWorkGroupSize(Sema &S, Decl *D,
28912891
Attr.getAttributeSpellingListIndex()));
28922892
}
28932893

2894+
// Handles intel_reqd_sub_group_size.
2895+
static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
2896+
uint32_t SGSize;
2897+
const Expr *E = Attr.getArgAsExpr(0);
2898+
if (!checkUInt32Argument(S, Attr, E, SGSize))
2899+
return;
2900+
if (SGSize == 0) {
2901+
S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
2902+
<< Attr.getName() << E->getSourceRange();
2903+
return;
2904+
}
2905+
2906+
OpenCLIntelReqdSubGroupSizeAttr *Existing =
2907+
D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
2908+
if (Existing && Existing->getSubGroupSize() != SGSize)
2909+
S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
2910+
2911+
D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
2912+
Attr.getRange(), S.Context, SGSize,
2913+
Attr.getAttributeSpellingListIndex()));
2914+
}
2915+
28942916
static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
28952917
if (!Attr.hasParsedType()) {
28962918
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
@@ -6157,6 +6179,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
61576179
case AttributeList::AT_ReqdWorkGroupSize:
61586180
handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
61596181
break;
6182+
case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
6183+
handleSubGroupSize(S, D, Attr);
6184+
break;
61606185
case AttributeList::AT_VecTypeHint:
61616186
handleVecTypeHint(S, D, Attr);
61626187
break;
@@ -6521,6 +6546,9 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
65216546
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
65226547
<< A << ExpectedKernelFunction;
65236548
D->setInvalidDecl();
6549+
} else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
6550+
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
6551+
D->setInvalidDecl();
65246552
}
65256553
}
65266554
}

Diff for: test/CodeGenOpenCL/kernel-attributes.cl

+4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ kernel __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(
88
kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
99
// CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
1010

11+
kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
12+
// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
13+
1114
// CHECK: [[MD1]] = !{i32 undef, i32 1}
1215
// CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
1316
// CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
1417
// CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
18+
// CHECK: [[MD5]] = !{i32 8}

Diff for: test/Misc/pragma-attribute-supported-attributes-list.test

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
// The number of supported attributes should never go down!
44

5-
// CHECK: #pragma clang attribute supports 59 attributes:
5+
// CHECK: #pragma clang attribute supports 60 attributes:
66
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
77
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
88
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@@ -42,6 +42,7 @@
4242
// CHECK-NEXT: ObjCRuntimeName (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_protocol)
4343
// CHECK-NEXT: ObjCRuntimeVisible (SubjectMatchRule_objc_interface)
4444
// CHECK-NEXT: ObjCSubclassingRestricted (SubjectMatchRule_objc_interface)
45+
// CHECK-NEXT: OpenCLIntelReqdSubGroupSize (SubjectMatchRule_function)
4546
// CHECK-NEXT: OpenCLNoSVM (SubjectMatchRule_variable)
4647
// CHECK-NEXT: OptimizeNone (SubjectMatchRule_function, SubjectMatchRule_objc_method)
4748
// CHECK-NEXT: Overloadable (SubjectMatchRule_function)

Diff for: test/SemaOpenCL/invalid-kernel-attrs.cl

+4
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@ void f_kernel_image2d_t( kernel image2d_t image ) { // expected-error {{'kernel'
3333
kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
3434
kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
3535
kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
36+
37+
__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
38+
kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
39+
kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {} //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}

0 commit comments

Comments
 (0)