@@ -4911,18 +4911,28 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4911
4911
" teams directive expected to be "
4912
4912
" emitted only for the host!" );
4913
4913
4914
+ auto &Bld = CGF.Builder ;
4915
+
4916
+ // If the target directive is combined with a teams directive:
4917
+ // Return the value in the num_teams clause, if any.
4918
+ // Otherwise, return 0 to denote the runtime default.
4919
+ if (isOpenMPTeamsDirective (D.getDirectiveKind ())) {
4920
+ if (const auto *NumTeamsClause = D.getSingleClause <OMPNumTeamsClause>()) {
4921
+ CodeGenFunction::RunCleanupsScope NumTeamsScope (CGF);
4922
+ auto NumTeams = CGF.EmitScalarExpr (NumTeamsClause->getNumTeams (),
4923
+ /* IgnoreResultAssign*/ true );
4924
+ return Bld.CreateIntCast (NumTeams, CGF.Int32Ty ,
4925
+ /* IsSigned=*/ true );
4926
+ }
4927
+
4928
+ // The default value is 0.
4929
+ return Bld.getInt32 (0 );
4930
+ }
4931
+
4914
4932
// If the target directive is combined with a parallel directive but not a
4915
4933
// teams directive, start one team.
4916
- if (isOpenMPParallelDirective (D.getDirectiveKind ()) &&
4917
- !isOpenMPTeamsDirective (D.getDirectiveKind ()))
4918
- return CGF.Builder .getInt32 (1 );
4919
-
4920
- // FIXME: For the moment we do not support combined directives with target and
4921
- // teams, so we do not expect to get any num_teams clause in the provided
4922
- // directive. Once we support that, this assertion can be replaced by the
4923
- // actual emission of the clause expression.
4924
- assert (D.getSingleClause <OMPNumTeamsClause>() == nullptr &&
4925
- " Not expecting clause in directive." );
4934
+ if (isOpenMPParallelDirective (D.getDirectiveKind ()))
4935
+ return Bld.getInt32 (1 );
4926
4936
4927
4937
// If the current target region has a teams region enclosed, we need to get
4928
4938
// the number of teams to pass to the runtime function call. This is done
@@ -4940,13 +4950,13 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4940
4950
CGOpenMPInnerExprInfo CGInfo (CGF, CS);
4941
4951
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII (CGF, &CGInfo);
4942
4952
llvm::Value *NumTeams = CGF.EmitScalarExpr (NTE->getNumTeams ());
4943
- return CGF. Builder .CreateIntCast (NumTeams, CGF.Int32Ty ,
4944
- /* IsSigned=*/ true );
4953
+ return Bld .CreateIntCast (NumTeams, CGF.Int32Ty ,
4954
+ /* IsSigned=*/ true );
4945
4955
}
4946
4956
4947
4957
// If we have an enclosed teams directive but no num_teams clause we use
4948
4958
// the default value 0.
4949
- return CGF. Builder .getInt32 (0 );
4959
+ return Bld .getInt32 (0 );
4950
4960
}
4951
4961
4952
4962
// No teams associated with the directive.
@@ -4986,9 +4996,20 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
4986
4996
//
4987
4997
// If this is not a teams directive return nullptr.
4988
4998
4989
- if (isOpenMPParallelDirective (D.getDirectiveKind ())) {
4999
+ if (isOpenMPTeamsDirective (D.getDirectiveKind ()) ||
5000
+ isOpenMPParallelDirective (D.getDirectiveKind ())) {
4990
5001
llvm::Value *DefaultThreadLimitVal = Bld.getInt32 (0 );
4991
5002
llvm::Value *NumThreadsVal = nullptr ;
5003
+ llvm::Value *ThreadLimitVal = nullptr ;
5004
+
5005
+ if (const auto *ThreadLimitClause =
5006
+ D.getSingleClause <OMPThreadLimitClause>()) {
5007
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope (CGF);
5008
+ auto ThreadLimit = CGF.EmitScalarExpr (ThreadLimitClause->getThreadLimit (),
5009
+ /* IgnoreResultAssign*/ true );
5010
+ ThreadLimitVal = Bld.CreateIntCast (ThreadLimit, CGF.Int32Ty ,
5011
+ /* IsSigned=*/ true );
5012
+ }
4992
5013
4993
5014
if (const auto *NumThreadsClause =
4994
5015
D.getSingleClause <OMPNumThreadsClause>()) {
@@ -5000,15 +5021,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
5000
5021
Bld.CreateIntCast (NumThreads, CGF.Int32Ty , /* IsSigned=*/ true );
5001
5022
}
5002
5023
5003
- return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal;
5004
- }
5024
+ // Select the lesser of thread_limit and num_threads.
5025
+ if (NumThreadsVal)
5026
+ ThreadLimitVal = ThreadLimitVal
5027
+ ? Bld.CreateSelect (Bld.CreateICmpSLT (NumThreadsVal,
5028
+ ThreadLimitVal),
5029
+ NumThreadsVal, ThreadLimitVal)
5030
+ : NumThreadsVal;
5005
5031
5006
- // FIXME: For the moment we do not support combined directives with target and
5007
- // teams, so we do not expect to get any thread_limit clause in the provided
5008
- // directive. Once we support that, this assertion can be replaced by the
5009
- // actual emission of the clause expression.
5010
- assert (D.getSingleClause <OMPThreadLimitClause>() == nullptr &&
5011
- " Not expecting clause in directive." );
5032
+ // Set default value passed to the runtime if either teams or a target
5033
+ // parallel type directive is found but no clause is specified.
5034
+ if (!ThreadLimitVal)
5035
+ ThreadLimitVal = DefaultThreadLimitVal;
5036
+
5037
+ return ThreadLimitVal;
5038
+ }
5012
5039
5013
5040
// If the current target region has a teams region enclosed, we need to get
5014
5041
// the thread limit to pass to the runtime function call. This is done
@@ -6217,6 +6244,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
6217
6244
CodeGenFunction::EmitOMPTargetParallelDeviceFunction (
6218
6245
CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
6219
6246
break ;
6247
+ case Stmt::OMPTargetTeamsDirectiveClass:
6248
+ CodeGenFunction::EmitOMPTargetTeamsDeviceFunction (
6249
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
6250
+ break ;
6220
6251
default :
6221
6252
llvm_unreachable (" Unknown target directive for OpenMP device codegen." );
6222
6253
}
0 commit comments