Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit def8b33

Browse files
author
Samuel Antao
committed
[CUDA][OpenMP] Create generic offload toolchains
Summary: This patch introduces the concept of offloading tool chain and offloading kind. Each tool chain may have an associated offloading kind that marks it as used in a given programming model that requires offloading. It also adds the logic to iterate on the tool chains based on the kind. Currently, only CUDA is supported, but in general a programming model (an offloading kind) may have multiple associated tool chains that require supporting offloading. This patch does not add tests - its goal is to keep the existing functionality. This patch is the first of a series of three that attempts to make the current support of CUDA more generic and easier to extend to other programming models, namely OpenMP. It tries to capture the suggestions/improvements/concerns on the initial proposal in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. It only tackles the more consensual part of the proposal, i.e. does not address the problem of intermediate files bundling yet. Reviewers: ABataev, jlebar, echristo, hfinkel, tra Subscribers: guansong, Hahnfeld, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: http://reviews.llvm.org/D18170 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@272571 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent facdebd commit def8b33

File tree

6 files changed

+112
-30
lines changed

6 files changed

+112
-30
lines changed

Diff for: include/clang/Driver/Action.h

+15
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,21 @@ class Action {
6868
JobClassLast=VerifyPCHJobClass
6969
};
7070

71+
// The offloading kind determines if this action is bound to a particular
72+
// programming model. Each entry reserves one bit. We also have a special kind
73+
// to designate the host offloading tool chain.
74+
//
75+
// FIXME: This is currently used to indicate that tool chains are used in a
76+
// given programming model, but will be used here as well once a generic offloading
77+
// action is implemented.
78+
enum OffloadKind {
79+
OFK_None = 0x00,
80+
// The host offloading tool chain.
81+
OFK_Host = 0x01,
82+
// The device offloading tool chains - one bit for each programming model.
83+
OFK_Cuda = 0x02,
84+
};
85+
7186
static const char *getClassName(ActionClass AC);
7287

7388
private:

Diff for: include/clang/Driver/Compilation.h

+53-9
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "clang/Driver/Util.h"
1616
#include "llvm/ADT/DenseMap.h"
1717
#include "llvm/Support/Path.h"
18+
#include <map>
1819

1920
namespace llvm {
2021
namespace opt {
@@ -38,8 +39,16 @@ class Compilation {
3839
/// The default tool chain.
3940
const ToolChain &DefaultToolChain;
4041

41-
const ToolChain *CudaHostToolChain;
42-
const ToolChain *CudaDeviceToolChain;
42+
/// A mask of all the programming models the host has to support in the
43+
/// current compilation.
44+
unsigned ActiveOffloadMask;
45+
46+
/// Array with the toolchains of offloading host and devices in the order they
47+
/// were requested by the user. We are preserving that order in case the code
48+
/// generation needs to derive a programming-model-specific semantic out of
49+
/// it.
50+
std::multimap<Action::OffloadKind, const ToolChain *>
51+
OrderedOffloadingToolchains;
4352

4453
/// The original (untranslated) input argument list.
4554
llvm::opt::InputArgList *Args;
@@ -89,16 +98,51 @@ class Compilation {
8998
const Driver &getDriver() const { return TheDriver; }
9099

91100
const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
92-
const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
93-
const ToolChain *getCudaDeviceToolChain() const {
94-
return CudaDeviceToolChain;
101+
const ToolChain *getOffloadingHostToolChain() const {
102+
auto It = OrderedOffloadingToolchains.find(Action::OFK_Host);
103+
if (It != OrderedOffloadingToolchains.end())
104+
return It->second;
105+
return nullptr;
106+
}
107+
unsigned isOffloadingHostKind(Action::OffloadKind Kind) const {
108+
return ActiveOffloadMask & Kind;
109+
}
110+
111+
/// Iterator that visits device toolchains of a given kind.
112+
typedef const std::multimap<Action::OffloadKind,
113+
const ToolChain *>::const_iterator
114+
const_offload_toolchains_iterator;
115+
typedef std::pair<const_offload_toolchains_iterator,
116+
const_offload_toolchains_iterator>
117+
const_offload_toolchains_range;
118+
119+
template <Action::OffloadKind Kind>
120+
const_offload_toolchains_range getOffloadToolChains() const {
121+
return OrderedOffloadingToolchains.equal_range(Kind);
95122
}
96123

97-
void setCudaHostToolChain(const ToolChain *HostToolChain) {
98-
CudaHostToolChain = HostToolChain;
124+
// Return an offload toolchain of the provided kind. Only one is expected to
125+
// exist.
126+
template <Action::OffloadKind Kind>
127+
const ToolChain *getSingleOffloadToolChain() const {
128+
auto TCs = getOffloadToolChains<Kind>();
129+
130+
assert(TCs.first != TCs.second &&
131+
"No tool chains of the selected kind exist!");
132+
assert(std::next(TCs.first) == TCs.second &&
133+
"More than one tool chain of the this kind exist.");
134+
return TCs.first->second;
99135
}
100-
void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
101-
CudaDeviceToolChain = DeviceToolChain;
136+
137+
void addOffloadDeviceToolChain(const ToolChain *DeviceToolChain,
138+
Action::OffloadKind OffloadKind) {
139+
assert(OffloadKind != Action::OFK_Host && OffloadKind != Action::OFK_None &&
140+
"This is not a device tool chain!");
141+
142+
// Update the host offload kind to also contain this kind.
143+
ActiveOffloadMask |= OffloadKind;
144+
OrderedOffloadingToolchains.insert(
145+
std::make_pair(OffloadKind, DeviceToolChain));
102146
}
103147

104148
const llvm::opt::InputArgList &getInputArgs() const { return *Args; }

Diff for: include/clang/Driver/Driver.h

+5
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,11 @@ class Driver {
275275
/// @name Primary Functionality
276276
/// @{
277277

278+
/// CreateOffloadingDeviceToolChains - create all the toolchains required to
279+
/// support offloading devices given the programming models specified in the
280+
/// current compilation. Also, update the host tool chain kind accordingly.
281+
void CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs);
282+
278283
/// BuildCompilation - Construct a compilation object for a command
279284
/// line argument vector.
280285
///

Diff for: lib/Driver/Compilation.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@ using namespace llvm::opt;
2424

2525
Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
2626
InputArgList *_Args, DerivedArgList *_TranslatedArgs)
27-
: TheDriver(D), DefaultToolChain(_DefaultToolChain),
28-
CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
27+
: TheDriver(D), DefaultToolChain(_DefaultToolChain), ActiveOffloadMask(0u),
2928
Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
30-
ForDiagnostics(false) {}
29+
ForDiagnostics(false) {
30+
// The offloading host toolchain is the default tool chain.
31+
OrderedOffloadingToolchains.insert(
32+
std::make_pair(Action::OFK_Host, &DefaultToolChain));
33+
}
3134

3235
Compilation::~Compilation() {
3336
delete TranslatedArgs;

Diff for: lib/Driver/Driver.cpp

+29-14
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,31 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
422422
}
423423
}
424424

425+
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
426+
InputList &Inputs) {
427+
428+
//
429+
// CUDA
430+
//
431+
// We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
432+
if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
433+
return types::isCuda(I.first);
434+
})) {
435+
const ToolChain &TC = getToolChain(
436+
C.getInputArgs(),
437+
llvm::Triple(C.getOffloadingHostToolChain()->getTriple().isArch64Bit()
438+
? "nvptx64-nvidia-cuda"
439+
: "nvptx-nvidia-cuda"));
440+
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
441+
}
442+
443+
//
444+
// TODO: Add support for other offloading programming models here.
445+
//
446+
447+
return;
448+
}
449+
425450
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
426451
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
427452

@@ -549,18 +574,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
549574
InputList Inputs;
550575
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
551576

552-
// Initialize the CUDA device TC only if we have any CUDA Inputs. This is
553-
// necessary so that we don't break compilations that pass flags that are
554-
// incompatible with the NVPTX TC (e.g. -mthread-model single).
555-
if (llvm::any_of(Inputs, [](const std::pair<types::ID, const Arg *> &I) {
556-
return I.first == types::TY_CUDA || I.first == types::TY_PP_CUDA ||
557-
I.first == types::TY_CUDA_DEVICE;
558-
})) {
559-
C->setCudaDeviceToolChain(
560-
&getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
561-
? "nvptx64-nvidia-cuda"
562-
: "nvptx-nvidia-cuda")));
563-
}
577+
// Populate the tool chains for the offloading devices, if any.
578+
CreateOffloadingDeviceToolChains(*C, Inputs);
564579

565580
// Construct the list of abstract actions to perform for this compilation. On
566581
// MachO targets this uses the driver-driver and universal actions.
@@ -1390,7 +1405,7 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
13901405
CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
13911406

13921407
// Build actions for all device inputs.
1393-
assert(C.getCudaDeviceToolChain() &&
1408+
assert(C.getSingleOffloadToolChain<Action::OFK_Cuda>() &&
13941409
"Missing toolchain for device-side compilation.");
13951410
ActionList CudaDeviceActions;
13961411
C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
@@ -2031,7 +2046,7 @@ InputInfo Driver::BuildJobsForActionNoCache(
20312046
// Initial processing of CudaDeviceAction carries host params.
20322047
// Call BuildJobsForAction() again, now with correct device parameters.
20332048
InputInfo II = BuildJobsForAction(
2034-
C, *CDA->input_begin(), C.getCudaDeviceToolChain(),
2049+
C, *CDA->input_begin(), C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
20352050
CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true,
20362051
LinkingOutput, CachedResults);
20372052
// Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so

Diff for: lib/Driver/Tools.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -3767,10 +3767,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
37673767
// particular compilation pass we're constructing here. For now we
37683768
// can check which toolchain we're using and pick the other one to
37693769
// extract the triple.
3770-
if (&getToolChain() == C.getCudaDeviceToolChain())
3771-
AuxToolChain = C.getCudaHostToolChain();
3772-
else if (&getToolChain() == C.getCudaHostToolChain())
3773-
AuxToolChain = C.getCudaDeviceToolChain();
3770+
if (&getToolChain() == C.getSingleOffloadToolChain<Action::OFK_Cuda>())
3771+
AuxToolChain = C.getOffloadingHostToolChain();
3772+
else if (&getToolChain() == C.getOffloadingHostToolChain())
3773+
AuxToolChain = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
37743774
else
37753775
llvm_unreachable("Can't figure out CUDA compilation mode.");
37763776
assert(AuxToolChain != nullptr && "No aux toolchain.");

0 commit comments

Comments
 (0)