Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit 771d6cd

Browse files
author
Justin Lebar
committed
[CodeGen] [CUDA] Add the ability to set default attrs on functions in linked modules.
Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by NVIDIA as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fast-math in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@293097 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent bc0be02 commit 771d6cd

File tree

7 files changed

+282
-150
lines changed

7 files changed

+282
-150
lines changed

include/clang/CodeGen/CodeGenAction.h

+20-10
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,28 @@ class BackendConsumer;
2323

2424
class CodeGenAction : public ASTFrontendAction {
2525
private:
26+
// Let BackendConsumer access LinkModule.
27+
friend class BackendConsumer;
28+
29+
/// Info about module to link into a module we're generating.
30+
struct LinkModule {
31+
/// The module to link in.
32+
std::unique_ptr<llvm::Module> Module;
33+
34+
/// If true, we set attributes on Module's functions according to our
35+
/// CodeGenOptions and LangOptions, as though we were generating the
36+
/// function ourselves.
37+
bool PropagateAttrs;
38+
39+
/// Bitwise combination of llvm::Linker::Flags used when we link the module.
40+
unsigned LinkFlags;
41+
};
42+
2643
unsigned Act;
2744
std::unique_ptr<llvm::Module> TheModule;
28-
// Vector of {Linker::Flags, Module*} pairs to specify bitcode
29-
// modules to link in using corresponding linker flags.
30-
SmallVector<std::pair<unsigned, llvm::Module *>, 4> LinkModules;
45+
46+
/// Bitcode modules to link in to our module.
47+
SmallVector<LinkModule, 4> LinkModules;
3148
llvm::LLVMContext *VMContext;
3249
bool OwnsVMContext;
3350

@@ -51,13 +68,6 @@ class CodeGenAction : public ASTFrontendAction {
5168
public:
5269
~CodeGenAction() override;
5370

54-
/// setLinkModule - Set the link module to be used by this action. If a link
55-
/// module is not provided, and CodeGenOptions::LinkBitcodeFile is non-empty,
56-
/// the action will load it from the specified file.
57-
void addLinkModule(llvm::Module *Mod, unsigned LinkFlags) {
58-
LinkModules.push_back(std::make_pair(LinkFlags, Mod));
59-
}
60-
6171
/// Take the generated LLVM module, for use after the action has been run.
6272
/// The result may be null on failure.
6373
std::unique_ptr<llvm::Module> takeModule();

include/clang/Frontend/CodeGenOptions.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
130130
/// The float precision limit to use, if non-empty.
131131
std::string LimitFloatPrecision;
132132

133-
/// The name of the bitcode file to link before optzns.
134-
std::vector<std::pair<unsigned, std::string>> LinkBitcodeFiles;
133+
struct BitcodeFileToLink {
134+
/// The filename of the bitcode file to link in.
135+
std::string Filename;
136+
/// If true, we set attributes on functions in the bitcode library according to
137+
/// our CodeGenOptions, much as we set attrs on functions that we generate
138+
/// ourselves.
139+
bool PropagateAttrs = false;
140+
/// Bitwise combination of llvm::Linker::Flags, passed to the LLVM linker.
141+
unsigned LinkFlags = 0;
142+
};
143+
144+
/// The files specified here are linked in to the module before optimizations.
145+
std::vector<BitcodeFileToLink> LinkBitcodeFiles;
135146

136147
/// The user provided name for the "main file", if non-empty. This is useful
137148
/// in situations where the input file name does not match the original input

lib/CodeGen/CGCall.cpp

+108-93
Original file line numberDiff line numberDiff line change
@@ -1620,15 +1620,113 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
16201620
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
16211621
}
16221622

1623+
void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
1624+
bool AttrOnCallSite,
1625+
llvm::AttrBuilder &FuncAttrs) {
1626+
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
1627+
if (!HasOptnone) {
1628+
if (CodeGenOpts.OptimizeSize)
1629+
FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
1630+
if (CodeGenOpts.OptimizeSize == 2)
1631+
FuncAttrs.addAttribute(llvm::Attribute::MinSize);
1632+
}
1633+
1634+
if (CodeGenOpts.DisableRedZone)
1635+
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
1636+
if (CodeGenOpts.NoImplicitFloat)
1637+
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
1638+
1639+
if (AttrOnCallSite) {
1640+
// Attributes that should go on the call site only.
1641+
if (!CodeGenOpts.SimplifyLibCalls ||
1642+
CodeGenOpts.isNoBuiltinFunc(Name.data()))
1643+
FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
1644+
if (!CodeGenOpts.TrapFuncName.empty())
1645+
FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
1646+
} else {
1647+
// Attributes that should go on the function, but not the call site.
1648+
if (!CodeGenOpts.DisableFPElim) {
1649+
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
1650+
} else if (CodeGenOpts.OmitLeafFramePointer) {
1651+
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
1652+
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
1653+
} else {
1654+
FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
1655+
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
1656+
}
1657+
1658+
FuncAttrs.addAttribute("less-precise-fpmad",
1659+
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
1660+
1661+
if (!CodeGenOpts.FPDenormalMode.empty())
1662+
FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
1663+
1664+
FuncAttrs.addAttribute("no-trapping-math",
1665+
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
1666+
1667+
// TODO: Are these all needed?
1668+
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
1669+
FuncAttrs.addAttribute("no-infs-fp-math",
1670+
llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
1671+
FuncAttrs.addAttribute("no-nans-fp-math",
1672+
llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
1673+
FuncAttrs.addAttribute("unsafe-fp-math",
1674+
llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
1675+
FuncAttrs.addAttribute("use-soft-float",
1676+
llvm::toStringRef(CodeGenOpts.SoftFloat));
1677+
FuncAttrs.addAttribute("stack-protector-buffer-size",
1678+
llvm::utostr(CodeGenOpts.SSPBufferSize));
1679+
FuncAttrs.addAttribute("no-signed-zeros-fp-math",
1680+
llvm::toStringRef(CodeGenOpts.NoSignedZeros));
1681+
FuncAttrs.addAttribute(
1682+
"correctly-rounded-divide-sqrt-fp-math",
1683+
llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
1684+
1685+
// TODO: Reciprocal estimate codegen options should apply to instructions?
1686+
std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
1687+
if (!Recips.empty())
1688+
FuncAttrs.addAttribute("reciprocal-estimates",
1689+
llvm::join(Recips.begin(), Recips.end(), ","));
1690+
1691+
if (CodeGenOpts.StackRealignment)
1692+
FuncAttrs.addAttribute("stackrealign");
1693+
if (CodeGenOpts.Backchain)
1694+
FuncAttrs.addAttribute("backchain");
1695+
}
1696+
1697+
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
1698+
// Conservatively, mark all functions and calls in CUDA as convergent
1699+
// (meaning, they may call an intrinsically convergent op, such as
1700+
// __syncthreads(), and so can't have certain optimizations applied around
1701+
// them). LLVM will remove this attribute where it safely can.
1702+
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
1703+
1704+
// Exceptions aren't supported in CUDA device code.
1705+
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
1706+
1707+
// Respect -fcuda-flush-denormals-to-zero.
1708+
if (getLangOpts().CUDADeviceFlushDenormalsToZero)
1709+
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
1710+
}
1711+
}
1712+
1713+
void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
1714+
llvm::AttrBuilder FuncAttrs;
1715+
ConstructDefaultFnAttrList(F.getName(),
1716+
F.hasFnAttribute(llvm::Attribute::OptimizeNone),
1717+
/* AttrOnCallsite = */ false, FuncAttrs);
1718+
llvm::AttributeSet AS = llvm::AttributeSet::get(
1719+
getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs);
1720+
F.addAttributes(llvm::AttributeSet::FunctionIndex, AS);
1721+
}
1722+
16231723
void CodeGenModule::ConstructAttributeList(
16241724
StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
16251725
AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) {
16261726
llvm::AttrBuilder FuncAttrs;
16271727
llvm::AttrBuilder RetAttrs;
1628-
bool HasOptnone = false;
16291728

16301729
CallingConv = FI.getEffectiveCallingConvention();
1631-
16321730
if (FI.isNoReturn())
16331731
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
16341732

@@ -1639,7 +1737,7 @@ void CodeGenModule::ConstructAttributeList(
16391737

16401738
const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
16411739

1642-
bool HasAnyX86InterruptAttr = false;
1740+
bool HasOptnone = false;
16431741
// FIXME: handle sseregparm someday...
16441742
if (TargetDecl) {
16451743
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1679,7 +1777,6 @@ void CodeGenModule::ConstructAttributeList(
16791777
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
16801778
RetAttrs.addAttribute(llvm::Attribute::NonNull);
16811779

1682-
HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
16831780
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
16841781
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
16851782
Optional<unsigned> NumElemsParam;
@@ -1691,86 +1788,19 @@ void CodeGenModule::ConstructAttributeList(
16911788
}
16921789
}
16931790

1694-
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
1695-
if (!HasOptnone) {
1696-
if (CodeGenOpts.OptimizeSize)
1697-
FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
1698-
if (CodeGenOpts.OptimizeSize == 2)
1699-
FuncAttrs.addAttribute(llvm::Attribute::MinSize);
1700-
}
1791+
ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
17011792

1702-
if (CodeGenOpts.DisableRedZone)
1703-
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
1704-
if (CodeGenOpts.NoImplicitFloat)
1705-
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
17061793
if (CodeGenOpts.EnableSegmentedStacks &&
17071794
!(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
17081795
FuncAttrs.addAttribute("split-stack");
17091796

1710-
if (AttrOnCallSite) {
1711-
// Attributes that should go on the call site only.
1712-
if (!CodeGenOpts.SimplifyLibCalls ||
1713-
CodeGenOpts.isNoBuiltinFunc(Name.data()))
1714-
FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
1715-
if (!CodeGenOpts.TrapFuncName.empty())
1716-
FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
1717-
} else {
1718-
// Attributes that should go on the function, but not the call site.
1719-
if (!CodeGenOpts.DisableFPElim) {
1720-
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
1721-
} else if (CodeGenOpts.OmitLeafFramePointer) {
1722-
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
1723-
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
1724-
} else {
1725-
FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
1726-
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
1727-
}
1728-
1797+
if (!AttrOnCallSite) {
17291798
bool DisableTailCalls =
1730-
CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
1731-
(TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
1732-
FuncAttrs.addAttribute(
1733-
"disable-tail-calls",
1734-
llvm::toStringRef(DisableTailCalls));
1735-
1736-
FuncAttrs.addAttribute("less-precise-fpmad",
1737-
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
1738-
1739-
if (!CodeGenOpts.FPDenormalMode.empty())
1740-
FuncAttrs.addAttribute("denormal-fp-math",
1741-
CodeGenOpts.FPDenormalMode);
1742-
1743-
FuncAttrs.addAttribute("no-trapping-math",
1744-
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
1745-
1746-
// TODO: Are these all needed?
1747-
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
1748-
FuncAttrs.addAttribute("no-infs-fp-math",
1749-
llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
1750-
FuncAttrs.addAttribute("no-nans-fp-math",
1751-
llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
1752-
FuncAttrs.addAttribute("unsafe-fp-math",
1753-
llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
1754-
FuncAttrs.addAttribute("use-soft-float",
1755-
llvm::toStringRef(CodeGenOpts.SoftFloat));
1756-
FuncAttrs.addAttribute("stack-protector-buffer-size",
1757-
llvm::utostr(CodeGenOpts.SSPBufferSize));
1758-
FuncAttrs.addAttribute("no-signed-zeros-fp-math",
1759-
llvm::toStringRef(CodeGenOpts.NoSignedZeros));
1760-
FuncAttrs.addAttribute(
1761-
"correctly-rounded-divide-sqrt-fp-math",
1762-
llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
1763-
1764-
// TODO: Reciprocal estimate codegen options should apply to instructions?
1765-
std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
1766-
if (!Recips.empty())
1767-
FuncAttrs.addAttribute("reciprocal-estimates",
1768-
llvm::join(Recips.begin(), Recips.end(), ","));
1769-
1770-
if (CodeGenOpts.StackRealignment)
1771-
FuncAttrs.addAttribute("stackrealign");
1772-
if (CodeGenOpts.Backchain)
1773-
FuncAttrs.addAttribute("backchain");
1799+
CodeGenOpts.DisableTailCalls ||
1800+
(TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
1801+
TargetDecl->hasAttr<AnyX86InterruptAttr>()));
1802+
FuncAttrs.addAttribute("disable-tail-calls",
1803+
llvm::toStringRef(DisableTailCalls));
17741804

17751805
// Add target-cpu and target-features attributes to functions. If
17761806
// we have a decl for the function and it has a target attribute then
@@ -1819,21 +1849,6 @@ void CodeGenModule::ConstructAttributeList(
18191849
}
18201850
}
18211851

1822-
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
1823-
// Conservatively, mark all functions and calls in CUDA as convergent
1824-
// (meaning, they may call an intrinsically convergent op, such as
1825-
// __syncthreads(), and so can't have certain optimizations applied around
1826-
// them). LLVM will remove this attribute where it safely can.
1827-
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
1828-
1829-
// Exceptions aren't supported in CUDA device code.
1830-
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
1831-
1832-
// Respect -fcuda-flush-denormals-to-zero.
1833-
if (getLangOpts().CUDADeviceFlushDenormalsToZero)
1834-
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
1835-
}
1836-
18371852
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
18381853

18391854
QualType RetTy = FI.getReturnType();

0 commit comments

Comments
 (0)