Skip to content

Commit c639953

Browse files
author
Diego Novillo
committed
Add a new function attribute 'cold' to functions.
Other than recognizing the attribute, the patch does little else. It changes the branch probability analyzer so that edges into blocks postdominated by a cold function are given low weight. Added analysis and code generation tests. Added documentation for the new attribute. llvm-svn: 182638
1 parent 646ec67 commit c639953

File tree

13 files changed

+200
-1
lines changed

13 files changed

+200
-1
lines changed

Diff for: llvm/docs/LangRef.rst

+5
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,11 @@ example:
812812
This attribute indicates that the inliner should attempt to inline
813813
this function into callers whenever possible, ignoring any active
814814
inlining size threshold for this caller.
815+
``cold``
816+
This attribute indicates that this function is rarely called. When
817+
computing edge weights, basic blocks post-dominated by a cold
818+
function call are also considered to be cold and are thus given low
819+
weight.
815820
``nonlazybind``
816821
This attribute suppresses lazy symbol binding for the function. This
817822
may make calls to the function faster, at the cost of extra program

Diff for: llvm/include/llvm-c/Core.h

+1
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ typedef enum {
166166
and the path forward agreed upon.
167167
LLVMAddressSafety = 1ULL << 32,
168168
LLVMStackProtectStrongAttribute = 1ULL<<33
169+
LLVMCold = 1ULL << 34
169170
*/
170171
} LLVMAttribute;
171172

Diff for: llvm/include/llvm/Analysis/BranchProbabilityInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,15 @@ class BranchProbabilityInfo : public FunctionPass {
131131
/// \brief Track the set of blocks directly succeeded by a returning block.
132132
SmallPtrSet<BasicBlock *, 16> PostDominatedByUnreachable;
133133

134+
/// \brief Track the set of blocks that always lead to a cold call.
135+
SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall;
136+
134137
/// \brief Get sum of the block successors' weights.
135138
uint32_t getSumForBlock(const BasicBlock *BB) const;
136139

137140
bool calcUnreachableHeuristics(BasicBlock *BB);
138141
bool calcMetadataWeights(BasicBlock *BB);
142+
bool calcColdCallHeuristics(BasicBlock *BB);
139143
bool calcPointerHeuristics(BasicBlock *BB);
140144
bool calcLoopBranchHeuristics(BasicBlock *BB);
141145
bool calcZeroHeuristics(BasicBlock *BB);

Diff for: llvm/include/llvm/IR/Attributes.h

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class Attribute {
6868
///< 0 means unaligned (different from align(1))
6969
AlwaysInline, ///< inline=always
7070
ByVal, ///< Pass structure by value
71+
Cold, ///< Marks function as being in a cold path.
7172
InlineHint, ///< Source said inlining was desirable
7273
InReg, ///< Force argument to be passed in register
7374
MinSize, ///< Function must be optimized for size first

Diff for: llvm/lib/Analysis/BranchProbabilityInfo.cpp

+81
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
6969
/// easily subsume it.
7070
static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
7171

72+
/// \brief Weight for a branch taken going into a cold block.
73+
///
74+
/// This is the weight for a branch taken toward a block marked
75+
/// cold. A block is marked cold if it's postdominated by a
76+
/// block containing a call to a cold function. Cold functions
77+
/// are those marked with attribute 'cold'.
78+
static const uint32_t CC_TAKEN_WEIGHT = 4;
79+
80+
/// \brief Weight for a branch not-taken into a cold block.
81+
///
82+
/// This is the weight for a branch not taken toward a block marked
83+
/// cold.
84+
static const uint32_t CC_NONTAKEN_WEIGHT = 64;
85+
7286
static const uint32_t PH_TAKEN_WEIGHT = 20;
7387
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
7488

@@ -193,6 +207,69 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
193207
return true;
194208
}
195209

210+
/// \brief Calculate edge weights for edges leading to cold blocks.
211+
///
212+
/// A cold block is one post-dominated by a block with a call to a
213+
/// cold function. Those edges are unlikely to be taken, so we give
214+
/// them relatively low weight.
215+
///
216+
/// Return true if we could compute the weights for cold edges.
217+
/// Return false, otherwise.
218+
bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
219+
TerminatorInst *TI = BB->getTerminator();
220+
if (TI->getNumSuccessors() == 0)
221+
return false;
222+
223+
// Determine which successors are post-dominated by a cold block.
224+
SmallVector<unsigned, 4> ColdEdges;
225+
ColdEdges.reserve(TI->getNumSuccessors());
226+
SmallVector<unsigned, 4> NormalEdges;
227+
NormalEdges.reserve(TI->getNumSuccessors());
228+
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
229+
if (PostDominatedByColdCall.count(*I))
230+
ColdEdges.push_back(I.getSuccessorIndex());
231+
else
232+
NormalEdges.push_back(I.getSuccessorIndex());
233+
234+
// If all successors are in the set of blocks post-dominated by cold calls,
235+
// this block is in the set post-dominated by cold calls.
236+
if (ColdEdges.size() == TI->getNumSuccessors())
237+
PostDominatedByColdCall.insert(BB);
238+
else {
239+
// Otherwise, if the block itself contains a call to a cold function, add it
240+
// to the set of blocks postdominated by a cold call.
241+
assert(!PostDominatedByColdCall.count(BB));
242+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
243+
if (CallInst *CI = dyn_cast<CallInst>(I))
244+
if (CI->hasFnAttr(Attribute::Cold)) {
245+
PostDominatedByColdCall.insert(BB);
246+
break;
247+
}
248+
}
249+
250+
// Skip probabilities if this block has a single successor or no cold edges.
251+
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
252+
return false;
253+
254+
uint32_t ColdWeight =
255+
std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
256+
for (SmallVector<unsigned, 4>::iterator I = ColdEdges.begin(),
257+
E = ColdEdges.end();
258+
I != E; ++I)
259+
setEdgeWeight(BB, *I, ColdWeight);
260+
261+
if (NormalEdges.empty())
262+
return true;
263+
uint32_t NormalWeight = std::max(
264+
CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
265+
for (SmallVector<unsigned, 4>::iterator I = NormalEdges.begin(),
266+
E = NormalEdges.end();
267+
I != E; ++I)
268+
setEdgeWeight(BB, *I, NormalWeight);
269+
270+
return true;
271+
}
272+
196273
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
197274
// between two pointers, or between a pointer and NULL, will fail.
198275
bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
@@ -397,6 +474,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
397474
LastF = &F; // Store the last function we ran on for printing.
398475
LI = &getAnalysis<LoopInfo>();
399476
assert(PostDominatedByUnreachable.empty());
477+
assert(PostDominatedByColdCall.empty());
400478

401479
// Walk the basic blocks in post-order so that we can build up state about
402480
// the successors of a block iteratively.
@@ -408,6 +486,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
408486
continue;
409487
if (calcMetadataWeights(*I))
410488
continue;
489+
if (calcColdCallHeuristics(*I))
490+
continue;
411491
if (calcLoopBranchHeuristics(*I))
412492
continue;
413493
if (calcPointerHeuristics(*I))
@@ -420,6 +500,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
420500
}
421501

422502
PostDominatedByUnreachable.clear();
503+
PostDominatedByColdCall.clear();
423504
return false;
424505
}
425506

Diff for: llvm/lib/AsmParser/LLLexer.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ lltok::Kind LLLexer::LexIdentifier() {
564564

565565
KEYWORD(alwaysinline);
566566
KEYWORD(byval);
567+
KEYWORD(cold);
567568
KEYWORD(inlinehint);
568569
KEYWORD(inreg);
569570
KEYWORD(minsize);

Diff for: llvm/lib/AsmParser/LLParser.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
909909
continue;
910910
}
911911
case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
912+
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
912913
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
913914
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
914915
case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
@@ -1222,6 +1223,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
12221223

12231224
case lltok::kw_alignstack:
12241225
case lltok::kw_alwaysinline:
1226+
case lltok::kw_cold:
12251227
case lltok::kw_inlinehint:
12261228
case lltok::kw_minsize:
12271229
case lltok::kw_naked:

Diff for: llvm/lib/AsmParser/LLToken.h

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ namespace lltok {
9696
kw_alwaysinline,
9797
kw_sanitize_address,
9898
kw_byval,
99+
kw_cold,
99100
kw_inlinehint,
100101
kw_inreg,
101102
kw_minsize,

Diff for: llvm/lib/IR/Attributes.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
217217
return "uwtable";
218218
if (hasAttribute(Attribute::ZExt))
219219
return "zeroext";
220+
if (hasAttribute(Attribute::Cold))
221+
return "cold";
220222

221223
// FIXME: These should be output like this:
222224
//
@@ -396,6 +398,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
396398
case Attribute::SanitizeMemory: return 1ULL << 37;
397399
case Attribute::NoBuiltin: return 1ULL << 38;
398400
case Attribute::Returned: return 1ULL << 39;
401+
case Attribute::Cold: return 1ULL << 40;
399402
}
400403
llvm_unreachable("Unsupported attribute type");
401404
}

Diff for: llvm/lib/IR/Verifier.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
692692
I->getKindAsEnum() == Attribute::SanitizeMemory ||
693693
I->getKindAsEnum() == Attribute::MinSize ||
694694
I->getKindAsEnum() == Attribute::NoDuplicate ||
695-
I->getKindAsEnum() == Attribute::NoBuiltin) {
695+
I->getKindAsEnum() == Attribute::NoBuiltin ||
696+
I->getKindAsEnum() == Attribute::Cold) {
696697
if (!isFunction)
697698
CheckFailed("Attribute '" + I->getKindAsString() +
698699
"' only applies to functions!", V);

Diff for: llvm/test/Analysis/BranchProbabilityInfo/basic.ll

+58
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,61 @@ return:
115115
}
116116

117117
!2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
118+
119+
declare void @coldfunc() cold
120+
121+
define i32 @test5(i32 %a, i32 %b, i1 %flag) {
122+
; CHECK: Printing analysis {{.*}} for function 'test5'
123+
entry:
124+
br i1 %flag, label %then, label %else
125+
; CHECK: edge entry -> then probability is 4 / 68
126+
; CHECK: edge entry -> else probability is 64 / 68
127+
128+
then:
129+
call void @coldfunc()
130+
br label %exit
131+
; CHECK: edge then -> exit probability is 16 / 16 = 100%
132+
133+
else:
134+
br label %exit
135+
; CHECK: edge else -> exit probability is 16 / 16 = 100%
136+
137+
exit:
138+
%result = phi i32 [ %a, %then ], [ %b, %else ]
139+
ret i32 %result
140+
}
141+
142+
declare i32 @regular_function(i32 %i)
143+
144+
define i32 @test_cold_call_sites(i32* %a) {
145+
; Test that edges to blocks post-dominated by cold call sites
146+
; are marked as not expected to be taken.
147+
; TODO(dnovillo) The calls to regular_function should not be merged, but
148+
; they are currently being merged. Convert this into a code generation test
149+
; after that is fixed.
150+
151+
; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites'
152+
; CHECK: edge entry -> then probability is 4 / 68 = 5.88235%
153+
; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge]
154+
155+
entry:
156+
%gep1 = getelementptr i32* %a, i32 1
157+
%val1 = load i32* %gep1
158+
%cond1 = icmp ugt i32 %val1, 1
159+
br i1 %cond1, label %then, label %else
160+
161+
then:
162+
; This function is not declared cold, but this call site is.
163+
%val4 = call i32 @regular_function(i32 %val1) cold
164+
br label %exit
165+
166+
else:
167+
%gep2 = getelementptr i32* %a, i32 2
168+
%val2 = load i32* %gep2
169+
%val3 = call i32 @regular_function(i32 %val2)
170+
br label %exit
171+
172+
exit:
173+
%ret = phi i32 [ %val4, %then ], [ %val3, %else ]
174+
ret i32 %ret
175+
}

Diff for: llvm/test/CodeGen/X86/block-placement.ll

+32
Original file line numberDiff line numberDiff line change
@@ -1089,3 +1089,35 @@ while.end:
10891089
store double %rra.0, double* %arrayidx34, align 8
10901090
br label %for.cond
10911091
}
1092+
1093+
declare void @cold_function() cold
1094+
1095+
define i32 @test_cold_calls(i32* %a) {
1096+
; Test that edges to blocks post-dominated by cold calls are
1097+
; marked as not expected to be taken. They should be laid out
1098+
; at the bottom.
1099+
; CHECK: test_cold_calls:
1100+
; CHECK: %entry
1101+
; CHECK: %else
1102+
; CHECK: %exit
1103+
; CHECK: %then
1104+
1105+
entry:
1106+
%gep1 = getelementptr i32* %a, i32 1
1107+
%val1 = load i32* %gep1
1108+
%cond1 = icmp ugt i32 %val1, 1
1109+
br i1 %cond1, label %then, label %else
1110+
1111+
then:
1112+
call void @cold_function()
1113+
br label %exit
1114+
1115+
else:
1116+
%gep2 = getelementptr i32* %a, i32 2
1117+
%val2 = load i32* %gep2
1118+
br label %exit
1119+
1120+
exit:
1121+
%ret = phi i32 [ %val1, %then ], [ %val2, %else ]
1122+
ret i32 %ret
1123+
}

Diff for: llvm/test/Feature/cold.ll

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
; CHECK: @fun() #0
4+
define void @fun() #0 {
5+
ret void
6+
}
7+
8+
; CHECK: attributes #0 = { cold }
9+
attributes #0 = { cold }

0 commit comments

Comments
 (0)