27
27
#include < algorithm>
28
28
#include < cassert>
29
29
#include < cstdint>
30
+ #include < memory>
30
31
#include < utility>
31
32
32
33
namespace llvm {
@@ -35,6 +36,7 @@ class Function;
35
36
class Loop ;
36
37
class LoopInfo ;
37
38
class raw_ostream ;
39
+ class DominatorTree ;
38
40
class PostDominatorTree ;
39
41
class TargetLibraryInfo ;
40
42
class Value ;
@@ -51,29 +53,87 @@ class Value;
51
53
// / identify an edge, since we can have multiple edges from Src to Dst.
52
54
// / As an example, we can have a switch which jumps to Dst with value 0 and
53
55
// / value 10.
56
+ // /
57
+ // / Computing branch probabilities can be logically viewed as a three-step
58
+ // / process:
59
+ // /
60
+ // / First, if there is profile information associated with the branch then
61
+ // / it is trivially translated to branch probabilities. There is one exception
62
+ // / to this rule though. Probabilities for edges leading to "unreachable"
63
+ // / blocks (blocks with the estimated weight not greater than
64
+ // / UNREACHABLE_WEIGHT) are evaluated according to static estimation and
65
+ // / override profile information. If no branch probabilities were calculated
66
+ // / on this step then take the next one.
67
+ // /
68
+ // / Second, estimate absolute execution weights for each block based on
69
+ // / statically known information. Roots of such information are "cold",
70
+ // / "unreachable", "noreturn" and "unwind" blocks. Those blocks get their
71
+ // / weights set to BlockExecWeight::COLD, BlockExecWeight::UNREACHABLE,
72
+ // / BlockExecWeight::NORETURN and BlockExecWeight::UNWIND respectively. Then the
73
+ // / weights are propagated to the other blocks up the domination line. In
74
+ // / addition, if all successors have estimated weights set then the maximum of these
75
+ // / weights is assigned to the block itself (while this is not an ideal heuristic in
76
+ // / theory it's simple and works reasonably well in most cases) and the process
77
+ // / repeats. Once the process of weights propagation converges branch
78
+ // / probabilities are set for all such branches that have at least one successor
79
+ // / with the weight set. Default execution weight (BlockExecWeight::DEFAULT) is
80
+ // / used for any successor which doesn't have its weight set. For loop back
81
+ // / branches we use their weights scaled by loop trip count equal to
82
+ // / 'LBH_TAKEN_WEIGHT/LBH_NOTTAKEN_WEIGHT'.
83
+ // /
84
+ // / Here is a simple example demonstrating how the described algorithm works.
85
+ // /
86
+ // / BB1
87
+ // / / \
88
+ /// v v
89
+ // / BB2 BB3
90
+ // / / \
91
+ /// v v
92
+ // / ColdBB UnreachBB
93
+ // /
94
+ // / Initially, ColdBB is associated with COLD_WEIGHT and UnreachBB with
95
+ // / UNREACHABLE_WEIGHT. COLD_WEIGHT is set to BB2 as maximum between its
96
+ // / successors. BB1 and BB3 have no explicit estimated weights and are assumed to
97
+ // / have DEFAULT_WEIGHT. Based on assigned weights branches will have the
98
+ // / following probabilities:
99
+ // / P(BB1->BB2) = COLD_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) =
100
+ // / 0xffff / (0xffff + 0xfffff) = 0.0588(5.9%)
101
+ // / P(BB1->BB3) = DEFAULT_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) =
102
+ // / 0xfffff / (0xffff + 0xfffff) = 0.941(94.1%)
103
+ // / P(BB2->ColdBB) = COLD_WEIGHT/(COLD_WEIGHT + UNREACHABLE_WEIGHT) = 1(100%)
104
+ // / P(BB2->UnreachBB) =
105
+ // / UNREACHABLE_WEIGHT/(COLD_WEIGHT+UNREACHABLE_WEIGHT) = 0(0%)
106
+ // /
107
+ // / If no branch probabilities were calculated on this step then take the next
108
+ // / one.
109
+ // /
110
+ // / Third, apply different kinds of local heuristics for each individual
111
+ // / branch until the first match. For example, the probability of a pointer being null is
112
+ // / estimated as PH_TAKEN_WEIGHT/(PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT). If
113
+ // / no local heuristic has been matched then branch is left with no explicit
114
+ // / probability set and assumed to have default probability.
54
115
class BranchProbabilityInfo {
55
116
public:
56
117
BranchProbabilityInfo () = default ;
57
118
58
119
BranchProbabilityInfo (const Function &F, const LoopInfo &LI,
59
120
const TargetLibraryInfo *TLI = nullptr ,
121
+ DominatorTree *DT = nullptr ,
60
122
PostDominatorTree *PDT = nullptr ) {
61
- calculate (F, LI, TLI, PDT);
123
+ calculate (F, LI, TLI, DT, PDT);
62
124
}
63
125
64
126
BranchProbabilityInfo (BranchProbabilityInfo &&Arg)
65
127
: Probs(std::move(Arg.Probs)), LastF(Arg.LastF),
66
- PostDominatedByUnreachable (std::move(Arg.PostDominatedByUnreachable)),
67
- PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {}
128
+ EstimatedBlockWeight (std::move(Arg.EstimatedBlockWeight)) {}
68
129
69
130
BranchProbabilityInfo (const BranchProbabilityInfo &) = delete;
70
131
BranchProbabilityInfo &operator =(const BranchProbabilityInfo &) = delete ;
71
132
72
133
BranchProbabilityInfo &operator =(BranchProbabilityInfo &&RHS) {
73
134
releaseMemory ();
74
135
Probs = std::move (RHS.Probs );
75
- PostDominatedByColdCall = std::move (RHS.PostDominatedByColdCall );
76
- PostDominatedByUnreachable = std::move (RHS.PostDominatedByUnreachable );
136
+ EstimatedBlockWeight = std::move (RHS.EstimatedBlockWeight );
77
137
return *this ;
78
138
}
79
139
@@ -143,11 +203,13 @@ class BranchProbabilityInfo {
143
203
}
144
204
145
205
void calculate (const Function &F, const LoopInfo &LI,
146
- const TargetLibraryInfo *TLI, PostDominatorTree *PDT);
206
+ const TargetLibraryInfo *TLI, DominatorTree *DT,
207
+ PostDominatorTree *PDT);
147
208
148
209
// / Forget analysis results for the given basic block.
149
210
void eraseBlock (const BasicBlock *BB);
150
211
212
+ // Data structure to track SCCs for handling irreducible loops.
151
213
class SccInfo {
152
214
// Enum of types to classify basic blocks in SCC. Basic block belonging to
153
215
// SCC is 'Inner' until it is either 'Header' or 'Exiting'. Note that a
@@ -236,6 +298,8 @@ class BranchProbabilityInfo {
236
298
const SccInfo &SccI);
237
299
238
300
const BasicBlock *getBlock () const { return BB; }
301
+ BasicBlock *getBlock () { return const_cast <BasicBlock *>(BB); }
302
+ LoopData getLoopData () const { return LD; }
239
303
Loop *getLoop () const { return LD.first ; }
240
304
int getSccNum () const { return LD.second ; }
241
305
@@ -249,6 +313,7 @@ class BranchProbabilityInfo {
249
313
const BasicBlock *const BB = nullptr ;
250
314
LoopData LD = {nullptr , -1 };
251
315
};
316
+
252
317
// Pair of LoopBlocks representing an edge from first to second block.
253
318
using LoopEdge = std::pair<const LoopBlock &, const LoopBlock &>;
254
319
@@ -258,27 +323,26 @@ class BranchProbabilityInfo {
258
323
// a pair (PredBlock and an index in the successors) to specify an edge.
259
324
using Edge = std::pair<const BasicBlock *, unsigned >;
260
325
261
- // Default weight value. Used when we don't have information about the edge.
262
- // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
263
- // the successors have a weight yet. But it doesn't make sense when providing
264
- // weight to an edge that may have siblings with non-zero weights. This can
265
- // be handled various ways, but it's probably fine for an edge with unknown
266
- // weight to just "inherit" the non-zero weight of an adjacent successor.
267
- static const uint32_t DEFAULT_WEIGHT = 16 ;
268
-
269
326
DenseMap<Edge, BranchProbability> Probs;
270
327
271
328
// / Track the last function we run over for printing.
272
329
const Function *LastF = nullptr ;
273
330
331
+ const LoopInfo *LI = nullptr ;
332
+
274
333
// / Keeps information about all SCCs in a function.
275
334
std::unique_ptr<const SccInfo> SccI;
276
335
277
- // / Track the set of blocks directly succeeded by a returning block.
278
- SmallPtrSet<const BasicBlock *, 16 > PostDominatedByUnreachable;
336
+ // / Keeps mapping of a basic block to its estimated weight.
337
+ SmallDenseMap<const BasicBlock *, uint32_t > EstimatedBlockWeight;
338
+
339
+ // / Keeps mapping of a loop to estimated weight to enter the loop.
340
+ SmallDenseMap<LoopData, uint32_t > EstimatedLoopWeight;
279
341
280
- // / Track the set of blocks that always lead to a cold call.
281
- SmallPtrSet<const BasicBlock *, 16 > PostDominatedByColdCall;
342
+ // / Helper to construct LoopBlock for \p BB.
343
+ LoopBlock getLoopBlock (const BasicBlock *BB) const {
344
+ return LoopBlock (BB, *LI, *SccI.get ());
345
+ }
282
346
283
347
// / Returns true if destination block belongs to some loop and source block
284
348
// / either doesn't belong to any loop or belongs to a loop which is not inner
@@ -301,18 +365,55 @@ class BranchProbabilityInfo {
301
365
void getLoopExitBlocks (const LoopBlock &LB,
302
366
SmallVectorImpl<BasicBlock *> &Exits) const ;
303
367
304
- void computePostDominatedByUnreachable (const Function &F,
305
- PostDominatorTree *PDT);
306
- void computePostDominatedByColdCall (const Function &F,
307
- PostDominatorTree *PDT);
308
- bool calcUnreachableHeuristics (const BasicBlock *BB);
368
+ // / Returns estimated weight for \p BB. None if \p BB has no estimated weight.
369
+ Optional<uint32_t > getEstimatedBlockWeight (const BasicBlock *BB) const ;
370
+
371
+ // / Returns estimated weight to enter \p L. In other words it is weight of
372
+ // / loop's header block not scaled by trip count. Returns None if \p L has no
373
+ // / estimated weight.
374
+ Optional<uint32_t > getEstimatedLoopWeight (const LoopData &L) const ;
375
+
376
+ // / Return estimated weight for \p Edge. Returns None if estimated weight is
377
+ // / unknown.
378
+ Optional<uint32_t > getEstimatedEdgeWeight (const LoopEdge &Edge) const ;
379
+
380
+ // / Iterates over all edges leading from \p SrcBB to \p Successors and
381
+ // / returns maximum of all estimated weights. If at least one edge has unknown
382
+ // / estimated weight None is returned.
383
+ template <class IterT >
384
+ Optional<uint32_t >
385
+ getMaxEstimatedEdgeWeight (const LoopBlock &SrcBB,
386
+ iterator_range<IterT> Successors) const ;
387
+
388
+ // / If \p LoopBB has no estimated weight then set it to \p BBWeight and
389
+ // / return true. Otherwise \p LoopBB's weight remains unchanged and false is
390
+ // / returned. In addition all blocks/loops that might need their weight to be
391
+ // / re-estimated are put into BlockWorkList/LoopWorkList.
392
+ bool updateEstimatedBlockWeight (LoopBlock &LoopBB, uint32_t BBWeight,
393
+ SmallVectorImpl<BasicBlock *> &BlockWorkList,
394
+ SmallVectorImpl<LoopBlock> &LoopWorkList);
395
+
396
+ // / Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight
397
+ // / up the domination tree.
398
+ void propagateEstimatedBlockWeight (const LoopBlock &LoopBB, DominatorTree *DT,
399
+ PostDominatorTree *PDT, uint32_t BBWeight,
400
+ SmallVectorImpl<BasicBlock *> &WorkList,
401
+ SmallVectorImpl<LoopBlock> &LoopWorkList);
402
+
403
+ // / Returns block's weight encoded in the IR.
404
+ Optional<uint32_t > getInitialEstimatedBlockWeight (const BasicBlock *BB);
405
+
406
+ // Computes estimated weights for all blocks in \p F.
407
+ void computeEestimateBlockWeight (const Function &F, DominatorTree *DT,
408
+ PostDominatorTree *PDT);
409
+
410
+ // / Based on weights computed by \p computeEestimateBlockWeight, set
411
+ // / probabilities on branches.
412
+ bool calcEstimatedHeuristics (const BasicBlock *BB);
309
413
bool calcMetadataWeights (const BasicBlock *BB);
310
- bool calcColdCallHeuristics (const BasicBlock *BB);
311
414
bool calcPointerHeuristics (const BasicBlock *BB);
312
- bool calcLoopBranchHeuristics (const BasicBlock *BB, const LoopInfo &LI);
313
415
bool calcZeroHeuristics (const BasicBlock *BB, const TargetLibraryInfo *TLI);
314
416
bool calcFloatingPointHeuristics (const BasicBlock *BB);
315
- bool calcInvokeHeuristics (const BasicBlock *BB);
316
417
};
317
418
318
419
// / Analysis pass which computes \c BranchProbabilityInfo.
0 commit comments