-
Notifications
You must be signed in to change notification settings - Fork 10.5k
/
Copy pathPerformanceInliner.cpp
1591 lines (1349 loc) · 54.6 KB
/
PerformanceInliner.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===--- PerformanceInliner.cpp - Basic cost based performance inlining ---===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-inliner"
#include "swift/SIL/SILInstruction.h"
#include "swift/SIL/Dominance.h"
#include "swift/SIL/SILModule.h"
#include "swift/SIL/Projection.h"
#include "swift/SIL/InstructionUtils.h"
#include "swift/SILOptimizer/Analysis/ColdBlockInfo.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/FunctionOrder.h"
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/Analysis/ArraySemantic.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/ConstantFolding.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/MapVector.h"
#include <functional>
using namespace swift;
STATISTIC(NumFunctionsInlined, "Number of functions inlined");
llvm::cl::opt<bool> PrintShortestPathInfo(
"print-shortest-path-info", llvm::cl::init(false),
llvm::cl::desc("Print shortest-path information for inlining"));
//===----------------------------------------------------------------------===//
// ConstantTracker
//===----------------------------------------------------------------------===//
// Tracks constants in the caller and callee to get an estimation of what
// values get constant if the callee is inlined.
// This can be seen as a "simulation" of several optimizations: SROA, mem2reg
// and constant propagation.
// Note that this is only a simplified model and not correct in all cases.
// For example aliasing information is not taken into account.
class ConstantTracker {
// Represents a value in integer constant evaluation.
struct IntConst {
IntConst() : isValid(false), isFromCaller(false) { }
IntConst(const APInt &value, bool isFromCaller) :
value(value), isValid(true), isFromCaller(isFromCaller) { }
// The actual value.
APInt value;
// True if the value is valid, i.e. could be evaluated to a constant.
bool isValid;
// True if the value is only valid, because a constant is passed to the
// callee. False if constant propagation could do the same job inside the
// callee without inlining it.
bool isFromCaller;
};
// Links between loaded and stored values.
// The key is a load instruction, the value is the corresponding store
// instruction which stores the loaded value. Both, key and value can also
// be copy_addr instructions.
llvm::DenseMap<SILInstruction *, SILInstruction *> links;
// The current stored values at memory addresses.
// The key is the base address of the memory (after skipping address
// projections). The value are store (or copy_addr) instructions, which
// store the current value.
// This is only an estimation, because e.g. it does not consider potential
// aliasing.
llvm::DenseMap<SILValue, SILInstruction *> memoryContent;
// Cache for evaluated constants.
llvm::SmallDenseMap<BuiltinInst *, IntConst> constCache;
// The caller/callee function which is tracked.
SILFunction *F;
// The constant tracker of the caller function (null if this is the
// tracker of the callee).
ConstantTracker *callerTracker;
// The apply instruction in the caller (null if this is the tracker of the
// callee).
FullApplySite AI;
// Walks through address projections and (optionally) collects them.
// Returns the base address, i.e. the first address which is not a
// projection.
SILValue scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result = nullptr);
// Get the stored value for a load. The loadInst can be either a real load
// or a copy_addr.
SILValue getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack);
// Gets the parameter in the caller for a function argument.
SILValue getParam(SILValue value) {
if (SILArgument *arg = dyn_cast<SILArgument>(value)) {
if (AI && arg->isFunctionArg() && arg->getFunction() == F) {
// Continue at the caller.
return AI.getArgument(arg->getIndex());
}
}
return SILValue();
}
SILInstruction *getMemoryContent(SILValue addr) {
// The memory content can be stored in this ConstantTracker or in the
// caller's ConstantTracker.
SILInstruction *storeInst = memoryContent[addr];
if (storeInst)
return storeInst;
if (callerTracker)
return callerTracker->getMemoryContent(addr);
return nullptr;
}
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val, ProjectionPath &projStack);
// Gets the estimated integer constant result of a builtin.
IntConst getBuiltinConst(BuiltinInst *BI, int depth);
public:
// Constructor for the caller function.
ConstantTracker(SILFunction *function) :
F(function), callerTracker(nullptr), AI()
{ }
// Constructor for the callee function.
ConstantTracker(SILFunction *function, ConstantTracker *caller,
FullApplySite callerApply) :
F(function), callerTracker(caller), AI(callerApply)
{ }
void beginBlock() {
// Currently we don't do any sophisticated dataflow analysis, so we keep
// the memoryContent alive only for a single block.
memoryContent.clear();
}
// Must be called for each instruction visited in dominance order.
void trackInst(SILInstruction *inst);
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val) {
ProjectionPath projStack(val->getType());
return getDef(val, projStack);
}
// Gets the estimated definition of a value if it is in the caller.
SILInstruction *getDefInCaller(SILValue val) {
SILInstruction *def = getDef(val);
if (def && def->getFunction() != F)
return def;
return nullptr;
}
bool isStackAddrInCaller(SILValue val) {
if (SILValue Param = getParam(stripAddressProjections(val))) {
return isa<AllocStackInst>(stripAddressProjections(Param));
}
return false;
}
// Gets the estimated integer constant of a value.
IntConst getIntConst(SILValue val, int depth = 0);
SILBasicBlock *getTakenBlock(TermInst *term);
};
void ConstantTracker::trackInst(SILInstruction *inst) {
if (auto *LI = dyn_cast<LoadInst>(inst)) {
SILValue baseAddr = scanProjections(LI->getOperand());
if (SILInstruction *loadLink = getMemoryContent(baseAddr))
links[LI] = loadLink;
} else if (StoreInst *SI = dyn_cast<StoreInst>(inst)) {
SILValue baseAddr = scanProjections(SI->getOperand(1));
memoryContent[baseAddr] = SI;
} else if (CopyAddrInst *CAI = dyn_cast<CopyAddrInst>(inst)) {
if (!CAI->isTakeOfSrc()) {
// Treat a copy_addr as a load + store
SILValue loadAddr = scanProjections(CAI->getOperand(0));
if (SILInstruction *loadLink = getMemoryContent(loadAddr)) {
links[CAI] = loadLink;
SILValue storeAddr = scanProjections(CAI->getOperand(1));
memoryContent[storeAddr] = CAI;
}
}
}
}
SILValue ConstantTracker::scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result) {
for (;;) {
if (Projection::isAddressProjection(addr)) {
SILInstruction *I = cast<SILInstruction>(addr);
if (Result) {
Result->push_back(Projection(I));
}
addr = I->getOperand(0);
continue;
}
if (SILValue param = getParam(addr)) {
// Go to the caller.
addr = param;
continue;
}
// Return the base address = the first address which is not a projection.
return addr;
}
}
SILValue ConstantTracker::getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack) {
SILInstruction *store = links[loadInst];
if (!store && callerTracker)
store = callerTracker->links[loadInst];
if (!store) return SILValue();
assert(isa<LoadInst>(loadInst) || isa<CopyAddrInst>(loadInst));
// Push the address projections of the load onto the stack.
SmallVector<Projection, 4> loadProjections;
scanProjections(loadInst->getOperand(0), &loadProjections);
for (const Projection &proj : loadProjections) {
projStack.push_back(proj);
}
// Pop the address projections of the store from the stack.
SmallVector<Projection, 4> storeProjections;
scanProjections(store->getOperand(1), &storeProjections);
for (auto iter = storeProjections.rbegin(); iter != storeProjections.rend();
++iter) {
const Projection &proj = *iter;
// The corresponding load-projection must match the store-projection.
if (projStack.empty() || projStack.back() != proj)
return SILValue();
projStack.pop_back();
}
if (isa<StoreInst>(store))
return store->getOperand(0);
// The copy_addr instruction is both a load and a store. So we follow the link
// again.
assert(isa<CopyAddrInst>(store));
return getStoredValue(store, projStack);
}
// Get the aggregate member based on the top of the projection stack.
static SILValue getMember(SILInstruction *inst, ProjectionPath &projStack) {
if (!projStack.empty()) {
const Projection &proj = projStack.back();
return proj.getOperandForAggregate(inst);
}
return SILValue();
}
SILInstruction *ConstantTracker::getDef(SILValue val,
ProjectionPath &projStack) {
// Track the value up the dominator tree.
for (;;) {
if (SILInstruction *inst = dyn_cast<SILInstruction>(val)) {
if (Projection::isObjectProjection(inst)) {
// Extract a member from a struct/tuple/enum.
projStack.push_back(Projection(inst));
val = inst->getOperand(0);
continue;
} else if (SILValue member = getMember(inst, projStack)) {
// The opposite of a projection instruction: composing a struct/tuple.
projStack.pop_back();
val = member;
continue;
} else if (SILValue loadedVal = getStoredValue(inst, projStack)) {
// A value loaded from memory.
val = loadedVal;
continue;
} else if (isa<ThinToThickFunctionInst>(inst)) {
val = inst->getOperand(0);
continue;
}
return inst;
} else if (SILValue param = getParam(val)) {
// Continue in the caller.
val = param;
continue;
}
return nullptr;
}
}
ConstantTracker::IntConst ConstantTracker::getBuiltinConst(BuiltinInst *BI, int depth) {
const BuiltinInfo &Builtin = BI->getBuiltinInfo();
OperandValueArrayRef Args = BI->getArguments();
switch (Builtin.ID) {
default: break;
// Fold comparison predicates.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_PREDICATE(id, name, attrs, overload) \
case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
{
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldComparison(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SAddOver:
case BuiltinValueKind::UAddOver:
case BuiltinValueKind::SSubOver:
case BuiltinValueKind::USubOver:
case BuiltinValueKind::SMulOver:
case BuiltinValueKind::UMulOver: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
bool IgnoredOverflow;
return IntConst(constantFoldBinaryWithOverflow(lhs.value, rhs.value,
IgnoredOverflow,
getLLVMIntrinsicIDForBuiltinWithOverflow(Builtin.ID)),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SDiv:
case BuiltinValueKind::SRem:
case BuiltinValueKind::UDiv:
case BuiltinValueKind::URem: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid && rhs.value != 0) {
bool IgnoredOverflow;
return IntConst(constantFoldDiv(lhs.value, rhs.value,
IgnoredOverflow, Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::And:
case BuiltinValueKind::AShr:
case BuiltinValueKind::LShr:
case BuiltinValueKind::Or:
case BuiltinValueKind::Shl:
case BuiltinValueKind::Xor: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldBitOperation(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::Trunc:
case BuiltinValueKind::ZExt:
case BuiltinValueKind::SExt:
case BuiltinValueKind::TruncOrBitCast:
case BuiltinValueKind::ZExtOrBitCast:
case BuiltinValueKind::SExtOrBitCast: {
IntConst val = getIntConst(Args[0], depth);
if (val.isValid) {
return IntConst(constantFoldCast(val.value, Builtin), val.isFromCaller);
}
break;
}
}
return IntConst();
}
// Tries to evaluate the integer constant of a value. The \p depth is used
// to limit the complexity.
ConstantTracker::IntConst ConstantTracker::getIntConst(SILValue val, int depth) {
// Don't spend too much time with constant evaluation.
if (depth >= 10)
return IntConst();
SILInstruction *I = getDef(val);
if (!I)
return IntConst();
if (auto *IL = dyn_cast<IntegerLiteralInst>(I)) {
return IntConst(IL->getValue(), IL->getFunction() != F);
}
if (auto *BI = dyn_cast<BuiltinInst>(I)) {
if (constCache.count(BI) != 0)
return constCache[BI];
IntConst builtinConst = getBuiltinConst(BI, depth + 1);
constCache[BI] = builtinConst;
return builtinConst;
}
return IntConst();
}
// Returns the taken block of a terminator instruction if the condition turns
// out to be constant.
SILBasicBlock *ConstantTracker::getTakenBlock(TermInst *term) {
if (CondBranchInst *CBI = dyn_cast<CondBranchInst>(term)) {
IntConst condConst = getIntConst(CBI->getCondition());
if (condConst.isFromCaller) {
return condConst.value != 0 ? CBI->getTrueBB() : CBI->getFalseBB();
}
return nullptr;
}
if (SwitchValueInst *SVI = dyn_cast<SwitchValueInst>(term)) {
IntConst switchConst = getIntConst(SVI->getOperand());
if (switchConst.isFromCaller) {
for (unsigned Idx = 0; Idx < SVI->getNumCases(); ++Idx) {
auto switchCase = SVI->getCase(Idx);
if (auto *IL = dyn_cast<IntegerLiteralInst>(switchCase.first)) {
if (switchConst.value == IL->getValue())
return switchCase.second;
} else {
return nullptr;
}
}
if (SVI->hasDefault())
return SVI->getDefaultBB();
}
return nullptr;
}
if (SwitchEnumInst *SEI = dyn_cast<SwitchEnumInst>(term)) {
if (SILInstruction *def = getDefInCaller(SEI->getOperand())) {
if (EnumInst *EI = dyn_cast<EnumInst>(def)) {
for (unsigned Idx = 0; Idx < SEI->getNumCases(); ++Idx) {
auto enumCase = SEI->getCase(Idx);
if (enumCase.first == EI->getElement())
return enumCase.second;
}
if (SEI->hasDefault())
return SEI->getDefaultBB();
}
}
return nullptr;
}
if (CheckedCastBranchInst *CCB = dyn_cast<CheckedCastBranchInst>(term)) {
if (SILInstruction *def = getDefInCaller(CCB->getOperand())) {
if (UpcastInst *UCI = dyn_cast<UpcastInst>(def)) {
SILType castType = UCI->getOperand()->getType();
if (CCB->getCastType().isExactSuperclassOf(castType)) {
return CCB->getSuccessBB();
}
if (!castType.isBindableToSuperclassOf(CCB->getCastType())) {
return CCB->getFailureBB();
}
}
}
}
return nullptr;
}
//===----------------------------------------------------------------------===//
// Shortest path analysis
//===----------------------------------------------------------------------===//
/// Computes the length of the shortest path through a block in a scope.
///
/// A scope is either a loop or the whole function. The "length" is an
/// estimation of the execution time. For example if the shortest path of a
/// block B is N, it means that the execution time of the scope (either
/// function or a single loop iteration), when execution includes the block, is
/// at least N, regardless of what branches are taken.
///
/// The shortest path of a block is the sum of the shortest path from the scope
/// entry and the shortest path to the scope exit.
class ShortestPathAnalysis {
public:
enum {
/// We assume that cold block take very long to execute.
ColdBlockLength = 1000,
/// Our assumption on how many times a loop is executed.
LoopCount = 10,
/// To keep things simple we only analyse up to this number of nested loops.
MaxNumLoopLevels = 4,
/// The "weight" for the benefit which a single loop nest gives.
SingleLoopWeight = 4,
/// Pretty large but small enough to add something without overflowing.
InitialDist = (1 << 29)
};
/// A weight for an inlining benefit.
class Weight {
/// How long does it take to execute the scope (either loop or the whole
/// function) of the call to inline? The larger it is the less important it
/// is to inline.
int ScopeLength;
/// Represents the loop nest. The larger it is the more important it is to
/// inline
int LoopWeight;
friend class ShortestPathAnalysis;
public:
Weight(int ScopeLength, int LoopWeight) : ScopeLength(ScopeLength),
LoopWeight(LoopWeight) { }
Weight(const Weight &RHS, int AdditionalLoopWeight) :
Weight(RHS.ScopeLength, RHS.LoopWeight + AdditionalLoopWeight) { }
Weight() : ScopeLength(-1), LoopWeight(0) {
assert(!isValid());
}
bool isValid() const { return ScopeLength >= 0; }
/// Updates the \p Benefit by weighting the \p Importance.
void updateBenefit(int &Benefit, int Importance) const {
assert(isValid());
int newBenefit = 0;
// Use some heuristics. The basic idea is: length is bad, loops are good.
if (ScopeLength > 320) {
newBenefit = Importance;
} else if (ScopeLength > 160) {
newBenefit = Importance + LoopWeight * 4;
} else if (ScopeLength > 80) {
newBenefit = Importance + LoopWeight * 8;
} else if (ScopeLength > 40) {
newBenefit = Importance + LoopWeight * 12;
} else if (ScopeLength > 20) {
newBenefit = Importance + LoopWeight * 16;
} else {
newBenefit = Importance + 20 + LoopWeight * 16;
}
// We don't accumulate the benefit instead we max it.
if (newBenefit > Benefit)
Benefit = newBenefit;
}
#ifndef NDEBUG
friend raw_ostream &operator<<(raw_ostream &os, const Weight &W) {
os << W.LoopWeight << '/' << W.ScopeLength;
return os;
}
#endif
};
private:
/// The distances for a block in it's scope (either loop or function).
struct Distances {
/// The shortest distance from the scope entry to the block entry.
int DistFromEntry = InitialDist;
/// The shortest distance from the block entry to the scope exit.
int DistToExit = InitialDist;
/// Additional length to account for loop iterations. It's != 0 for loop
/// headers (or loop predecessors).
int LoopHeaderLength = 0;
};
/// Holds the distance information for a block in all it's scopes, i.e. in all
/// its containing loops and the function itself.
struct BlockInfo {
private:
/// Distances for all scopes. Element 0 refers to the function, elements
/// > 0 to loops.
Distances Dists[MaxNumLoopLevels];
public:
/// The length of the block itself.
int Length = 0;
/// Returns the distances for the loop with nesting level \p LoopDepth.
Distances &getDistances(int LoopDepth) {
assert(LoopDepth >= 0 && LoopDepth < MaxNumLoopLevels);
return Dists[LoopDepth];
}
/// Returns the length including the LoopHeaderLength for a given
/// \p LoopDepth.
int getLength(int LoopDepth) {
return Length + getDistances(LoopDepth).LoopHeaderLength;
}
/// Returns the length of the shortest path of this block in the loop with
/// nesting level \p LoopDepth.
int getScopeLength(int LoopDepth) {
const Distances &D = getDistances(LoopDepth);
return D.DistFromEntry + D.DistToExit;
}
};
SILFunction *F;
SILLoopInfo *LI;
llvm::DenseMap<const SILBasicBlock *, BlockInfo *> BlockInfos;
std::vector<BlockInfo> BlockInfoStorage;
BlockInfo *getBlockInfo(const SILBasicBlock *BB) {
BlockInfo *BI = BlockInfos[BB];
assert(BI);
return BI;
}
// Utility functions to make the template solveDataFlow compilable for a block
// list containing references _and_ a list containing pointers.
const SILBasicBlock *getBlock(const SILBasicBlock *BB) { return BB; }
const SILBasicBlock *getBlock(const SILBasicBlock &BB) { return &BB; }
/// Returns the minimum distance from all predecessor blocks of \p BB.
int getEntryDistFromPreds(const SILBasicBlock *BB, int LoopDepth);
/// Returns the minimum distance from all successor blocks of \p BB.
int getExitDistFromSuccs(const SILBasicBlock *BB, int LoopDepth);
/// Computes the distances by solving the dataflow problem for all \p Blocks
/// in a scope.
template <typename BlockList>
void solveDataFlow(const BlockList &Blocks, int LoopDepth) {
bool Changed = false;
// Compute the distances from the entry block.
do {
Changed = false;
for (auto &Block : Blocks) {
const SILBasicBlock *BB = getBlock(Block);
BlockInfo *BBInfo = getBlockInfo(BB);
int DistFromEntry = getEntryDistFromPreds(BB, LoopDepth);
Distances &BBDists = BBInfo->getDistances(LoopDepth);
if (DistFromEntry < BBDists.DistFromEntry) {
BBDists.DistFromEntry = DistFromEntry;
Changed = true;
}
}
} while (Changed);
// Compute the distances to the exit block.
do {
Changed = false;
for (auto &Block : reverse(Blocks)) {
const SILBasicBlock *BB = getBlock(Block);
BlockInfo *BBInfo = getBlockInfo(BB);
int DistToExit =
getExitDistFromSuccs(BB, LoopDepth) + BBInfo->getLength(LoopDepth);
Distances &BBDists = BBInfo->getDistances(LoopDepth);
if (DistToExit < BBDists.DistToExit) {
BBDists.DistToExit = DistToExit;
Changed = true;
}
}
} while (Changed);
}
/// Analyze \p Loop and all its inner loops.
void analyzeLoopsRecursively(SILLoop *Loop, int LoopDepth);
void printFunction(llvm::raw_ostream &OS);
void printLoop(llvm::raw_ostream &OS, SILLoop *Loop, int LoopDepth);
void printBlockInfo(llvm::raw_ostream &OS, SILBasicBlock *BB, int LoopDepth);
public:
ShortestPathAnalysis(SILFunction *F, SILLoopInfo *LI) : F(F), LI(LI) { }
bool isValid() const { return !BlockInfos.empty(); }
/// Compute the distances. The function \p getApplyLength returns the length
/// of a function call.
template <typename Func>
void analyze(ColdBlockInfo &CBI, Func getApplyLength) {
assert(!isValid());
BlockInfoStorage.resize(F->size());
// First step: compute the length of the blocks.
unsigned BlockIdx = 0;
for (SILBasicBlock &BB : *F) {
int Length = 0;
if (CBI.isCold(&BB)) {
Length = ColdBlockLength;
} else {
for (SILInstruction &I : BB) {
if (auto FAS = FullApplySite::isa(&I)) {
Length += getApplyLength(FAS);
} else {
Length += (int)instructionInlineCost(I);
}
}
}
BlockInfo *BBInfo = &BlockInfoStorage[BlockIdx++];
BlockInfos[&BB] = BBInfo;
BBInfo->Length = Length;
// Initialize the distances for the entry and exit blocks, used when
// computing the distances for the function itself.
if (&BB == &F->front())
BBInfo->getDistances(0).DistFromEntry = 0;
if (isa<ReturnInst>(BB.getTerminator()))
BBInfo->getDistances(0).DistToExit = Length;
else if (isa<ThrowInst>(BB.getTerminator()))
BBInfo->getDistances(0).DistToExit = Length + ColdBlockLength;
}
// Compute the distances for all loops in the function.
for (SILLoop *Loop : *LI) {
analyzeLoopsRecursively(Loop, 1);
}
// Compute the distances for the function itself.
solveDataFlow(F->getBlocks(), 0);
}
/// Returns the length of the shortest path of the block \p BB in the loop
/// with nesting level \p LoopDepth. If \p LoopDepth is 0 ir returns the
/// shortest path in the function.
int getScopeLength(SILBasicBlock *BB, int LoopDepth) {
assert(BB->getParent() == F);
if (LoopDepth >= MaxNumLoopLevels)
LoopDepth = MaxNumLoopLevels - 1;
return getBlockInfo(BB)->getScopeLength(LoopDepth);
}
/// Returns the weight of block \p BB also considering the \p CallerWeight
/// which is the weight of the call site's block in the caller.
Weight getWeight(SILBasicBlock *BB, Weight CallerWeight);
void dump();
};
int ShortestPathAnalysis::getEntryDistFromPreds(const SILBasicBlock *BB,
int LoopDepth) {
int MinDist = InitialDist;
for (SILBasicBlock *Pred : BB->getPreds()) {
BlockInfo *PredInfo = getBlockInfo(Pred);
Distances &PDists = PredInfo->getDistances(LoopDepth);
int DistFromEntry = PDists.DistFromEntry + PredInfo->Length +
PDists.LoopHeaderLength;
assert(DistFromEntry >= 0);
if (DistFromEntry < MinDist)
MinDist = DistFromEntry;
}
return MinDist;
}
int ShortestPathAnalysis::getExitDistFromSuccs(const SILBasicBlock *BB,
int LoopDepth) {
int MinDist = InitialDist;
for (const SILSuccessor &Succ : BB->getSuccessors()) {
BlockInfo *SuccInfo = getBlockInfo(Succ);
Distances &SDists = SuccInfo->getDistances(LoopDepth);
if (SDists.DistToExit < MinDist)
MinDist = SDists.DistToExit;
}
return MinDist;
}
/// Detect an edge from the loop pre-header's predecessor to the loop exit
/// block. Such an edge "short-cuts" a loop if it is never iterated. But usually
/// it is the less frequent case and we want to ignore it.
/// E.g. it handles the case of N==0 for
/// for i in 0..<N { ... }
/// If the \p Loop has such an edge the source block of this edge is returned,
/// which is the predecessor of the loop pre-header.
static SILBasicBlock *detectLoopBypassPreheader(SILLoop *Loop) {
SILBasicBlock *Pred = Loop->getLoopPreheader();
if (!Pred)
return nullptr;
SILBasicBlock *PredPred = Pred->getSinglePredecessor();
if (!PredPred)
return nullptr;
auto *CBR = dyn_cast<CondBranchInst>(PredPred->getTerminator());
if (!CBR)
return nullptr;
SILBasicBlock *Succ = (CBR->getTrueBB() == Pred ? CBR->getFalseBB() :
CBR->getTrueBB());
for (SILBasicBlock *PredOfSucc : Succ->getPreds()) {
SILBasicBlock *Exiting = PredOfSucc->getSinglePredecessor();
if (!Exiting)
Exiting = PredOfSucc;
if (Loop->contains(Exiting))
return PredPred;
}
return nullptr;
}
void ShortestPathAnalysis::analyzeLoopsRecursively(SILLoop *Loop, int LoopDepth) {
if (LoopDepth >= MaxNumLoopLevels)
return;
// First dive into the inner loops.
for (SILLoop *SubLoop : Loop->getSubLoops()) {
analyzeLoopsRecursively(SubLoop, LoopDepth + 1);
}
BlockInfo *HeaderInfo = getBlockInfo(Loop->getHeader());
Distances &HeaderDists = HeaderInfo->getDistances(LoopDepth);
// Initial values for the entry (== header) and exit-predecessor (== header as
// well).
HeaderDists.DistFromEntry = 0;
HeaderDists.DistToExit = 0;
solveDataFlow(Loop->getBlocks(), LoopDepth);
int LoopLength = getExitDistFromSuccs(Loop->getHeader(), LoopDepth) +
HeaderInfo->getLength(LoopDepth);
HeaderDists.DistToExit = LoopLength;
// If there is a loop bypass edge, add the loop length to the loop pre-pre-
// header instead to the header. This actually let us ignore the loop bypass
// edge in the length calculation for the loop's parent scope.
if (SILBasicBlock *Bypass = detectLoopBypassPreheader(Loop))
HeaderInfo = getBlockInfo(Bypass);
// Add the full loop length (= assumed-iteration-count * length) to the loop
// header so that it is considered in the parent scope.
HeaderInfo->getDistances(LoopDepth - 1).LoopHeaderLength =
LoopCount * LoopLength;
}
ShortestPathAnalysis::Weight ShortestPathAnalysis::
getWeight(SILBasicBlock *BB, Weight CallerWeight) {
assert(BB->getParent() == F);
SILLoop *Loop = LI->getLoopFor(BB);
if (!Loop) {
// We are not in a loop. So just account the length of our function scope
// in to the length of the CallerWeight.
return Weight(CallerWeight.ScopeLength + getScopeLength(BB, 0),
CallerWeight.LoopWeight);
}
int LoopDepth = Loop->getLoopDepth();
// Deal with the corner case of having more than 4 nested loops.
while (LoopDepth >= MaxNumLoopLevels) {
--LoopDepth;
Loop = Loop->getParentLoop();
}
Weight W(getScopeLength(BB, LoopDepth), SingleLoopWeight);
// Add weights for all the loops BB is in.
while (Loop) {
assert(LoopDepth > 0);
BlockInfo *HeaderInfo = getBlockInfo(Loop->getHeader());
int InnerLoopLength = HeaderInfo->getScopeLength(LoopDepth) *
ShortestPathAnalysis::LoopCount;
int OuterLoopWeight = SingleLoopWeight;
int OuterScopeLength = HeaderInfo->getScopeLength(LoopDepth - 1);
// Reaching the outermost loop, we use the CallerWeight to get the outer
// length+loopweight.
if (LoopDepth == 1) {
// If the apply in the caller is not in a significant loop, just stop with
// what we have now.
if (CallerWeight.LoopWeight < 4)
return W;
// If this function is part of the caller's scope length take the caller's
// scope length. Note: this is not the case e.g. if the apply is in a
// then-branch of an if-then-else in the caller and the else-branch is
// the short path.
if (CallerWeight.ScopeLength > OuterScopeLength)
OuterScopeLength = CallerWeight.ScopeLength;
OuterLoopWeight = CallerWeight.LoopWeight;
}
assert(OuterScopeLength >= InnerLoopLength);
// If the current loop is only a small part of its outer loop, we don't
// take the outer loop that much into account. Only if the current loop is
// actually the "main part" in the outer loop we add the full loop weight
// for the outer loop.
if (OuterScopeLength < InnerLoopLength * 2) {
W.LoopWeight += OuterLoopWeight - 1;
} else if (OuterScopeLength < InnerLoopLength * 3) {
W.LoopWeight += OuterLoopWeight - 2;
} else if (OuterScopeLength < InnerLoopLength * 4) {
W.LoopWeight += OuterLoopWeight - 3;
} else {
return W;
}
--LoopDepth;
Loop = Loop->getParentLoop();
}
assert(LoopDepth == 0);
return W;
}
void ShortestPathAnalysis::dump() {
printFunction(llvm::errs());
}
void ShortestPathAnalysis::printFunction(llvm::raw_ostream &OS) {
OS << "SPA @" << F->getName() << "\n";
for (SILBasicBlock &BB : *F) {
printBlockInfo(OS, &BB, 0);
}
for (SILLoop *Loop : *LI) {
printLoop(OS, Loop, 1);
}
}
void ShortestPathAnalysis::printLoop(llvm::raw_ostream &OS, SILLoop *Loop,
int LoopDepth) {
if (LoopDepth >= MaxNumLoopLevels)
return;
assert(LoopDepth == (int)Loop->getLoopDepth());
OS << "Loop bb" << Loop->getHeader()->getDebugID() << ":\n";
for (SILBasicBlock *BB : Loop->getBlocks()) {
printBlockInfo(OS, BB, LoopDepth);
}
for (SILLoop *SubLoop : Loop->getSubLoops()) {
printLoop(OS, SubLoop, LoopDepth + 1);
}
}
void ShortestPathAnalysis::printBlockInfo(llvm::raw_ostream &OS,
SILBasicBlock *BB, int LoopDepth) {
BlockInfo *BBInfo = getBlockInfo(BB);
Distances &D = BBInfo->getDistances(LoopDepth);
OS << " bb" << BB->getDebugID() << ": length=" << BBInfo->Length << '+'
<< D.LoopHeaderLength << ", d-entry=" << D.DistFromEntry
<< ", d-exit=" << D.DistToExit << '\n';
}
//===----------------------------------------------------------------------===//
// Performance Inliner
//===----------------------------------------------------------------------===//
namespace {
// Controls the decision to inline functions with @_semantics, @effect and
// global_init attributes.
enum class InlineSelection {
Everything,
NoGlobalInit, // and no availability semantics calls
NoSemanticsAndGlobalInit
};
using Weight = ShortestPathAnalysis::Weight;
class SILPerformanceInliner {
/// Specifies which functions not to inline, based on @_semantics and
/// global_init attributes.
InlineSelection WhatToInline;
DominanceAnalysis *DA;
SILLoopAnalysis *LA;
// For keys of SILFunction and SILLoop.
llvm::DenseMap<SILFunction *, ShortestPathAnalysis *> SPAs;
llvm::SpecificBumpPtrAllocator<ShortestPathAnalysis> SPAAllocator;
ColdBlockInfo CBI;
/// The following constants define the cost model for inlining. Some constants
/// are also defined in ShortestPathAnalysis.
enum {
/// The base value for every call: it represents the benefit of removing the
/// call overhead itself.
RemovedCallBenefit = 20,
/// The benefit if the operand of an apply gets constant, e.g. if a closure
/// is passed to an apply instruction in the callee.
RemovedClosureBenefit = RemovedCallBenefit + 50,
/// The benefit if a load can (probably) eliminated because it loads from
/// a stack location in the caller.
RemovedLoadBenefit = RemovedCallBenefit + 5,