//===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if its value is already held somewhere, i.e. it
/// was produced by a previous load from, or a store of a known value to, the
/// same LSLocation.
///
/// In this case, one can replace the load instruction with the previous
/// results.
///
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of a LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of a LSValue that is used to model the value
/// that currently resides in the associated LSLocation on the particular
/// program path. (See below for more details).
///
/// 3. Performing an RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. Performing an optimistic, iterative, intersection-based dataflow on
/// the gensets and killsets until convergence.
///
/// At the core of RLE is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and
/// a projection path to the accessed field.
///
/// In SIL, one can access an aggregate as a whole, e.g. store to a struct
/// with 2 Int fields. Such a store generates 2 *indivisible* LSLocations,
/// one for each field. In addition to keeping a list of LSLocations, RLE
/// also keeps their available LSValues. We call an LSLocation *indivisible*
/// because it cannot be broken down into smaller LSLocations.
///
/// An LSValue consists of a base - a SILValue from the load or store inst -
/// and a projection path to the field it represents. So a store to the
/// 2-field struct mentioned above generates 2 LSLocations and 2 LSValues.
///
/// Every basic block keeps a map between LSLocation and LSValue. By
/// keeping the LSLocation and LSValue in their indivisible form, one
/// can easily find which part of the load is redundant and how to compute its
/// forwarding value.
///
/// In the case where both fields of the struct have available values, RLE
/// can find their LSValues (perhaps by struct_extract from a larger value)
/// and then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation that is
/// not necessary, e.g. for a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
/// can be replaced by a forwarded value, it tries to find the minimum number
/// of extractions necessary to form the forwarded value. It groups the
/// available values by their LSValue base, i.e. LSValues that come from the
/// same instruction, and then uses extraction to obtain the needed
/// components of the base.
///
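/// For example (an illustrative sketch in SIL-like pseudo code; the type and
/// value names are hypothetical):
///
///   store %p to %addr : $*Pair    // makes both fields of Pair available
///   %l = load %addr : $*Pair      // fully redundant: forward %p directly
///
/// Since every piece of the loaded location is available from the single
/// base %p, RLE forwards %p itself instead of extracting and re-aggregating
/// the two fields.
///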
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SIL/BasicBlockDatastructures.h"
#include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/DeadEndBlocksAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFGOptUtils.h"
#include "swift/SILOptimizer/Utils/InstOptUtils.h"
#include "swift/SILOptimizer/Utils/LoadStoreOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "swift/SIL/BasicBlockData.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace swift;
STATISTIC(NumForwardedLoads, "Number of loads forwarded");
static AllocStackInst *findAllocStackInst(DeallocStackInst *I) {
// It's allowed to be undef in unreachable code.
return dyn_cast<AllocStackInst>(I->getOperand());
}
/// ComputeAvailSetMax - If we ignore all unknown writes, what is the maximum
/// available set that can reach a certain point in a basic block? This helps
/// generate the genset and killset, i.e. if no downward visible value can
/// reach the end of a basic block, then we know that the genset and killset
/// for the location need not be set.
///
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
///
/// ComputeAvailValue - Compute the available value at the end of the basic
/// block.
///
/// PerformRLE - Perform the actual redundant load elimination.
enum class RLEKind : unsigned {
ComputeAvailSetMax = 0,
ComputeAvailGenKillSet = 1,
ComputeAvailValue = 2,
PerformRLE = 3,
};
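// A function is, roughly, processed by running these phases in order:
// ComputeAvailSetMax and ComputeAvailGenKillSet while first walking the
// blocks, then the iterative gen/kill data flow until convergence, then
// ComputeAvailValue, and finally PerformRLE (see RLEContext::run and
// runIterativeRLE below).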
//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//
static bool inline isComputeAvailSetMax(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailSetMax;
}
static bool inline isComputeAvailGenKillSet(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailGenKillSet;
}
static bool inline isComputeAvailValue(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailValue;
}
static bool inline isPerformingRLE(RLEKind Kind) {
return Kind == RLEKind::PerformRLE;
}
/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load/store perspective.
static bool isRLEInertInstruction(SILInstruction *Inst) {
switch (Inst->getKind()) {
case SILInstructionKind::DeallocStackInst:
case SILInstructionKind::CondFailInst:
case SILInstructionKind::IsEscapingClosureInst:
case SILInstructionKind::IsUniqueInst:
case SILInstructionKind::EndCOWMutationInst:
case SILInstructionKind::FixLifetimeInst:
case SILInstructionKind::EndAccessInst:
case SILInstructionKind::SetDeallocatingInst:
case SILInstructionKind::DeallocRefInst:
case SILInstructionKind::BeginBorrowInst:
case SILInstructionKind::EndBorrowInst:
return true;
default:
return false;
}
}
//===----------------------------------------------------------------------===//
// Basic Block Location State
//===----------------------------------------------------------------------===//
namespace {
/// If a function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to issue in the worst case is roughly
/// proportional to the number of basic blocks times the number of locations.
///
/// We can run RLE on functions with up to 128 basic blocks and 128
/// locations, which is a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 128*128;
/// We can run optimistic RLE on functions with fewer than 64 basic blocks
/// and 64 locations, which is a sizable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;
/// Forward declaration.
class RLEContext;
/// Per-basic-block load/store state, which allows forwarding from loads and
/// stores to later loads.
class BlockState {
public:
enum class ValueState : unsigned {
CoverValues = 0,
ConcreteValues = 1,
CoverAndConcreteValues = 2,
};
private:
/// # of locations in the LocationVault.
unsigned LocationNum;
/// The basic block that we are optimizing.
SILBasicBlock *BB;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the beginning of the basic block.
SmallBitVector ForwardSetIn;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the end of the basic block.
SmallBitVector ForwardSetOut;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If we ignore all unknown writes, this is the maximum set
/// of available locations at the current position in the basic block.
SmallBitVector ForwardSetMax;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the basic block generates a
/// value for the location.
SmallBitVector BBGenSet;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the basic block kills the
/// value for the location.
SmallBitVector BBKillSet;
/// This is a map between LSLocations and their available values at the
/// beginning of this basic block.
ValueTableMap ForwardValIn;
/// This is a map between LSLocations and their available values at the end of
/// this basic block.
ValueTableMap ForwardValOut;
/// Keeps a list of replaceable instructions in the current basic block as
/// well as their SILValue replacement.
llvm::DenseMap<SingleValueInstruction *, SILValue> RedundantLoads;
/// The LSLocation read or written has been extracted, expanded and mapped to
/// bit position B in the bit vector. Update it in the ForwardSetIn of the
/// current basic block.
void updateForwardSetForRead(RLEContext &Ctx, unsigned B);
void updateForwardSetForWrite(RLEContext &Ctx, unsigned B);
/// The LSLocation read or written has been extracted, expanded and mapped to
/// bit position B in the bit vector. Update it in the genset and killset of
/// the current basic block.
void updateGenKillSetForRead(RLEContext &Ctx, unsigned B);
void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B);
/// The LSLocation read or written has been extracted, expanded and mapped to
/// bit position B in the bit vector. Update it in the ForwardSetMax of the
/// current basic block.
void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B);
void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B);
/// The LSLocation written has been extracted, expanded and mapped to the bit
/// position in the bit vector. Process it using the bit position.
void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V);
void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V);
/// There is a read from an LSLocation; expand the LSLocation into individual
/// fields before processing them.
void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind);
/// There is a write to an LSLocation; expand the LSLocation into individual
/// fields before processing them.
void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind);
/// BitVector manipulation functions.
void startTrackingLocation(SmallBitVector &BV, unsigned B);
void stopTrackingLocation(SmallBitVector &BV, unsigned B);
bool isTrackingLocation(SmallBitVector &BV, unsigned B);
void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V);
void stopTrackingValue(ValueTableMap &VM, unsigned B);
public:
BlockState() = default;
void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) {
BB = NewBB;
LocationNum = bitcnt;
// For reachable basic blocks, the initial state of ForwardSetOut should be
// all 1's. Otherwise the dataflow solution could be too conservative.
//
// Consider this case: the value made forwardable by "var a = 10" before
// the loop will not be forwarded if ForwardSetOut is initialized to 0.
//
// var a = 10
// for _ in 0...1024 {}
// use(a);
//
// However, by doing so, we can only do the data forwarding after the
// data flow stabilizes.
//
// We set the initial state of an unreachable block to 0, as we do not
// have a value for the location.
//
// This is a bit conservative as we could be missing forwarding
// opportunities, e.g. a join block one of whose predecessors is an
// unreachable block.
//
// We rely on other passes to clean up unreachable blocks.
ForwardSetIn.resize(LocationNum, false);
ForwardSetOut.resize(LocationNum, optimistic);
// If we are running an optimistic data flow, set forward max to true
// initially.
ForwardSetMax.resize(LocationNum, optimistic);
BBGenSet.resize(LocationNum, false);
BBKillSet.resize(LocationNum, false);
}
/// Initialize the AvailSetMax by intersecting this basic block's
/// predecessors' AvailSetMax.
void mergePredecessorsAvailSetMax(RLEContext &Ctx);
/// Initialize the AvailSet by intersecting this basic block's predecessors'
/// AvailSet.
void mergePredecessorAvailSet(RLEContext &Ctx);
/// Initialize the AvailSet and AvailVal of the current basic block.
void mergePredecessorAvailSetAndValue(RLEContext &Ctx);
/// At the end of the basic block, update the ForwardValOut with the
/// ForwardValIn.
void updateForwardValOut() { ForwardValOut = ForwardValIn; }
/// Check whether the ForwardSetOut has changed. If it has, we need to rerun
/// the data flow to reach a fixed point.
bool updateForwardSetOut() {
if (ForwardSetIn == ForwardSetOut)
return false;
ForwardSetOut = ForwardSetIn;
return true;
}
/// Returns the current basic block we are processing.
SILBasicBlock *getBB() const { return BB; }
/// Returns the ForwardValIn for the current basic block.
ValueTableMap &getForwardValIn() { return ForwardValIn; }
/// Returns the ForwardValOut for the current basic block.
ValueTableMap &getForwardValOut() { return ForwardValOut; }
/// Returns the redundant loads and their replacements in the current basic
/// block.
llvm::DenseMap<SingleValueInstruction *, SILValue> &getRL() {
return RedundantLoads;
}
/// Look into the value for the given LSLocation at the end of the basic
/// block and return one of the three ValueState kinds.
ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
/// Wrappers to query the value state of the location in this BlockState.
bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
}
bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
}
/// Iterate over the instructions in the basic block in forward order and
/// process them w.r.t. the given \p Kind.
void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind);
void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);
/// Process the current basic block with the genset and killset. Return true
/// if the ForwardSetOut changes.
bool processBasicBlockWithGenKillSet();
/// Set up the value for redundant load elimination.
bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);
/// Process an instruction that writes to memory in an unknown way.
void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind);
void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);
void processDeallocStackInst(RLEContext &Ctx, DeallocStackInst *I,
RLEKind Kind);
void processDeallocStackInstForGenKillSet(RLEContext &Ctx, DeallocStackInst *I);
void processDeallocStackInstForRLE(RLEContext &Ctx, DeallocStackInst *I);
/// Process LoadInst. Extract LSLocations from LoadInst.
void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);
/// Process StoreInst. Extract LSLocations from StoreInst.
void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);
/// Returns a *single* forwardable SILValue for the given LSLocation right
/// before the terminator of the basic block.
SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
#ifndef NDEBUG
void dump(RLEContext &Ctx);
#endif
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// RLEContext Interface
//===----------------------------------------------------------------------===//
namespace {
/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
class RLEContext {
enum class ProcessKind {
ProcessMultipleIterations = 0,
ProcessOneIteration = 1,
ProcessNone = 2,
};
private:
/// The function currently being processed.
SILFunction *Fn;
/// The pass manager we are using.
SILPassManager *PM;
/// The alias analysis that we will use during all computations.
AliasAnalysis *AA;
/// The type expansion analysis we will use during all computations.
TypeExpansionAnalysis *TE;
/// The SSA updater we use to materialize covering values.
SILSSAUpdater Updater;
/// The range that we use to iterate over the post order and reverse post
/// order of the given function.
PostOrderFunctionInfo *PO;
/// Epilogue release analysis.
EpilogueARCFunctionInfo *EAFI;
/// Keeps all the locations for the current function. The BitVector in each
/// BlockState is then laid on top of it to keep track of which LSLocation
/// has a downward available value.
std::vector<LSLocation> LocationVault;
/// Contains a map from each LSLocation to its index in the LocationVault.
/// Used for fast lookup.
LSLocationIndexMap LocToBitIndex;
/// Keeps a map between the accessed SILValue and the location.
LSLocationBaseMap BaseToLocIndex;
/// Keeps all the LSValues for the current function. The BitVector in each
/// BlockState is then laid on top of it to keep track of which LSLocation
/// has a downward available value.
std::vector<LSValue> LSValueVault;
/// Contains a map from each LSValue to its index in the LSValueVault.
/// Used for fast lookup.
llvm::DenseMap<LSValue, unsigned> ValToBitIndex;
/// A map from each BasicBlock to its BlockState.
BasicBlockData<BlockState> BBToLocState;
/// Keeps a list of basic blocks that have LoadInsts. If a basic block does
/// not have any LoadInst, we do not actually perform the last iteration,
/// where RLE is performed on the basic block.
///
/// NOTE: This is never populated for functions which only require 1 data
/// flow iteration. For functions that require more than 1 iteration of the
/// data flow, this is populated the first time the function is walked, i.e.
/// when we generate the genset and killset.
BasicBlockSet BBWithLoads;
/// If set, RLE ignores loads from that array type.
NominalTypeDecl *ArrayType;
#ifndef NDEBUG
SILPrintContext printCtx;
#endif
public:
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads);
RLEContext(const RLEContext &) = delete;
RLEContext(RLEContext &&) = delete;
RLEContext &operator=(const RLEContext &) = delete;
RLEContext &operator=(RLEContext &&) = delete;
~RLEContext() = default;
/// Entry point to redundant load elimination.
bool run();
SILFunction *getFunction() const { return Fn; }
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
/// one-iteration data flow on the function.
ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount);
/// Run the iterative data flow until convergence.
void runIterativeRLE();
/// Process the basic blocks for the gen and kill set.
void processBasicBlocksForGenKillSet();
/// Process the basic blocks with the gen and kill set.
void processBasicBlocksWithGenKillSet();
/// Process the basic blocks for the values generated in each basic
/// block.
void processBasicBlocksForAvailValue();
/// Process basic blocks to perform the redundant load elimination.
void processBasicBlocksForRLE(bool Optimistic);
/// Returns the alias analysis we will use during all computations.
AliasAnalysis *getAA() const { return AA; }
/// Returns the type expansion analysis we are using.
TypeExpansionAnalysis *getTE() const { return TE; }
/// Returns the epilogue ARC function info we are using.
EpilogueARCFunctionInfo *getEAFI() const { return EAFI; }
/// Returns the map from an accessed SILValue base to its LSLocation.
LSLocationBaseMap &getBM() { return BaseToLocIndex; }
/// Return the BlockState for the given basic block.
BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }
/// Get the bit representing the LSLocation in the LocationVault.
unsigned getLocationBit(const LSLocation &L);
/// Given the bit, get the LSLocation from the LocationVault.
LSLocation &getLocation(const unsigned index);
/// Get the bit representing the LSValue in the LSValueVault.
unsigned getValueBit(const LSValue &L);
/// Given the bit, get the LSValue from the LSValueVault.
LSValue &getValue(const unsigned index);
/// Given an LSLocation, try to collect all the LSValues for this LSLocation
/// in the given basic block. If some of the locations have covering values,
/// find the values in the block's predecessors.
bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
LSLocationValueMap &Values, ValueTableMap &VM);
/// Transitively collect all the values that make up this location and
/// create a SILArgument out of them.
SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);
/// Returns the LoadInst if \p Inst is a load inst we want to handle.
LoadInst *isLoadInstToHandle(SILInstruction *Inst) {
if (auto *LI = dyn_cast<LoadInst>(Inst)) {
if (!ArrayType ||
LI->getType().getNominalOrBoundGenericNominal() != ArrayType) {
return LI;
}
}
return nullptr;
}
};
} // end anonymous namespace
void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V) {
VM[L] = V;
}
void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) {
VM.erase(B);
}
bool BlockState::isTrackingLocation(SmallBitVector &BV, unsigned B) {
return BV.test(B);
}
void BlockState::startTrackingLocation(SmallBitVector &BV, unsigned B) {
BV.set(B);
}
void BlockState::stopTrackingLocation(SmallBitVector &BV, unsigned B) {
BV.reset(B);
}
void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
if (BB->pred_empty()) {
ForwardSetMax.reset();
return;
}
auto Iter = BB->pred_begin();
ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
}
}
void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
// Clear the state if the basic block has no predecessor.
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
ForwardSetIn.reset();
return;
}
auto Iter = BB->pred_begin();
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
}
}
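// A note on covering values (a sketch; the block and value names below are
// illustrative): if two predecessors of a join block make the same
// LSLocation available with different concrete values, e.g.
//
//   bb1: store %a to %addr ...      bb2: store %b to %addr ...
//        br bb3                          br bb3
//
// then at the entry of bb3 the location is still available, but there is no
// single concrete value, so it is recorded as a covering value. A concrete
// SILValue is materialized later with the SSA updater (see
// computePredecessorLocationValue).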
void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
// Clear the state if the basic block has no predecessor.
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
ForwardSetIn.reset();
ForwardValIn.clear();
return;
}
auto Iter = BB->pred_begin();
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
BlockState &OtherState = Ctx.getBlockState(*Iter);
ForwardSetIn &= OtherState.ForwardSetOut;
// Merge in the predecessor state.
for (unsigned i = 0; i < LocationNum; ++i) {
if (OtherState.ForwardSetOut[i]) {
// There are multiple values from multiple predecessors; set this as
// a covering value. We do not need to track the value itself, as we
// can always go to the predecessors' BlockStates to find it.
ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
continue;
}
// If this location does not have an available value, then clear it.
stopTrackingValue(ForwardValIn, i);
stopTrackingLocation(ForwardSetIn, i);
}
}
}
void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
// Iterate over instructions in forward order.
for (auto &II : *BB) {
processInstructionWithKind(Ctx, &II, Kind);
}
}
bool BlockState::processBasicBlockWithGenKillSet() {
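// Standard forward data flow transfer function:
//   ForwardSetOut = BBGenSet | (ForwardSetIn & ~BBKillSet)
// where ForwardSetIn has already been initialized by intersecting the
// predecessors' ForwardSetOut (see mergePredecessorAvailSet).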
ForwardSetIn.reset(BBKillSet);
ForwardSetIn |= BBGenSet;
return updateForwardSetOut();
}
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
// First, collect current available locations and their corresponding values
// into a map.
LSLocationValueMap Values;
LSLocationList Locs;
LSLocation::expand(L, &BB->getModule(),
TypeExpansionContext(*BB->getParent()), Locs, Ctx.getTE());
// Find the values that this basic block defines and the locations which
// we do not have a concrete value in the current basic block.
ValueTableMap &OTM = getForwardValOut();
for (unsigned i = 0; i < Locs.size(); ++i) {
auto Val = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
auto AvailVal = makeCopiedValueAvailable(Val.getBase(), BB);
Values[Locs[i]] = LSValue(AvailVal, Val.getPath().getValue());
}
// Second, reduce the available values into a single SILValue we can use to
// forward.
SILValue TheForwardingValue =
LSValue::reduce(L, &BB->getModule(), Values, BB->getTerminator());
// Return the forwarding value.
return TheForwardingValue;
}
bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
// Try to construct a SILValue for the current LSLocation.
//
// Collect the locations and their corresponding values into a map.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
LSLocationValueMap Values;
// Use the ForwardValIn as we are currently processing the basic block.
if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
return false;
// Reduce the available values into a single SILValue we can use to forward.
SILModule *Mod = &I->getModule();
SILValue TheForwardingValue =
LSValue::reduce(L, Mod, Values, I);
if (!TheForwardingValue)
return false;
// Now we have the forwarding value, record it for forwarding!
//
// NOTE: we do not perform the RLE right here because doing so could introduce
// new LSLocations.
//
// e.g.
// %0 = load %x
// %1 = load %x
// %2 = struct_extract %1, #a
// %3 = load %2
//
// If we perform the RLE and replace %1 with %0, we end up with a memory
// location we did not have before, i.e. Base == %0, and Path == #a.
//
// We may be able to add the LSLocation to the vault, but it gets
// complicated very quickly, e.g. we need to resize the bit vectors,
// etc.
//
// However, since we already know the instruction to replace and the value to
// replace it with, we can record it for now and forward it after all the
// forwardable values are recorded in the function.
//
RedundantLoads[cast<SingleValueInstruction>(I)] = TheForwardingValue;
LLVM_DEBUG(llvm::dbgs() << "FORWARD " << TheForwardingValue << " to" << *I);
return true;
}
void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetIn, B);
}
void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(BBGenSet, B);
stopTrackingLocation(BBKillSet, B);
}
void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L,
unsigned V) {
// Track the new location and value.
startTrackingValue(ForwardValIn, L, V);
startTrackingLocation(ForwardSetIn, L);
}
void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(B);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetMax, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location.
stopTrackingLocation(BBGenSet, i);
startTrackingLocation(BBKillSet, i);
}
// Start tracking this location.
startTrackingLocation(BBGenSet, B);
stopTrackingLocation(BBKillSet, B);
}
void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetMax, B);
}
void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetMax, B);
}
void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(B);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location.
stopTrackingLocation(ForwardSetIn, i);
}
// Start tracking this location.
startTrackingLocation(ForwardSetIn, B);
}
void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L,
unsigned V) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(L);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location and value.
stopTrackingValue(ForwardValIn, i);
stopTrackingLocation(ForwardSetIn, i);
}
// Start tracking this location and value.
startTrackingLocation(ForwardSetIn, L);
startTrackingValue(ForwardValIn, L, V);
}
void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind) {
// Initialize the LSLocation.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
// If we can't figure out the Base or Projection Path for the write,
// process it as an unknown memory instruction.
if (!L.isValid()) {
// We can ignore unknown store instructions if we are computing the
// AvailSetMax.
if (!isComputeAvailSetMax(Kind)) {
processUnknownWriteInst(Ctx, I, Kind);
}
return;
}
auto *Fn = I->getFunction();
// Expand the given location and val into individual fields and process
// them as separate writes.
LSLocationList Locs;
LSLocation::expand(L, &I->getModule(), TypeExpansionContext(*Fn), Locs,
Ctx.getTE());
if (isComputeAvailSetMax(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing the genset and killset?
if (isComputeAvailGenKillSet(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing available value or performing RLE?
LSValueList Vals;
LSValue::expand(Val, &I->getModule(), TypeExpansionContext(*Fn), Vals,
Ctx.getTE());
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
Ctx.getValueBit(Vals[i]));
}
return;
}
llvm_unreachable("Unknown RLE compute kind");
}
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind) {
// Initialize the LSLocation.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
// If we can't figure out the Base or Projection Path for the read, simply
// ignore it for now.
if (!L.isValid())
return;
auto *Fn = I->getFunction();
// Expand the given LSLocation and Val into individual fields and process
// them as separate reads.
LSLocationList Locs;
LSLocation::expand(L, &I->getModule(), TypeExpansionContext(*Fn), Locs,
Ctx.getTE());
if (isComputeAvailSetMax(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing the genset and killset?
if (isComputeAvailGenKillSet(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing available values?
bool CanForward = true;
LSValueList Vals;
LSValue::expand(Val, &I->getModule(), TypeExpansionContext(*Fn), Vals,
Ctx.getTE());
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
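// A piece of the location that is already tracked has an available value
// and can be forwarded. Otherwise this load itself becomes the available
// value for that piece; since the piece had no value before the load, the
// load as a whole cannot be forwarded.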
for (unsigned i = 0; i < Locs.size(); ++i) {
if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
continue;
updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
Ctx.getValueBit(Vals[i]));
// We cannot perform the forwarding as we are missing at least
// some pieces of the read location.
CanForward = false;
}
}
// Simply return if we are not performing RLE or we do not have all the
// values available to perform RLE.
if (!isPerformingRLE(Kind) || !CanForward)
return;
// Lastly, forward value to the load.
setupRLE(Ctx, I, Mem);
}
void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind) {
processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}
void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}
void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
SILInstruction *I) {
auto *AA = Ctx.getAA();
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetMax, i))
continue;
// Invalidate any location this instruction may write to.
//
// TODO: checking may alias with Base is overly conservative,
// we should check may alias with base plus projection path.
LSLocation &R = Ctx.getLocation(i);
if (!AA->mayWriteToMemory(I, R.getBase()))
continue;
// MayAlias.
stopTrackingLocation(BBGenSet, i);
startTrackingLocation(BBKillSet, i);
}
}
void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
SILInstruction *I) {
auto *AA = Ctx.getAA();
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
// Invalidate any location this instruction may write to.
//
// TODO: checking may alias with Base is overly conservative,
// we should check may alias with base plus projection path.
LSLocation &R = Ctx.getLocation(i);
if (!AA->mayWriteToMemory(I, R.getBase()))