forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReassociate.cpp
2615 lines (2322 loc) · 99.1 KB
/
Reassociate.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are assigned small distinct ranks (3, 4, ...), and other
// values are assigned ranks derived from the position of their basic block in
// a reverse post order traversal of the current function (the block counter
// is shifted left by 16 bits), which effectively gives values in deep loops
// higher rank than values not in loops.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <utility>
using namespace llvm;
using namespace reassociate;
using namespace PatternMatch;
// Tag under which this file's LLVM_DEBUG output and STATISTIC counters are
// registered.
#define DEBUG_TYPE "reassociate"
// Pass-wide counters; the string is the human-readable description of each.
// NumChanged is bumped by RewriteExprTree every time it rewrites an operand.
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
#ifndef NDEBUG
/// Debug-only helper: write the opcode name of \p I and the type of the first
/// operand, followed by every entry of \p Ops as "[ <operand>, #<rank>] ".
/// \p Ops must not be empty because the type is read from Ops[0].
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
  Module *M = I->getModule();
  raw_ostream &OS = dbgs();

  OS << Instruction::getOpcodeName(I->getOpcode()) << " "
     << *Ops[0].Op->getType() << '\t';

  for (const ValueEntry &Entry : Ops) {
    OS << "[ ";
    Entry.Op->printAsOperand(OS, false, M);
    OS << ", #" << Entry.Rank << "] ";
  }
}
#endif
/// Describes one non-constant operand of an Xor expression, split into a
/// symbolic part and a constant part. Operands fall into two categories:
///   C1) The operand has the form "X & C", where C is a constant and C != ~0.
///   C2)
///     C2.1) The operand has the form "X | C", where C is a non-zero
///           constant.
///     C2.2) Any operand E that fits neither C1 nor C2.1; it is viewed as
///           "E | 0".
class llvm::reassociate::XorOpnd {
public:
  /// Classify \p V (which must not be a ConstantInt) into one of the forms
  /// described above.
  XorOpnd(Value *V);

  /// True once Invalidate() has cleared the symbolic part.
  bool isInvalid() const { return !SymbolicPart; }

  /// True for the "X | C" form (including the "E | 0" fallback), false for
  /// the "X & C" form.
  bool isOrExpr() const { return isOr; }

  /// The value this operand was built from (null after Invalidate()).
  Value *getValue() const { return OrigVal; }

  /// The non-constant part X of "X & C" / "X | C".
  Value *getSymbolicPart() const { return SymbolicPart; }

  /// Rank recorded for the symbolic part; 0 until setSymbolicRank() is called.
  unsigned getSymbolicRank() const { return SymbolicRank; }

  /// The constant part C of "X & C" / "X | C".
  const APInt &getConstPart() const { return ConstPart; }

  /// Mark this operand as no longer usable by dropping both value pointers.
  void Invalidate() {
    OrigVal = nullptr;
    SymbolicPart = nullptr;
  }

  void setSymbolicRank(unsigned R) { SymbolicRank = R; }

private:
  Value *OrigVal;        // Operand as originally supplied.
  Value *SymbolicPart;   // X in "X & C" / "X | C".
  APInt ConstPart;       // C in "X & C" / "X | C".
  unsigned SymbolicRank; // Rank associated with SymbolicPart.
  bool isOr;             // Or-form (true) versus And-form (false).
};
/// Split \p V into a symbolic part and a constant part. An 'or' or 'and'
/// instruction with a constant-integer operand yields that constant and the
/// other operand; anything else is treated as "V | 0".
XorOpnd::XorOpnd(Value *V) {
  assert(!isa<ConstantInt>(V) && "No ConstantInt");
  OrigVal = V;
  SymbolicRank = 0;

  if (auto *Inst = dyn_cast<Instruction>(V)) {
    const unsigned Opc = Inst->getOpcode();
    if (Opc == Instruction::Or || Opc == Instruction::And) {
      Value *Symbolic = Inst->getOperand(0);
      Value *MaybeConst = Inst->getOperand(1);
      const APInt *C;

      // If the constant sits on the left, move it to the right-hand slot.
      if (match(Symbolic, m_APInt(C)))
        std::swap(Symbolic, MaybeConst);

      if (match(MaybeConst, m_APInt(C))) {
        ConstPart = *C;
        SymbolicPart = Symbolic;
        isOr = Opc == Instruction::Or;
        return;
      }
    }
  }

  // No usable constant operand: view the operand as "V | 0".
  SymbolicPart = V;
  ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
  isOr = true;
}
/// Return \p V as a BinaryOperator if it is a binary operator with opcode
/// \p Opcode that has exactly one use and, when it is a floating-point
/// operation, carries the full set of fast-math flags. Otherwise return
/// nullptr.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
  // Narrow to BinaryOperator up front so that a non-binary instruction can
  // never reach an unconditional cast<>, whatever opcode the caller passes.
  auto *BO = dyn_cast<BinaryOperator>(V);
  if (!BO || !BO->hasOneUse() || BO->getOpcode() != Opcode)
    return nullptr;

  // Floating-point operations may only be reassociated when marked 'fast'.
  if (isa<FPMathOperator>(BO) && !BO->isFast())
    return nullptr;

  return BO;
}
/// Two-opcode variant: return \p V as a BinaryOperator if it is a binary
/// operator whose opcode is \p Opcode1 or \p Opcode2, it has exactly one use
/// and, when it is a floating-point operation, carries the full set of
/// fast-math flags. Otherwise return nullptr.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
                                        unsigned Opcode2) {
  // Narrow to BinaryOperator up front so that a non-binary instruction can
  // never reach an unconditional cast<>, whatever opcodes the caller passes.
  auto *BO = dyn_cast<BinaryOperator>(V);
  if (!BO || !BO->hasOneUse())
    return nullptr;

  const unsigned Opc = BO->getOpcode();
  if (Opc != Opcode1 && Opc != Opcode2)
    return nullptr;

  // Floating-point operations may only be reassociated when marked 'fast'.
  if (isa<FPMathOperator>(BO) && !BO->isFast())
    return nullptr;

  return BO;
}
/// Seed the rank tables for \p F: every argument gets its own rank in
/// ValueRankMap, every block visited through \p RPOT gets a base rank in
/// RankMap, and every instruction for which mayBeMemoryDependent() holds gets
/// a precomputed rank in ValueRankMap.
void ReassociatePass::BuildRankMap(Function &F,
                                   ReversePostOrderTraversal<Function*> &RPOT) {
  unsigned NextRank = 2;

  // Give each function argument a distinct rank.
  for (auto &A : F.args()) {
    ++NextRank;
    ValueRankMap[&A] = NextRank;
    LLVM_DEBUG(dbgs() << "Calculated Rank[" << A.getName() << "] = " << NextRank
                      << "\n");
  }

  // Visit the basic blocks in reverse post order.
  for (BasicBlock *Block : RPOT) {
    ++NextRank;
    const unsigned BlockBase = NextRank << 16;
    RankMap[Block] = BlockBase;

    // Instructions that cannot be moved receive consecutive ranks above the
    // block's base rank, so that they are all different within the block.
    unsigned InstRank = BlockBase;
    for (Instruction &Inst : *Block) {
      if (!mayBeMemoryDependent(Inst))
        continue;
      ValueRankMap[&Inst] = ++InstRank;
    }
  }
}
/// Return the rank of \p V, computing and caching it in ValueRankMap when it
/// is an instruction whose rank is not yet known. Non-instruction values are
/// looked up (arguments) or ranked 0 (everything else).
unsigned ReassociatePass::getRank(Value *V) {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst) {
    // Function arguments have a recorded rank; globals and constants rank 0.
    if (!isa<Argument>(V))
      return 0;
    return ValueRankMap[V];
  }

  // Reuse a previously computed (non-zero) rank.
  if (unsigned Known = ValueRankMap[Inst])
    return Known;

  // The rank of an expression is 1 + MAX(rank of its operands), which lets
  // expressions be reassociated for code motion. Recursion stops at PHI nodes
  // and the value graph has no cycles that avoid PHI nodes, so this
  // terminates.
  unsigned Rank = 0;
  const unsigned MaxRank = RankMap[Inst->getParent()];
  for (unsigned Idx = 0, NumOps = Inst->getNumOperands(); Idx != NumOps;
       ++Idx) {
    // Stop scanning once the rank has reached the parent block's rank.
    if (Rank == MaxRank)
      break;
    Rank = std::max(Rank, getRank(Inst->getOperand(Idx)));
  }

  // 'not' and 'neg' instructions are not counted, so X and ~X share a rank.
  const bool IsNegation = match(Inst, m_Not(m_Value())) ||
                          match(Inst, m_Neg(m_Value())) ||
                          match(Inst, m_FNeg(m_Value()));
  if (!IsNegation)
    ++Rank;

  LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
                    << "\n");

  return ValueRankMap[Inst] = Rank;
}
// Put \p I's operands into canonical order: a constant operand goes on the
// right-hand side; otherwise the lower-ranked operand goes on the left.
// \p I must be a commutative binary operator.
void ReassociatePass::canonicalizeOperands(Instruction *I) {
  assert(isa<BinaryOperator>(I) && "Expected binary operator.");
  assert(I->isCommutative() && "Expected commutative operator.");

  Value *First = I->getOperand(0);
  Value *Second = I->getOperand(1);

  // Identical operands, or a constant already on the right: nothing to do.
  const bool AlreadyCanonical = First == Second || isa<Constant>(Second);
  if (AlreadyCanonical)
    return;

  const bool ShouldSwap =
      isa<Constant>(First) || getRank(Second) < getRank(First);
  if (ShouldSwap)
    cast<BinaryOperator>(I)->swapOperands();
}
/// Create "S1 + S2" before \p InsertBefore: an integer add when \p S1 has an
/// integer (or integer vector) type, otherwise an fadd that copies its
/// fast-math flags from \p FlagsOp.
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
                                 Instruction *InsertBefore, Value *FlagsOp) {
  const bool IsInteger = S1->getType()->isIntOrIntVectorTy();
  if (IsInteger)
    return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);

  BinaryOperator *FAdd =
      BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
  const auto Flags = cast<FPMathOperator>(FlagsOp)->getFastMathFlags();
  FAdd->setFastMathFlags(Flags);
  return FAdd;
}
/// Create "S1 * S2" before \p InsertBefore: an integer mul when \p S1 has an
/// integer (or integer vector) type, otherwise an fmul that copies its
/// fast-math flags from \p FlagsOp.
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
                                 Instruction *InsertBefore, Value *FlagsOp) {
  const bool IsInteger = S1->getType()->isIntOrIntVectorTy();
  if (IsInteger)
    return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);

  BinaryOperator *FMul =
      BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
  const auto Flags = cast<FPMathOperator>(FlagsOp)->getFastMathFlags();
  FMul->setFastMathFlags(Flags);
  return FMul;
}
/// Create the negation of \p S1 before \p InsertBefore: an integer neg when
/// \p S1 has an integer (or integer vector) type, otherwise an fneg. The fneg
/// takes its fast-math flags from \p FlagsOp when that value is an
/// instruction.
static Instruction *CreateNeg(Value *S1, const Twine &Name,
                              Instruction *InsertBefore, Value *FlagsOp) {
  if (S1->getType()->isIntOrIntVectorTy())
    return BinaryOperator::CreateNeg(S1, Name, InsertBefore);

  auto *FMFSource = dyn_cast<Instruction>(FlagsOp);
  if (!FMFSource)
    return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);

  return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
}
/// Replace the negation \p Neg (0-X, or a unary fneg of X) with X*-1. The new
/// multiply is inserted before \p Neg, takes over its name, uses and debug
/// location, and is returned. \p Neg itself is left in place with its use of
/// X dropped.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
         "Expected a Negate!");
  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.

  // The negated value is operand 1 of a binary negate and operand 0 of a
  // unary one.
  unsigned OpNo = 0;
  if (isa<BinaryOperator>(Neg))
    OpNo = 1;

  Type *Ty = Neg->getType();
  Constant *NegOne;
  if (Ty->isIntOrIntVectorTy())
    NegOne = ConstantInt::getAllOnesValue(Ty);
  else
    NegOne = ConstantFP::get(Ty, -1.0);

  BinaryOperator *Mul = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);

  // Drop Neg's use of the operand.
  Neg->setOperand(OpNo, Constant::getNullValue(Ty));

  Mul->takeName(Neg);
  Neg->replaceAllUsesWith(Mul);
  Mul->setDebugLoc(Neg->getDebugLoc());
  return Mul;
}
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the
/// Carmichael function. Consequently x^(2^k) === 1 mod 2^Bitwidth for every
/// odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for
/// every even x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
  // Widths below 3 give k = Bitwidth - 1; all wider widths give
  // k = Bitwidth - 2.
  const unsigned Offset = Bitwidth < 3 ? 1u : 2u;
  return Bitwidth - Offset;
}
/// Fold the extra weight 'RHS' into the existing weight 'LHS', reducing the
/// result using any special properties of the operation \p Opcode.
/// LHS stands for the computation X op X op ... op X with X occurring LHS
/// times; afterwards it stands for the same computation with X occurring
/// LHS + RHS times. For "Xor", for example, the combined operation is X when
/// LHS + RHS is odd and 0 when it is even, so LHS is set to 1 in the first
/// case and to 0 in the second.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
  // With infinite precision the combined weight would simply be LHS + RHS.
  // In finite precision the APInt sum may wrap. Wrapping is harmless for
  // nilpotent operations and addition, but not for idempotent operations and
  // multiplication, so the weight is reduced back into range wherever
  // wrapping would give a wrong answer.

  // A zero RHS leaves the weight unchanged.
  if (RHS.isMinValue())
    return;

  // A zero LHS simply becomes RHS.
  if (LHS.isMinValue()) {
    LHS = RHS;
    return;
  }

  // From here on both weights are known to be non-zero.
  const bool IsIdempotent = Instruction::isIdempotent(Opcode);
  if (IsIdempotent || Instruction::isNilpotent(Opcode)) {
    // Weights for these operations are kept at zero or one, which also means
    // that wrapping is not a problem.
    assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
    // Idempotent (X op X === X): any non-zero weight is equivalent to 1, so
    // LHS stays 1. Nilpotent (X op X === 0): 1 + 1 === 0 modulo 2.
    if (!IsIdempotent)
      LHS = 0;
    return;
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
    // TODO: Reduce the weight by exploiting nsw/nuw?
    LHS += RHS;
    return;
  default:
    break;
  }

  assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
         "Unknown associative operation!");
  const unsigned Width = LHS.getBitWidth();

  // If CM is the Carmichael number then a weight W >= CM+Width can be replaced
  // with W-CM: for every Width-bit x, either x is odd and x^CM = 1, or x is
  // even and both x^W and x^(W-CM) are zero. Subtracting multiples of CM this
  // way keeps weights in [0, CM+Width), which always fits in Width bits.
  // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
  // the Carmichael number).
  if (Width <= 3) {
    // Tiny widths: do the same reduction in a wider native type so that the
    // intermediate sum cannot overflow.
    const unsigned CM = 1U << CarmichaelShift(Width);
    const unsigned Limit = CM + Width;
    assert(LHS.getZExtValue() < Limit && RHS.getZExtValue() < Limit &&
           "Weights not reduced!");
    unsigned Sum = LHS.getZExtValue() + RHS.getZExtValue();
    while (Sum >= Limit)
      Sum -= CM;
    LHS = Sum;
    return;
  }

  // CM - The value of Carmichael's lambda function.
  const APInt CM = APInt::getOneBitSet(Width, CarmichaelShift(Width));
  // Any weight W >= Limit can be replaced with W - CM.
  const APInt Limit = CM + Width;
  assert(LHS.ult(Limit) && RHS.ult(Limit) && "Weights not reduced!");
  // For widths of 4 or more the following sum does not overflow.
  LHS += RHS;
  while (LHS.uge(Limit))
    LHS -= CM;
}
/// A value paired with its weight: the APInt records how many times the value
/// occurs in a linearized expression.
using RepeatedValue = std::pair<Value*, APInt>;
/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
/// op
/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
/// op
/// ...
/// op
/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
///
/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
/// to something completely different. Thus if the routine returns 'true' then
/// you MUST either replace I with a new expression computed from the Ops array,
/// or use RewriteExprTree to put the values back in.
///
/// A leaf node is either not a binary operation of the same kind as the root
/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
/// opcode), or is the same kind of binary operator but has a use which either
/// does not belong to the expression, or does belong to the expression but is
/// a leaf node. Every leaf node has at least one use that is a non-leaf node
/// of the expression, while for non-leaf nodes (except for the root 'I') every
/// use is a non-leaf node of the expression.
///
/// For example:
///   expression graph        node names
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B
///                  / \ / \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///                   +   *      |      F,  G
/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
///
/// The expression is maximal: if some instruction is a binary operator of the
/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
/// then the instruction also belongs to the expression, is not a leaf node of
/// it, and its operands also belong to the expression (but may be leaf nodes).
///
/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
/// order to ensure that every non-root node in the expression has *exactly one*
/// use by a non-leaf node of the expression. This destruction means that the
/// caller MUST either replace 'I' with a new expression or use something like
/// RewriteExprTree to put the values back in if the routine indicates that it
/// made a change by returning 'true'.
///
/// In the above example either the right operand of A or the left operand of B
/// will be replaced by undef. If it is B's operand then this gives:
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B - operand of B replaced with undef
///                  / \   \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///                   +   *      |      F,  G
///
/// Note that such undef operands can only be reached by passing through 'I'.
/// For example, if you visit operands recursively starting from a leaf node
/// then you will never see such an undef operand unless you get back to 'I',
/// which requires passing through a phi node.
///
/// Note that this routine may also mutate binary operators of the wrong type
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
///
/// \param I   Root of the expression; must be an associative and commutative
///            unary/binary operator (both are asserted below).
/// \param Ops Output list that receives one (leaf, weight) pair per distinct
///            leaf, in first-seen order; if every weight reduces to zero it
///            receives the operation's identity constant with weight 1.
/// \returns true if the function was modified (here: a negation was lowered to
///          a multiply by -1).
static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops) {
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
"Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
// Weights are APInts of the expression's scalar bit width; IncorporateWeight
// relies on that width when reducing multiplication weights.
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
assert(I->isAssociative() && I->isCommutative() &&
"Expected an associative and commutative operation!");
// Visit all operands of the expression, keeping track of their weight (the
// number of paths from the expression root to the operand, or if you like
// the number of times that operand occurs in the linearized expression).
// For example, if I = X + A, where X = A + B, then I, X and B have weight 1
// while A has weight two.
// Worklist of non-leaf nodes (their operands are in the expression too) along
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
bool Changed = false;
// Leaves of the expression are values that either aren't the right kind of
// operation (eg: a constant, or a multiply in an add tree), or are, but have
// some uses that are not inside the expression. For example, in I = X + X,
// X = A + B, the value X has two uses (by I) that are in the expression. If
// X has any other uses, for example in a return instruction, then we consider
// X to be a leaf, and won't analyze it further. When we first visit a value,
// if it has more than one use then at first we conservatively consider it to
// be a leaf. Later, as the expression is explored, we may discover some more
// uses of the value from inside the expression. If all uses turn out to be
// from within the expression (and the value is a binary operator of the right
// kind) then the value is no longer considered to be a leaf, and its operands
// are explored.
// Leaves - Keeps track of the set of putative leaves as well as the number of
// paths to each leaf seen so far.
using LeafMap = DenseMap<Value *, APInt>;
LeafMap Leaves; // Leaf -> Total weight so far.
SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
// Visited exists only in asserts-enabled builds; it is updated exclusively
// from inside assert() expressions below, so it costs nothing in release
// builds.
#ifndef NDEBUG
SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
#endif
while (!Worklist.empty()) {
std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
// Note: this overwrites the parameter I, so after the loop I refers to the
// last node processed rather than to the root.
I = P.first; // We examine the operands of this binary operator.
for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
assert(Visited.insert(Op).second && "Not first visit!");
LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
Worklist.push_back(std::make_pair(BO, Weight));
continue;
}
// Appears to be a leaf. Is the operand already in the set of leaves?
LeafMap::iterator It = Leaves.find(Op);
if (It == Leaves.end()) {
// Not in the leaf map. Must be the first time we saw this operand.
assert(Visited.insert(Op).second && "Not first visit!");
if (!Op->hasOneUse()) {
// This value has uses not accounted for by the expression, so it is
// not safe to modify. Mark it as being a leaf.
LLVM_DEBUG(dbgs()
<< "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
continue;
}
// No uses outside the expression, try morphing it.
} else {
// Already in the leaf map.
assert(It != Leaves.end() && Visited.count(Op) &&
"In leaf map but not visited!");
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
// The region below is compiled out; it is the only code in this function
// that would replace operands with undef or turn a leaf back into an inner
// node.
#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
I->setOperand(OpIdx, UndefValue::get(I->getType()));
Changed = true;
// If the leaf is a binary operation of the right kind and we now see
// that its multiple original uses were in fact all by nodes belonging
// to the expression, then no longer consider it to be a leaf and add
// its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
Worklist.push_back(std::make_pair(BO, It->second));
Leaves.erase(It);
continue;
}
#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
if (!Op->hasOneUse())
continue;
// No uses outside the expression, try morphing it.
// Carry the accumulated weight forward; the entry is re-added below if the
// value turns out to be a real leaf.
Weight = It->second;
Leaves.erase(It); // Since the value may be morphed below.
}
// At this point we have a value which, first of all, is not a binary
// expression of the right kind, and secondly, is only used inside the
// expression. This means that it can safely be modified. See if we
// can usefully morph it into an expression of the right kind.
assert((!isa<Instruction>(Op) ||
cast<Instruction>(Op)->getOpcode() != Opcode
|| (isa<FPMathOperator>(Op) &&
!cast<Instruction>(Op)->isFast())) &&
"Should have been handled above!");
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (Instruction *Tmp = dyn_cast<Instruction>(Op))
if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
(Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
Tmp = LowerNegateToMultiply(Tmp);
LLVM_DEBUG(dbgs() << *Tmp << '\n');
// The new multiply is queued as an inner node so its operands get visited.
Worklist.push_back(std::make_pair(Tmp, Weight));
Changed = true;
continue;
}
// Failed to morph into an expression of the right type. This really is
// a leaf.
LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
}
}
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
Value *V = LeafOrder[i];
LeafMap::iterator It = Leaves.find(V);
if (It == Leaves.end())
// Node initially thought to be a leaf wasn't.
continue;
assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
APInt Weight = It->second;
if (Weight.isMinValue())
// Leaf already output or weight reduction eliminated it.
continue;
// Ensure the leaf is only output once.
// (LeafOrder can list the same value more than once because a leaf that is
// erased from Leaves and later re-added is pushed again.)
It->second = 0;
Ops.push_back(std::make_pair(V, Weight));
}
// For nilpotent operations or addition there may be no operands, for example
// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
// in both cases the weight reduces to 0 causing the value to be skipped.
if (Ops.empty()) {
// NOTE(review): I is the last worklist node here, not the root; this
// presumably has the same type as the root since all inner nodes share the
// opcode - confirm.
Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
Ops.emplace_back(Identity, APInt(Bitwidth, 1));
}
return Changed;
}
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
// new expression can usually be written reusing the existing binary operators
// from the original expression tree, without creating any new instructions,
// though the rewritten expression may have a completely different topology.
// We take care to not change anything if the new expression will be the same
// as the original. If more than trivial changes (like commuting operands)
// were made then we are obliged to clear out any optional subclass data like
// nsw flags.
/// NodesToRewrite - Nodes from the original expression available for writing
/// the new expression into.
SmallVector<BinaryOperator*, 8> NodesToRewrite;
unsigned Opcode = I->getOpcode();
BinaryOperator *Op = I;
/// NotRewritable - The operands being written will be the leaves of the new
/// expression and must not be used as inner nodes (via NodesToRewrite) by
/// mistake. Inner nodes are always reassociable, and usually leaves are not
/// (if they were they would have been incorporated into the expression and so
/// would not be leaves), so most of the time there is no danger of this. But
/// in rare cases a leaf may become reassociable if an optimization kills uses
/// of it, or it may momentarily become reassociable during rewriting (below)
/// due to it being removed as an operand of one of its uses. Ensure that misuse
/// of leaf nodes as inner nodes cannot occur by remembering all of the future
/// leaves and refusing to reuse any of them as inner nodes.
SmallPtrSet<Value*, 8> NotRewritable;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
NotRewritable.insert(Ops[i].Op);
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
// a subexpression.
if (i+2 == Ops.size()) {
Value *NewLHS = Ops[i].Op;
Value *NewRHS = Ops[i+1].Op;
Value *OldLHS = Op->getOperand(0);
Value *OldRHS = Op->getOperand(1);
if (NewLHS == OldLHS && NewRHS == OldRHS)
// Nothing changed, leave it alone.
break;
if (NewLHS == OldRHS && NewRHS == OldLHS) {
// The order of the operands was reversed. Swap them.
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->swapOperands();
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
break;
}
// The new operation differs non-trivially from the original. Overwrite
// the old operands with the new ones.
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewLHS != OldLHS) {
BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(0, NewLHS);
}
if (NewRHS != OldRHS) {
BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
break;
}
// Not the last operation. The left-hand side will be a sub-expression
// while the right-hand side will be the current element of Ops.
Value *NewRHS = Ops[i].Op;
if (NewRHS != Op->getOperand(1)) {
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewRHS == Op->getOperand(0)) {
// The new right-hand side was already present as the left operand. If
// we are lucky then swapping the operands will sort out both of them.
Op->swapOperands();
} else {
// Overwrite with the new right-hand side.
BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
ExpressionChanged = Op;
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
}
// Now deal with the left-hand side. If this is already an operation node
// from the original expression then just rewrite the rest of the expression
// into it.
BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
if (BO && !NotRewritable.count(BO)) {
Op = BO;
continue;
}
// Otherwise, grab a spare node from the original expression and use that as
// the left-hand side. If there are no nodes left then the optimizers made
// an expression with more nodes than the original! This usually means that
// they did something stupid but it might mean that the problem was just too
// hard (finding the minimal number of multiplications needed to realize a
// multiplication expression is NP-complete). Whatever the reason, smart or
// stupid, create a new node if there are none left.
BinaryOperator *NewOp;
if (NodesToRewrite.empty()) {
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
}
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->setOperand(0, NewOp);
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
Op = NewOp;
}
// If the expression changed non-trivially then clear out all subclass data
// starting from the operator specified in ExpressionChanged, and compactify
// the operators to just before the expression root to guarantee that the
// expression tree is dominated by all of Ops.
if (ExpressionChanged)
do {
// Preserve FastMathFlags.
if (isa<FPMathOperator>(I)) {
FastMathFlags Flags = I->getFastMathFlags();
ExpressionChanged->clearSubclassOptionalData();
ExpressionChanged->setFastMathFlags(Flags);
} else
ExpressionChanged->clearSubclassOptionalData();
if (ExpressionChanged == I)
break;
// Discard any debug info related to the expressions that have changed (we
// can leave debug info related to the root, since the result of the
// expression tree should be the same even after reassociation).
replaceDbgUsesWithUndef(ExpressionChanged);
ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
} while (true);
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
RedoInsts.insert(NodesToRewrite[i]);
}
/// Return a value that is the negation of \p V, materializing or relocating
/// instructions relative to \p BI as needed.
///
/// Three strategies are tried in order:
///  1. A constant is negated with a constant expression (integer or FP
///     flavour depending on its type).
///  2. A reassociable add/fadd has the negation pushed into both of its
///     operands; the add itself is moved before \p BI and renamed with a
///     ".neg" suffix.
///  3. Otherwise an existing negate of \p V in the same function is reused
///     (after being moved next to the definition of \p V), or a brand new
///     negate is created via CreateNeg.
///
/// Every instruction that is rewritten, reused or created here is inserted
/// into \p ToRedo so that it can be revisited later for further
/// reassociation opportunities.
static Value *NegateValue(Value *V, Instruction *BI,
                          ReassociatePass::OrderedSet &ToRedo) {
  // Constants fold directly; pick the FP or integer negation by type.
  if (auto *ConstV = dyn_cast<Constant>(V)) {
    if (ConstV->getType()->isFPOrFPVectorTy())
      return ConstantExpr::getFNeg(ConstV);
    return ConstantExpr::getNeg(ConstV);
  }

  // The goal is to expose reassociation opportunities by sinking the negation
  // as far into an add chain as possible, e.g.
  //   X = -(A+12+C+D)  becomes  X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that a later  Y = 12+X  can cancel against the -12. Any surplus negate
  // instructions created along the way are assumed to be cleaned up by
  // instcombine.
  if (BinaryOperator *AddOp =
          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
    // Negate the left operand first and install it before touching the right
    // operand, exactly one after the other.
    Value *NegatedLHS = NegateValue(AddOp->getOperand(0), BI, ToRedo);
    AddOp->setOperand(0, NegatedLHS);
    Value *NegatedRHS = NegateValue(AddOp->getOperand(1), BI, ToRedo);
    AddOp->setOperand(1, NegatedRHS);

    // The wrap flags of an integer add no longer hold for the negated sum.
    if (AddOp->getOpcode() == Instruction::Add) {
      AddOp->setHasNoUnsignedWrap(false);
      AddOp->setHasNoSignedWrap(false);
    }

    // The freshly produced negates do not, in general, dominate the add's old
    // position, so relocate the add to just before BI where they do.
    AddOp->moveBefore(BI);
    AddOp->setName(AddOp->getName() + ".neg");

    // Revisit the rewritten add later; it may expose more opportunities.
    ToRedo.insert(AddOp);
    return AddOp;
  }

  // A negated copy of V has to exist as an instruction. Look through V's
  // users for one that is already there.
  for (User *Candidate : V->users()) {
    const bool IsNegate = match(Candidate, m_Neg(m_Value())) ||
                          match(Candidate, m_FNeg(m_Value()));
    if (!IsNegate)
      continue;

    // Found a negate. It is going to be repositioned so that it is defined
    // early enough for our use; reassociate will zap these negates later, so
    // no particular finesse is required.
    Instruction *ExistingNeg = cast<Instruction>(Candidate);

    // Skip negates that live in a different function than BI.
    auto *NegFn = ExistingNeg->getParent()->getParent();
    if (NegFn != BI->getParent()->getParent())
      continue;

    BasicBlock::iterator InsertPt;
    bool FoundCatchSwitch = false;
    auto *InstInput = dyn_cast<Instruction>(V);
    if (!InstInput) {
      // Non-instruction value: the start of the entry block is the target.
      InsertPt = NegFn->getEntryBlock().begin();
    } else {
      // Instruction value: target the point right after its definition. The
      // result of an invoke is only available in its normal destination.
      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput))
        InsertPt = II->getNormalDest()->begin();
      else
        InsertPt = ++InstInput->getIterator();

      // Never place anything ahead of PHIs or exception handling pads.
      const BasicBlock *BB = InsertPt->getParent();
      for (; InsertPt != BB->end() &&
             (isa<PHINode>(InsertPt) || InsertPt->isEHPad());
           ++InsertPt) {
        // A catchswitch block may hold nothing but PHIs and the catchswitch
        // itself; remember that so we can give up on this block below.
        if (isa<CatchSwitchInst>(InsertPt))
          FoundCatchSwitch = true;
      }
    }

    // Nothing can be moved into a catchswitch block. Stop searching and fall
    // through to creating a new negate before BI, as if none had been found.
    if (FoundCatchSwitch)
      break;

    ExistingNeg->moveBefore(&*InsertPt);
    if (ExistingNeg->getOpcode() == Instruction::Sub) {
      ExistingNeg->setHasNoUnsignedWrap(false);
      ExistingNeg->setHasNoSignedWrap(false);
    } else {
      ExistingNeg->andIRFlags(BI);
    }
    ToRedo.insert(ExistingNeg);
    return ExistingNeg;
  }

  // No reusable negate: build a new one named after V with a ".neg" suffix.
  // NOTE(review): CreateNeg is defined elsewhere; BI is passed for both of its
  // trailing instruction arguments, unchanged from the original call.
  Instruction *FreshNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
  ToRedo.insert(FreshNeg);
  return FreshNeg;
}
// Heuristic check for whether the expression rooted at `Or` resembles a load
// widening reduction, i.e. whether everything reachable from it is built
// solely from `or`, `shl`, `zext` and `load` instructions. This does *not*
// prove that the expression can actually be replaced by a single wider load;
// it only checks that the shape is plausible.
static bool isLoadCombineCandidate(Instruction *Or) {
  SmallVector<Instruction *, 8> Pending;
  SmallSet<Instruction *, 8> Seen;

  // Queue V for inspection. Returns false when V is not an instruction, which
  // rules the whole expression out; returns true otherwise, whether or not V
  // had already been queued before.
  auto Schedule = [&](Value *V) {
    if (auto *Inst = dyn_cast<Instruction>(V)) {
      // Each instruction is inspected at most once.
      if (Seen.insert(Inst).second)
        Pending.push_back(Inst);
      return true;
    }
    return false;
  };

  if (!Schedule(Or))
    return false;

  while (!Pending.empty()) {
    Instruction *Node = Pending.pop_back_val();
    switch (Node->getOpcode()) {
    case Instruction::Load:
      // A load terminates this path of the expression graph.
      break;
    case Instruction::Shl:
    case Instruction::ZExt:
      // Only the base (first) operand of a shift or zero-extension matters.
      if (!Schedule(Node->getOperand(0)))
        return false;
      break;
    case Instruction::Or:
      // Every operand of an `or` must itself fit the pattern.
      for (Value *Operand : Node->operands()) {
        if (Schedule(Operand))
          continue;
        return false;
      }
      break;
    default:
      // Any other kind of instruction breaks the pattern.
      return false;
    }
  }

  // The work list drained without meeting anything unexpected.
  return true;
}
/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
///
/// The conversion is considered worthwhile when either
///  - any operand of \p Or is a reassociable add, sub, mul or shl, or
///  - \p Or has exactly one use and that user is such an operation.
///
/// This is only a compile-time improvement, it is not needed for correctness!
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
  // True if V is a reassociable instance of one of the opcodes we care about.
  auto isInteresting = [](Value *V) {
    for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
                    Instruction::Shl})
      if (isReassociableOp(V, Op))
        return true;
    return false;
  };

  if (any_of(Or->operands(), isInteresting))
    return true;

  // Check the use count *before* asking for the user: user_back() reads the
  // first entry of the use list and must not be called on an instruction that
  // has no users at all.
  return Or->hasOneUse() && isInteresting(Or->user_back());
}
/// If we have (X|Y), and iff X and Y have no common bits set,