forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPPCISelDAGToDAG.cpp
7490 lines (6753 loc) · 290 KB
/
PPCISelDAGToDAG.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"
STATISTIC(NumSextSetcc,
"Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
"Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
"Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
"Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
"Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
static cl::opt<bool>
UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
cl::desc("use aggressive ppc isel for bit permutations"),
cl::Hidden);
static cl::opt<bool> BPermRewriterNoMasking(
"ppc-bit-perm-rewriter-stress-rotates",
cl::desc("stress rotate selection in aggressive ppc isel for "
"bit permutations"),
cl::Hidden);
static cl::opt<bool> EnableBranchHint(
"ppc-use-branch-hint", cl::init(true),
cl::desc("Enable static hinting of branches on ppc"),
cl::Hidden);
static cl::opt<bool> EnableTLSOpt(
"ppc-tls-opt", cl::init(true),
cl::desc("Enable tls optimization peephole"),
cl::Hidden);
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
static cl::opt<ICmpInGPRType> CmpInGPR(
"ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
cl::desc("Specify the types of comparisons to emit GPR-only code for."),
cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
clEnumValN(ICGPR_NonExtIn, "nonextin",
"Only comparisons where inputs don't need [sz]ext."),
clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
clEnumValN(ICGPR_ZextI32, "zexti32",
"Only i32 comparisons with zext result."),
clEnumValN(ICGPR_ZextI64, "zexti64",
"Only i64 comparisons with zext result."),
clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
clEnumValN(ICGPR_SextI32, "sexti32",
"Only i32 comparisons with sext result."),
clEnumValN(ICGPR_SextI64, "sexti64",
"Only i64 comparisons with sext result.")));
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
const PPCSubtarget *Subtarget = nullptr;
const PPCTargetLowering *PPCLowering = nullptr;
unsigned GlobalBaseReg = 0;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), TM(tm) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
Subtarget = &MF.getSubtarget<PPCSubtarget>();
PPCLowering = Subtarget->getTargetLowering();
if (Subtarget->hasROPProtect()) {
// Create a place on the stack for the ROP Protection Hash.
// The ROP Protection Hash will always be 8 bytes and aligned to 8
// bytes.
MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
const int Result = MFI.CreateStackObject(8, Align(8), false);
FI->setROPProtectionHashSaveIndex(Result);
}
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
/// getI16Imm - Return a target constant with the specified value, of type
/// i16.
inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
/// getI64Imm - Return a target constant with the specified value, of type
/// i64.
inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
}
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(
Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
}
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void Select(SDNode *N) override;
bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
// the PPCISD::ADD_TLS.
bool tryTLSXFormLoad(LoadSDNode *N);
// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
// an X-Form store instruction with the offset being a relocation coming from
// the PPCISD::ADD_TLS.
bool tryTLSXFormStore(StoreSDNode *N);
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SDValue Chain = SDValue());
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
/// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
}
return false;
}
/// SelectDSForm - Returns true if address N can be represented by the
/// addressing mode of DSForm instructions (a base register, plus a signed
/// 16-bit displacement that is a multiple of 4.
bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
Align(4)) == PPC::AM_DSForm;
}
/// SelectDQForm - Returns true if address N can be represented by the
/// addressing mode of DQForm instructions (a base register, plus a signed
/// 16-bit displacement that is a multiple of 16.
bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
Align(16)) == PPC::AM_DQForm;
}
/// SelectDForm - Returns true if address N can be represented by
/// the addressing mode of DForm instructions (a base register, plus a
/// signed 16-bit immediate.
bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
None) == PPC::AM_DForm;
}
/// SelectPCRelForm - Returns true if address N can be represented by
/// PC-Relative addressing mode.
bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
None) == PPC::AM_PCRel;
}
/// SelectPDForm - Returns true if address N can be represented by Prefixed
/// DForm addressing mode (a base register, plus a signed 34-bit immediate.
bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
None) == PPC::AM_PrefixDForm;
}
/// SelectXForm - Returns true if address N can be represented by the
/// addressing mode of XForm instructions (an indexed [r+r] operation).
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
None) == PPC::AM_XForm;
}
/// SelectForceXForm - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
PPC::AM_XForm;
}
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation.
/// This is for xform instructions whose associated displacement form is D.
/// The last parameter \p 0 means associated D form has no requirment for 16
/// bit signed displacement.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
}
/// SelectAddrIdx4 - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation.
/// This is for xform instructions whose associated displacement form is DS.
/// The last parameter \p 4 means associated DS form 16 bit signed
/// displacement must be a multiple of 4.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
Align(4));
}
/// SelectAddrIdx16 - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation.
/// This is for xform instructions whose associated displacement form is DQ.
/// The last parameter \p 16 means associated DQ form 16 bit signed
/// displacement must be a multiple of 16.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
Align(16));
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
/// The last parameter \p 0 means D form has no requirment for 16 bit signed
/// displacement.
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
}
/// SelectAddrImmX16 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 16(last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
Align(16));
}
/// SelectAddrImmX34 - Returns true if the address N can be represented by
/// a base register plus a signed 34-bit displacement. Suitable for use by
/// PSTXVP and friends.
bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
}
// Select an address into a single register.
bool SelectAddr(SDValue N, SDValue &Base) {
Base = N;
return true;
}
bool SelectAddrPCRel(SDValue N, SDValue &Base) {
return PPCLowering->SelectAddressPCRel(N, Base);
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
switch(ConstraintID) {
default:
errs() << "ConstraintID: " << ConstraintID << "\n";
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::Constraint_es:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
case InlineAsm::Constraint_Z:
case InlineAsm::Constraint_Zy:
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
SDLoc dl(Op);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
SDValue NewOp =
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
dl, Op.getValueType(),
Op, RC), 0);
OutOps.push_back(NewOp);
return false;
}
return true;
}
StringRef getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"
private:
bool trySETCC(SDNode *N);
bool tryFoldSWTestBRCC(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
bool tryAsSingleRLWINM8(SDNode *N);
bool tryAsSingleRLWIMI(SDNode *N);
bool tryAsPairOfRLDICL(SDNode *N);
bool tryAsSingleRLDIMI(SDNode *N);
void PeepholePPC64();
void PeepholePPC64ZExt();
void PeepholeCROps();
SDValue combineToCMPB(SDNode *N);
void foldBoolExts(SDValue &Res, SDNode *&N);
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
};
} // end anonymous namespace
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
const Module *M = MF->getFunction().getParent();
DebugLoc dl;
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (Subtarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
if (!Subtarget->isSecurePlt() &&
M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
} else {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
} else {
GlobalBaseReg =
RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
}
} else {
// We must ensure that this sequence is dominated by the prologue.
// FIXME: This is a bit of a big hammer since we don't get the benefits
// of shrink-wrapping whenever we emit this instruction. Considering
// this is used in any function where we emit a jump table, this may be
// a significant limitation. We should consider inserting this in the
// block where it is used and then commoning this sequence up if it
// appears in multiple places.
// Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
// MovePCtoLR8.
MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
}
return CurDAG->getRegister(GlobalBaseReg,
PPCLowering->getPointerTy(CurDAG->getDataLayout()))
.getNode();
}
// Check if a SDValue has the toc-data attribute.
static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
return false;
const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
if (!GV)
return false;
if (!GV->hasAttribute("toc-data"))
return false;
// TODO: These asserts should be updated as more support for the toc data
// transformation is added (struct support, etc.).
assert(
PointerSize >= GV->getAlign().valueOrOne().value() &&
"GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.");
Type *GVType = GV->getValueType();
assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.");
if (GVType->isVectorTy())
report_fatal_error("A GlobalVariable of Vector type is not currently "
"supported by the toc data transformation.");
if (GVType->isArrayTy())
report_fatal_error("A GlobalVariable of Array type is not currently "
"supported by the toc data transformation.");
if (GVType->isStructTy())
report_fatal_error("A GlobalVariable of Struct type is not currently "
"supported by the toc data transformation.");
assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
"A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.");
if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
report_fatal_error("A GlobalVariable with private or local linkage is not "
"currently supported by the toc data transformation.");
assert(!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD.");
return true;
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
Imm = cast<ConstantSDNode>(N)->getZExtValue();
return true;
}
return false;
}
/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
Imm = cast<ConstantSDNode>(N)->getZExtValue();
return true;
}
return false;
}
// isInt32Immediate - This method tests to see if a constant operand.
// If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
return isInt64Immediate(N.getNode(), Imm);
}
static unsigned getBranchHint(unsigned PCC,
const FunctionLoweringInfo &FuncInfo,
const SDValue &DestMBB) {
assert(isa<BasicBlockSDNode>(DestMBB));
if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
const BasicBlock *TBB = BBTerm->getSuccessor(0);
const BasicBlock *FBB = BBTerm->getSuccessor(1);
auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
// We only want to handle cases which are easy to predict at static time, e.g.
// C++ throw statement, that is very likely not taken, or calling never
// returned function, e.g. stdlib exit(). So we set Threshold to filter
// unwanted cases.
//
// Below is LLVM branch weight table, we only want to handle case 1, 2
//
// Case Taken:Nontaken Example
// 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
// 2. Invoke-terminating 1:1048575
// 3. Coldblock 4:64 __builtin_expect
// 4. Loop Branch 124:4 For loop
// 5. PH/ZH/FPH 20:12
const uint32_t Threshold = 10000;
if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
return PPC::BR_NO_HINT;
LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
<< "::" << BB->getName() << "'\n"
<< " -> " << TBB->getName() << ": " << TProb << "\n"
<< " -> " << FBB->getName() << ": " << FProb << "\n");
const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
// If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
// because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
std::swap(TProb, FProb);
return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
return N->getOpcode() == Opc
&& isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
SDLoc dl(SN);
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
if (SN->hasOneUse())
CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
getSmallIPtrImm(Offset, dl));
else
ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
getSmallIPtrImm(Offset, dl)));
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
if (N->getValueType(0) != MVT::i32)
return false;
unsigned Shift = 32;
unsigned Indeterminant = ~0; // bit mask marking indeterminant results
unsigned Opcode = N->getOpcode();
if (N->getNumOperands() != 2 ||
!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
return false;
if (Opcode == ISD::SHL) {
// apply shift left to mask if it comes first
if (isShiftMask) Mask = Mask << Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu << Shift);
} else if (Opcode == ISD::SRL) {
// apply shift right to mask if it comes first
if (isShiftMask) Mask = Mask >> Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu >> Shift);
// adjust for the left rotate
Shift = 32 - Shift;
} else if (Opcode == ISD::ROTL) {
Indeterminant = 0;
} else {
return false;
}
// if the mask doesn't intersect any Indeterminant bits
if (Mask && !(Mask & Indeterminant)) {
SH = Shift & 31;
// make sure the mask is still a mask (wrap arounds may not be)
return isRunOfOnes(Mask, MB, ME);
}
return false;
}
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
SDValue Base = ST->getBasePtr();
if (Base.getOpcode() != PPCISD::ADD_TLS)
return false;
SDValue Offset = ST->getOffset();
if (!Offset.isUndef())
return false;
if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
return false;
SDLoc dl(ST);
EVT MemVT = ST->getMemoryVT();
EVT RegVT = ST->getValue().getValueType();
unsigned Opcode;
switch (MemVT.getSimpleVT().SimpleTy) {
default:
return false;
case MVT::i8: {
Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
break;
}
case MVT::i16: {
Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
break;
}
case MVT::i32: {
Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
break;
}
case MVT::i64: {
Opcode = PPC::STDXTLS;
break;
}
}
SDValue Chain = ST->getChain();
SDVTList VTs = ST->getVTList();
SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
Chain};
SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
transferMemOperands(ST, MN);
ReplaceNode(ST, MN);
return true;
}
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDValue Base = LD->getBasePtr();
if (Base.getOpcode() != PPCISD::ADD_TLS)
return false;
SDValue Offset = LD->getOffset();
if (!Offset.isUndef())
return false;
if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
return false;
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
EVT RegVT = LD->getValueType(0);
unsigned Opcode;
switch (MemVT.getSimpleVT().SimpleTy) {
default:
return false;
case MVT::i8: {
Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
break;
}
case MVT::i16: {
Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
break;
}
case MVT::i32: {
Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
break;
}
case MVT::i64: {
Opcode = PPC::LDXTLS;
break;
}
}
SDValue Chain = LD->getChain();
SDVTList VTs = LD->getVTList();
SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
transferMemOperands(LD, MN);
ReplaceNode(LD, MN);
return true;
}
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
KnownBits LKnown = CurDAG->computeKnownBits(Op0);
KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
unsigned Op0Opc = Op0.getOpcode();
unsigned Op1Opc = Op1.getOpcode();
unsigned Value, SH = 0;
TargetMask = ~TargetMask;
InsertMask = ~InsertMask;
// If the LHS has a foldable shift and the RHS does not, then swap it to the
// RHS so that we can fold the shift into the insert.
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
Op0.getOperand(0).getOpcode() == ISD::SRL) {
if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
Op1.getOperand(0).getOpcode() != ISD::SRL) {
std::swap(Op0, Op1);
std::swap(Op0Opc, Op1Opc);
std::swap(TargetMask, InsertMask);
}
}
} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
Op1.getOperand(0).getOpcode() != ISD::SRL) {
std::swap(Op0, Op1);
std::swap(Op0Opc, Op1Opc);
std::swap(TargetMask, InsertMask);
}
}
unsigned MB, ME;
if (isRunOfOnes(InsertMask, MB, ME)) {
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(1), Value)) {
Op1 = Op1.getOperand(0);
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
}
if (Op1Opc == ISD::AND) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
// Note that Value must be in range here (less than 32) because
// otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
}
}
SH &= 31;
SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
getI32Imm(ME, dl) };
ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
return true;
}
}
return false;
}
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
unsigned MaxTruncation = 0;
// Cannot use range-based for loop here as we need the actual use (i.e. we
// need the operand number corresponding to the use). A range-based for
// will unbox the use and provide an SDNode*.
for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
Use != UseEnd; ++Use) {
unsigned Opc =
Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
switch (Opc) {
default: return 0;
case ISD::TRUNCATE:
if (Use->isMachineOpcode())
return 0;
MaxTruncation =
std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
continue;
case ISD::STORE: {
if (Use->isMachineOpcode())
return 0;
StoreSDNode *STN = cast<StoreSDNode>(*Use);
unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
if (MemVTSize == 64 || Use.getOperandNo() != 0)
return 0;
MaxTruncation = std::max(MaxTruncation, MemVTSize);
continue;
}
case PPC::STW8:
case PPC::STWX8:
case PPC::STWU8:
case PPC::STWUX8:
if (Use.getOperandNo() != 0)
return 0;
MaxTruncation = std::max(MaxTruncation, 32u);
continue;
case PPC::STH8:
case PPC::STHX8:
case PPC::STHU8:
case PPC::STHUX8:
if (Use.getOperandNo() != 0)
return 0;
MaxTruncation = std::max(MaxTruncation, 16u);
continue;
case PPC::STB8:
case PPC::STBX8:
case PPC::STBU8:
case PPC::STBUX8:
if (Use.getOperandNo() != 0)
return 0;
MaxTruncation = std::max(MaxTruncation, 8u);
continue;
}
}
return MaxTruncation;
}
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and return the number of bits by the left of these consecutive zeros.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
if ((HiTZ + LoLZ) >= Num)
return (32 + HiTZ);
return 0;
}
// Direct materialization of 64-bit constants by enumerated patterns.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
uint64_t Imm, unsigned &InstCnt) {
unsigned TZ = countTrailingZeros<uint64_t>(Imm);
unsigned LZ = countLeadingZeros<uint64_t>(Imm);
unsigned TO = countTrailingOnes<uint64_t>(Imm);
unsigned LO = countLeadingOnes<uint64_t>(Imm);
unsigned Hi32 = Hi_32(Imm);
unsigned Lo32 = Lo_32(Imm);
SDNode *Result = nullptr;
unsigned Shift = 0;
auto getI32Imm = [CurDAG, dl](unsigned Imm) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
// Following patterns use 1 instructions to materialize the Imm.
InstCnt = 1;
// 1-1) Patterns : {zeros}{15-bit valve}
// {ones}{15-bit valve}
if (isInt<16>(Imm)) {
SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
}
// 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
// {ones}{15-bit valve}{16 zeros}
if (TZ > 15 && (LZ > 32 || LO > 32))
return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
getI32Imm((Imm >> 16) & 0xffff));
// Following patterns use 2 instructions to materialize the Imm.
InstCnt = 2;
assert(LZ < 64 && "Unexpected leading zeros here.");
// Count of ones follwing the leading zeros.
unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
// 2-1) Patterns : {zeros}{31-bit value}
// {ones}{31-bit value}
if (isInt<32>(Imm)) {
uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
getI32Imm(Imm & 0xffff));
}
// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
// {zeros}{15-bit value}{zeros}
// {zeros}{ones}{15-bit value}
// {ones}{15-bit value}{zeros}
// We can take advantage of LI's sign-extension semantics to generate leading
// ones, and then use RLDIC to mask off the ones in both sides after rotation.
if ((LZ + FO + TZ) > 48) {
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
getI32Imm((Imm >> TZ) & 0xffff));
return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
getI32Imm(TZ), getI32Imm(LZ));
}
// 2-3) Pattern : {zeros}{15-bit value}{ones}
// Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
// therefore we can take advantage of LI's sign-extension semantics, and then
// mask them off after rotation.
//
// +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
// +------------------------+ +------------------------+
// 63 0 63 0
// Imm (Imm >> (48 - LZ) & 0xffff)
// +----sext-----|--16-bit--+ +clear-|-----------------+
// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
// +------------------------+ +------------------------+
// 63 0 63 0
// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
if ((LZ + TO) > 48) {
// Since the immediates with (LZ > 32) have been handled by previous
// patterns, here we have (LZ <= 32) to make sure we will not shift right
// the Imm by a negative value.