forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathX86ISelLowering.h
1783 lines (1429 loc) · 63.3 KB
/
X86ISelLowering.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
class X86Subtarget;
class X86TargetMachine;
namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
/// Bit scan forward.
BSF,
/// Bit scan reverse.
BSR,
/// X86 funnel/double shift i16 instructions. These correspond to
/// X86::SHLDW and X86::SHRDW instructions which have different amt
/// modulo rules to generic funnel shifts.
/// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
FSHL,
FSHR,
/// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
FAND,
/// Bitwise logical OR of floating point values. This corresponds
/// to X86::ORPS or X86::ORPD.
FOR,
/// Bitwise logical XOR of floating point values. This corresponds
/// to X86::XORPS or X86::XORPD.
FXOR,
/// Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
/// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
///
/// #0 - The incoming token chain
/// #1 - The callee
/// #2 - The number of arg bytes the caller pushes on the stack.
/// #3 - The number of arg bytes the callee pops off the stack.
/// #4 - The value to pass in AL/AX/EAX (optional)
/// #5 - The value to pass in DL/DX/EDX (optional)
///
/// The result values of these nodes are:
///
/// #0 - The outgoing token chain
/// #1 - The first register result value (optional)
/// #2 - The second register result value (optional)
///
CALL,
/// Same as call except it adds the NoTrack prefix.
NT_CALL,
// Pseudo for a OBJC call that gets emitted together with a special
// marker instruction.
CALL_RVMARKER,
/// X86 compare and logical compare instructions.
CMP,
FCMP,
COMI,
UCOMI,
/// X86 bit-test instructions.
BT,
/// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
/// operand, usually produced by a CMP instruction.
SETCC,
/// X86 Select
SELECTS,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
SETCC_CARRY, // R = carry_bit ? ~0 : 0
/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
/// Operands are two FP values to compare; result is a mask of
/// 0s or 1s. Generally DTRT for C/C++ with NaNs.
FSETCC,
/// X86 FP SETCC, similar to above, but with output as an i1 mask and
/// and a version with SAE.
FSETCCM,
FSETCCM_SAE,
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction.
CMOV,
/// X86 conditional branches. Operand 0 is the chain operand, operand 1
/// is the block to branch if condition is true, operand 2 is the
/// condition code, and operand 3 is the flag operand produced by a CMP
/// or TEST instruction.
BRCOND,
/// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
/// operand 1 is the target address.
NT_BRIND,
/// Return with a flag operand. Operand 0 is the chain operand, operand
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
/// Return from interrupt. Operand 0 is the number of bytes to pop.
IRET,
/// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
/// Repeat move, corresponds to X86::REP_MOVSx.
REP_MOVS,
/// On Darwin, this node represents the result of the popl
/// at function entry, used for PIC code.
GlobalBaseReg,
/// A wrapper node for TargetConstantPool, TargetJumpTable,
/// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
/// MCSymbol and TargetBlockAddress.
Wrapper,
/// Special wrapper used under X86-64 PIC mode for RIP
/// relative displacements.
WrapperRIP,
/// Copies a 64-bit value from an MMX vector to the low word
/// of an XMM vector, with the high word zero filled.
MOVQ2DQ,
/// Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector.
MOVDQ2Q,
/// Copies a 32-bit value from the low word of a MMX
/// vector to a GPR.
MMX_MOVD2W,
/// Copies a GPR into the low 32-bit word of a MMX vector
/// and zero out the high word.
MMX_MOVW2D,
/// Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
/// Extract a 16-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRW.
PEXTRW,
/// Insert any element of a 4 x float vector into any element
/// of a destination 4 x floatvector.
INSERTPS,
/// Insert the lower 8-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRB.
PINSRB,
/// Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
PINSRW,
/// Shuffle 16 8-bit values within a vector.
PSHUFB,
/// Compute Sum of Absolute Differences.
PSADBW,
/// Compute Double Block Packed Sum-Absolute-Differences
DBPSADBW,
/// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
/// Blend where the selector is an immediate.
BLENDI,
/// Dynamic (non-constant condition) vector blend where only the sign bits
/// of the condition elements are used. This is used to enforce that the
/// condition mask is not valid for generic VSELECT optimizations. This
/// is also used to implement the intrinsics.
/// Operands are in VSELECT order: MASK, TRUE, FALSE
BLENDV,
/// Combined add and sub on an FP vector.
ADDSUB,
// FP vector ops with rounding mode.
FADD_RND,
FADDS,
FADDS_RND,
FSUB_RND,
FSUBS,
FSUBS_RND,
FMUL_RND,
FMULS,
FMULS_RND,
FDIV_RND,
FDIVS,
FDIVS_RND,
FMAX_SAE,
FMAXS_SAE,
FMIN_SAE,
FMINS_SAE,
FSQRT_RND,
FSQRTS,
FSQRTS_RND,
// FP vector get exponent.
FGETEXP,
FGETEXP_SAE,
FGETEXPS,
FGETEXPS_SAE,
// Extract Normalized Mantissas.
VGETMANT,
VGETMANT_SAE,
VGETMANTS,
VGETMANTS_SAE,
// FP Scale.
SCALEF,
SCALEF_RND,
SCALEFS,
SCALEFS_RND,
// Unsigned Integer average.
AVG,
/// Integer horizontal add/sub.
HADD,
HSUB,
/// Floating point horizontal add/sub.
FHADD,
FHSUB,
// Detect Conflicts Within a Vector
CONFLICT,
/// Floating point max and min.
FMAX,
FMIN,
/// Commutative FMIN and FMAX.
FMAXC,
FMINC,
/// Scalar intrinsic floating point max and min.
FMAXS,
FMINS,
/// Floating point reciprocal-sqrt and reciprocal approximation.
/// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT,
FRCP,
// AVX-512 reciprocal approximations with a little more precision.
RSQRT14,
RSQRT14S,
RCP14,
RCP14S,
// Thread Local Storage.
TLSADDR,
// Thread Local Storage. A call to get the start address
// of the TLS block for the current module.
TLSBASEADDR,
// Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
// Exception Handling helpers.
EH_RETURN,
// SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
// SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
// SjLj exception handling dispatch.
EH_SJLJ_SETUP_DISPATCH,
/// Tail call return. See X86TargetLowering::LowerCall for
/// the list of operands.
TC_RETURN,
// Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
// Vector integer truncate.
VTRUNC,
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS,
VTRUNCS,
// Masked version of the above. Used when less than a 128-bit result is
// produced since the mask only applies to the lower elements and can't
// be represented by a select.
// SRC, PASSTHRU, MASK
VMTRUNC,
VMTRUNCUS,
VMTRUNCS,
// Vector FP extend.
VFPEXT,
VFPEXT_SAE,
VFPEXTS,
VFPEXTS_SAE,
// Vector FP round.
VFPROUND,
VFPROUND_RND,
VFPROUNDS,
VFPROUNDS_RND,
// Masked version of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
VMFPROUND,
// 128-bit vector logical left / right shift
VSHLDQ,
VSRLDQ,
// Vector shift elements
VSHL,
VSRL,
VSRA,
// Vector variable shift
VSHLV,
VSRLV,
VSRAV,
// Vector shift elements by immediate
VSHLI,
VSRLI,
VSRAI,
// Shifts of mask registers.
KSHIFTL,
KSHIFTR,
// Bit rotate by immediate
VROTLI,
VROTRI,
// Vector packed double/float comparison.
CMPP,
// Vector integer comparisons.
PCMPEQ,
PCMPGT,
// v8i16 Horizontal minimum and position.
PHMINPOS,
MULTISHIFT,
/// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
// Vector mask comparison generating mask bits for FP values.
CMPMM,
// Vector mask comparison with SAE for FP values.
CMPMM_SAE,
// Arithmetic operations with FLAGS results.
ADD,
SUB,
ADC,
SBB,
SMUL,
UMUL,
OR,
XOR,
AND,
// Bit field extract.
BEXTR,
BEXTRI,
// Zero High Bits Starting with Specified Bit Position.
BZHI,
// Parallel extract and deposit.
PDEP,
PEXT,
// X86-specific multiply by immediate.
MUL_IMM,
// Vector sign bit extraction.
MOVMSK,
// Vector bitwise comparisons.
PTEST,
// Vector packed fp sign bitwise comparisons.
TESTP,
// OR/AND test for masks.
KORTEST,
KTEST,
// ADD for masks.
KADD,
// Several flavors of instructions with vector shuffle behaviors.
// Saturated signed/unnsigned packing.
PACKSS,
PACKUS,
// Intra-lane alignr.
PALIGNR,
// AVX512 inter-lane alignr.
VALIGN,
PSHUFD,
PSHUFHW,
PSHUFLW,
SHUFP,
// VBMI2 Concat & Shift.
VSHLD,
VSHRD,
VSHLDV,
VSHRDV,
// Shuffle Packed Values at 128-bit granularity.
SHUF128,
MOVDDUP,
MOVSHDUP,
MOVSLDUP,
MOVLHPS,
MOVHLPS,
MOVSD,
MOVSS,
MOVSH,
UNPCKL,
UNPCKH,
VPERMILPV,
VPERMILPI,
VPERMI,
VPERM2X128,
// Variable Permute (VPERM).
// Res = VPERMV MaskV, V0
VPERMV,
// 3-op Variable Permute (VPERMT2).
// Res = VPERMV3 V0, MaskV, V1
VPERMV3,
// Bitwise ternary logic.
VPTERNLOG,
// Fix Up Special Packed Float32/64 values.
VFIXUPIMM,
VFIXUPIMM_SAE,
VFIXUPIMMS,
VFIXUPIMMS_SAE,
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
VRANGE,
VRANGE_SAE,
VRANGES,
VRANGES_SAE,
// Reduce - Perform Reduction Transformation on scalar\packed FP.
VREDUCE,
VREDUCE_SAE,
VREDUCES,
VREDUCES_SAE,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
// Also used by the legacy (V)ROUND intrinsics where we mask out the
// scaling part of the immediate.
VRNDSCALE,
VRNDSCALE_SAE,
VRNDSCALES,
VRNDSCALES_SAE,
// Tests Types Of a FP Values for packed types.
VFPCLASS,
// Tests Types Of a FP Values for scalar types.
VFPCLASSS,
// Broadcast (splat) scalar or element 0 of a vector. If the operand is
// a vector, this node may change the vector length as part of the splat.
VBROADCAST,
// Broadcast mask to vector.
VBROADCASTM,
/// SSE4A Extraction and Insertion.
EXTRQI,
INSERTQI,
// XOP arithmetic/logical shifts.
VPSHA,
VPSHL,
// XOP signed/unsigned integer comparisons.
VPCOM,
VPCOMU,
// XOP packed permute bytes.
VPPERM,
// XOP two source permutation.
VPERMIL2,
// Vector multiply packed unsigned doubleword integers.
PMULUDQ,
// Vector multiply packed signed doubleword integers.
PMULDQ,
// Vector Multiply Packed UnsignedIntegers with Round and Scale.
MULHRS,
// Multiply and Add Packed Integers.
VPMADDUBSW,
VPMADDWD,
// AVX512IFMA multiply and add.
// NOTE: These are different than the instruction and perform
// op0 x op1 + op2.
VPMADD52L,
VPMADD52H,
// VNNI
VPDPBUSD,
VPDPBUSDS,
VPDPWSSD,
VPDPWSSDS,
// FMA nodes.
// We use the target independent ISD::FMA for the non-inverted case.
FNMADD,
FMSUB,
FNMSUB,
FMADDSUB,
FMSUBADD,
// FMA with rounding mode.
FMADD_RND,
FNMADD_RND,
FMSUB_RND,
FNMSUB_RND,
FMADDSUB_RND,
FMSUBADD_RND,
// AVX512-FP16 complex addition and multiplication.
VFMADDC,
VFMADDC_RND,
VFCMADDC,
VFCMADDC_RND,
VFMULC,
VFMULC_RND,
VFCMULC,
VFCMULC_RND,
VFMADDCSH,
VFMADDCSH_RND,
VFCMADDCSH,
VFCMADDCSH_RND,
VFMULCSH,
VFMULCSH_RND,
VFCMULCSH,
VFCMULCSH_RND,
// Compress and expand.
COMPRESS,
EXPAND,
// Bits shuffle
VPSHUFBITQMB,
// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
SINT_TO_FP_RND,
UINT_TO_FP_RND,
SCALAR_SINT_TO_FP,
SCALAR_UINT_TO_FP,
SCALAR_SINT_TO_FP_RND,
SCALAR_UINT_TO_FP_RND,
// Vector float/double to signed/unsigned integer.
CVTP2SI,
CVTP2UI,
CVTP2SI_RND,
CVTP2UI_RND,
// Scalar float/double to signed/unsigned integer.
CVTS2SI,
CVTS2UI,
CVTS2SI_RND,
CVTS2UI_RND,
// Vector float/double to signed/unsigned integer with truncation.
CVTTP2SI,
CVTTP2UI,
CVTTP2SI_SAE,
CVTTP2UI_SAE,
// Scalar float/double to signed/unsigned integer with truncation.
CVTTS2SI,
CVTTS2UI,
CVTTS2SI_SAE,
CVTTS2UI_SAE,
// Vector signed/unsigned integer to float/double.
CVTSI2P,
CVTUI2P,
// Masked versions of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
MCVTP2SI,
MCVTP2UI,
MCVTTP2SI,
MCVTTP2UI,
MCVTSI2P,
MCVTUI2P,
// Vector float to bfloat16.
// Convert TWO packed single data to one packed BF16 data
CVTNE2PS2BF16,
// Convert packed single data to packed BF16 data
CVTNEPS2BF16,
// Masked version of above.
// SRC, PASSTHRU, MASK
MCVTNEPS2BF16,
// Dot product of BF16 pairs to accumulated into
// packed single precision.
DPBF16PS,
// A stack checking function call. On Windows it's _chkstk call.
DYN_ALLOCA,
// For allocating variable amounts of stack space when using
// segmented stacks. Check if the current stacklet has enough space, and
// falls back to heap allocation if not.
SEG_ALLOCA,
// For allocating stack space when using stack clash protector.
// Allocation is performed by block, and each block is probed.
PROBED_ALLOCA,
// Memory barriers.
MEMBARRIER,
MFENCE,
// Get a random integer and indicate whether it is valid in CF.
RDRAND,
// Get a NIST SP800-90B & C compliant random integer and
// indicate whether it is valid in CF.
RDSEED,
// Protection keys
// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
// WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
// value for ECX.
RDPKRU,
WRPKRU,
// SSE42 string comparisons.
// These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// flags and index/mask this allows us to use a single instruction since
// we won't have to pick and opcode for flags. Instead we can rely on the
// DAG to CSE everything and decide at isel.
PCMPISTR,
PCMPESTR,
// Test if in transactional execution.
XTEST,
// ERI instructions.
RSQRT28,
RSQRT28_SAE,
RSQRT28S,
RSQRT28S_SAE,
RCP28,
RCP28_SAE,
RCP28S,
RCP28S_SAE,
EXP2,
EXP2_SAE,
// Conversions between float and half-float.
CVTPS2PH,
CVTPH2PS,
CVTPH2PS_SAE,
// Masked version of above.
// SRC, RND, PASSTHRU, MASK
MCVTPS2PH,
// Galois Field Arithmetic Instructions
GF2P8AFFINEINVQB,
GF2P8AFFINEQB,
GF2P8MULB,
// LWP insert record.
LWPINS,
// User level wait
UMWAIT,
TPAUSE,
// Enqueue Stores Instructions
ENQCMD,
ENQCMDS,
// For avx512-vp2intersect
VP2INTERSECT,
// User level interrupts - testui
TESTUI,
/// X86 strict FP compare instructions.
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPS,
// Vector packed double/float comparison.
STRICT_CMPP,
/// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
STRICT_CMPM,
// Vector float/double to signed/unsigned integer with truncation.
STRICT_CVTTP2SI,
STRICT_CVTTP2UI,
// Vector FP extend.
STRICT_VFPEXT,
// Vector FP round.
STRICT_VFPROUND,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
// Also used by the legacy (V)ROUND intrinsics where we mask out the
// scaling part of the immediate.
STRICT_VRNDSCALE,
// Vector signed/unsigned integer to float/double.
STRICT_CVTSI2P,
STRICT_CVTUI2P,
// Strict FMA nodes.
STRICT_FNMADD,
STRICT_FMSUB,
STRICT_FNMSUB,
// Conversions between float and half-float.
STRICT_CVTPS2PH,
STRICT_CVTPH2PS,
// WARNING: Only add nodes here if they are stric FP nodes. Non-memory and
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
LCMPXCHG16_SAVE_RBX_DAG,
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
LADD,
LSUB,
LOR,
LXOR,
LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
// extract_vector_elt, store.
VEXTRACT_STORE,
// scalar broadcast from memory.
VBROADCAST_LOAD,
// subvector broadcast from memory.
SUBV_BROADCAST_LOAD,
// Store FP control word into i16 memory.
FNSTCW16m,
// Load FP control word from i16 memory.
FLDCW16m,
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
/// has two inputs (token chain and address) and two outputs (int value
/// and token chain). Memory VT specifies the type to store to.
FP_TO_INT_IN_MEM,
/// This instruction implements SINT_TO_FP with the
/// integer source in memory and FP reg result. This corresponds to the
/// X86::FILD*m instructions. It has two inputs (token chain and address)
/// and two outputs (FP value and token chain). The integer source type is
/// specified by the memory VT.
FILD,
/// This instruction implements a fp->int store from FP stack
/// slots. This corresponds to the fist instruction. It takes a
/// chain operand, value to store, address, and glue. The memory VT
/// specifies the type to store as.
FIST,
/// This instruction implements an extending load to FP stack slots.
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
/// operand, and ptr to load from. The memory VT specifies the type to
/// load from.
FLD,
/// This instruction implements a truncating store from FP stack
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
/// chain operand, value to store, address, and glue. The memory VT
/// specifies the type to store as.
FST,
/// These instructions grab the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
VAARG_64,
VAARG_X32,
// Vector truncating store with unsigned/signed saturation
VTRUNCSTOREUS,
VTRUNCSTORES,
// Vector truncating masked store with unsigned/signed saturation
VMTRUNCSTOREUS,
VMTRUNCSTORES,
// X86 specific gather and scatter
MGATHER,
MSCATTER,
// Key locker nodes that produce flags.
AESENC128KL,
AESDEC128KL,
AESENC256KL,
AESDEC256KL,
AESENCWIDE128KL,
AESDECWIDE128KL,
AESENCWIDE256KL,
AESDECWIDE256KL,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
};
} // end namespace X86ISD
namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are same as corresponding constants for rounding mode used
/// in glibc.
enum RoundingMode {
rmToNearest = 0, // FE_TONEAREST
rmDownward = 1 << 10, // FE_DOWNWARD
rmUpward = 2 << 10, // FE_UPWARD
rmTowardZero = 3 << 10, // FE_TOWARDZERO
rmMask = 3 << 10 // Bit mask selecting rounding mode
};
}
/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);
/// Returns true of the given offset can be
/// fit into displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement);
/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
bool AllowPartialUndefs = true);
/// Check if Op is a load operation that could be folded into some other x86
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
bool AssumeSingleUse = false);
/// Check if Op is a load operation that could be folded into a vector splat
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
const X86Subtarget &Subtarget,
bool AssumeSingleUse = false);
/// Check if Op is a value that could be used to fold a store into some
/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
bool mayFoldIntoStore(SDValue Op);
/// Check if Op is an operation that could be folded into a zero extend x86
/// instruction.
bool mayFoldIntoZeroExtend(SDValue Op);
} // end namespace X86
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
explicit X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI);
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
void markLibCallAttributes(MachineFunction *MF, unsigned CC,
ArgListTy &Args) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
return MVT::i8;
}
const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid,
MCContext &Ctx) const override;
/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const override;
const MCExpr *
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const override;
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contains are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
/// for all types except for some special cases. For example, on X86
/// targets without SSE2 f64 load / store are done with fldl / fstpl which
/// also does type conversion. Note the specified type doesn't have to be
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
/// Returns true if the target allows unaligned memory accesses of the
/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
/// Provide custom lowering hooks for some operations.
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// Return true if the target has native support for