14
14
let Predicates = [FeatureVector] in {
15
15
// Register move.
16
16
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
17
+ def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
18
+ def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
17
19
18
20
// Load GR from VR element.
19
21
def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
@@ -123,6 +125,13 @@ let Predicates = [FeatureVector] in {
123
125
def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
124
126
(VLREPG bdxaddr12only:$addr)>;
125
127
128
+ // Use VLREP to load subvectors. These patterns use "12pair" because
129
+ // LEY and LDY offer full 20-bit displacement fields. It's often better
130
+ // to use those instructions rather than force a 20-bit displacement
131
+ // into a GPR temporary.
132
+ def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
133
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
134
+
126
135
// Load logical element and zero.
127
136
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
128
137
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
@@ -193,6 +202,13 @@ let Predicates = [FeatureVector] in {
193
202
imm32zx1:$index),
194
203
(VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
195
204
205
+ // Use VSTE to store subvectors. These patterns use "12pair" because
206
+ // STEY and STDY offer full 20-bit displacement fields. It's often better
207
+ // to use those instructions rather than force a 20-bit displacement
208
+ // into a GPR temporary.
209
+ def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
210
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
211
+
196
212
// Scatter element.
197
213
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
198
214
def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
@@ -778,7 +794,7 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
778
794
let Predicates = [FeatureVector] in {
779
795
// Add.
780
796
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
781
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag , v64db, v64db, 3, 8>;
797
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd , v64db, v64db, 3, 8>;
782
798
783
799
// Convert from fixed 64-bit.
784
800
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
@@ -804,53 +820,55 @@ let Predicates = [FeatureVector] in {
804
820
805
821
// Divide.
806
822
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
807
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag , v64db, v64db, 3, 8>;
823
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv , v64db, v64db, 3, 8>;
808
824
809
825
// Load FP integer.
810
826
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
811
827
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
812
828
defm : VectorRounding<VFIDB, v128db>;
829
+ defm : VectorRounding<WFIDB, v64db>;
813
830
814
831
// Load lengthened.
815
832
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
816
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, null_frag , v64db, v32eb, 2, 8>;
833
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend , v64db, v32eb, 2, 8>;
817
834
818
835
// Load rounded.
819
836
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
820
837
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
821
838
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
839
+ def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
822
840
823
841
// Multiply.
824
842
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
825
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag , v64db, v64db, 3, 8>;
843
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul , v64db, v64db, 3, 8>;
826
844
827
845
// Multiply and add.
828
846
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
829
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag , v64db, v64db, 8, 3>;
847
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma , v64db, v64db, 8, 3>;
830
848
831
849
// Multiply and subtract.
832
850
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
833
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag , v64db, v64db, 8, 3>;
851
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms , v64db, v64db, 8, 3>;
834
852
835
853
// Load complement.
836
854
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
837
- def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag , v64db, v64db, 3, 8, 0>;
855
+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg , v64db, v64db, 3, 8, 0>;
838
856
839
857
// Load negative.
840
858
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
841
- def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag , v64db, v64db, 3, 8, 1>;
859
+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs , v64db, v64db, 3, 8, 1>;
842
860
843
861
// Load positive.
844
862
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
845
- def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag , v64db, v64db, 3, 8, 2>;
863
+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs , v64db, v64db, 3, 8, 2>;
846
864
847
865
// Square root.
848
866
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
849
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag , v64db, v64db, 3, 8>;
867
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt , v64db, v64db, 3, 8>;
850
868
851
869
// Subtract.
852
870
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
853
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag , v64db, v64db, 3, 8>;
871
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub , v64db, v64db, 3, 8>;
854
872
855
873
// Test data class immediate.
856
874
let Defs = [CC] in {
@@ -866,7 +884,7 @@ let Predicates = [FeatureVector] in {
866
884
let Predicates = [FeatureVector] in {
867
885
// Compare scalar.
868
886
let Defs = [CC] in
869
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, null_frag , v64db, 3>;
887
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp , v64db, 3>;
870
888
871
889
// Compare and signal scalar.
872
890
let Defs = [CC] in
0 commit comments