Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit 4fa1f77

Browse files
committed
[X86][AVX512] Converted the VPERMPD/VPERMQ intrinsics to generic IR
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274502 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a2bfa04 commit 4fa1f77

File tree

4 files changed: 161 additions (+161) and 138 deletions (-138).

Diff for: lib/Headers/avx512fintrin.h

+41 -27 (41 additions, 27 deletions)
Original file line number | Diff line number | Diff line change
@@ -8678,35 +8678,49 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
86788678
-(__v2df)(__m128d)(Y), \
86798679
(__mmask8)(U), (int)(R)); })
86808680

8681-
#define _mm512_permutex_pd(X, M) __extension__ ({ \
8682-
(__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
8683-
(__v8df)_mm512_undefined_pd(), \
8684-
(__mmask8)-1); })
8685-
8686-
#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \
8687-
(__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
8688-
(__v8df)(__m512d)(W), \
8689-
(__mmask8)(U)); })
8690-
8691-
#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \
8692-
(__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \
8693-
(__v8df)_mm512_setzero_pd(), \
8694-
(__mmask8)(U)); })
8695-
8696-
#define _mm512_permutex_epi64(X, I) __extension__ ({ \
8697-
(__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
8698-
(__v8di)_mm512_undefined_epi32(), \
8699-
(__mmask8)-1); })
8681+
#define _mm512_permutex_pd(X, C) __extension__ ({ \
8682+
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
8683+
(__v8df)_mm512_undefined_pd(), \
8684+
0 + (((C) & 0x03) >> 0), \
8685+
0 + (((C) & 0x0c) >> 2), \
8686+
0 + (((C) & 0x30) >> 4), \
8687+
0 + (((C) & 0xc0) >> 6), \
8688+
4 + (((C) & 0x03) >> 0), \
8689+
4 + (((C) & 0x0c) >> 2), \
8690+
4 + (((C) & 0x30) >> 4), \
8691+
4 + (((C) & 0xc0) >> 6)); })
8692+
8693+
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
8694+
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8695+
(__v8df)_mm512_permutex_pd((X), (C)), \
8696+
(__v8df)(__m512d)(W)); })
87008697

8701-
#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \
8702-
(__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
8703-
(__v8di)(__m512i)(W), \
8704-
(__mmask8)(M)); })
8698+
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
8699+
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8700+
(__v8df)_mm512_permutex_pd((X), (C)), \
8701+
(__v8df)_mm512_setzero_pd()); })
87058702

8706-
#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \
8707-
(__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \
8708-
(__v8di)_mm512_setzero_si512(), \
8709-
(__mmask8)(M)); })
8703+
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
8704+
(__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
8705+
(__v8di)_mm512_undefined_epi32(), \
8706+
0 + (((C) & 0x03) >> 0), \
8707+
0 + (((C) & 0x0c) >> 2), \
8708+
0 + (((C) & 0x30) >> 4), \
8709+
0 + (((C) & 0xc0) >> 6), \
8710+
4 + (((C) & 0x03) >> 0), \
8711+
4 + (((C) & 0x0c) >> 2), \
8712+
4 + (((C) & 0x30) >> 4), \
8713+
4 + (((C) & 0xc0) >> 6)); })
8714+
8715+
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
8716+
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8717+
(__v8di)_mm512_permutex_epi64((X), (C)), \
8718+
(__v8di)(__m512i)(W)); })
8719+
8720+
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
8721+
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8722+
(__v8di)_mm512_permutex_epi64((X), (C)), \
8723+
(__v8di)_mm512_setzero_si512()); })
87108724

87118725
static __inline__ __m512d __DEFAULT_FN_ATTRS
87128726
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)

Diff for: lib/Headers/avx512vlintrin.h

+28 -26 (28 additions, 26 deletions)
Original file line number | Diff line number | Diff line change
@@ -8806,35 +8806,37 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
88068806
(__v8si)(__m256i)(index), \
88078807
(__mmask8)(mask), (int)(scale)); })
88088808

8809-
#define _mm256_mask_permutex_pd(W, U, X, imm) __extension__ ({ \
8810-
(__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \
8811-
(__v4df)(__m256d)(W), \
8812-
(__mmask8)(U)); })
8813-
8814-
#define _mm256_maskz_permutex_pd(U, X, imm) __extension__ ({ \
8815-
(__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \
8816-
(__v4df)_mm256_setzero_pd(), \
8817-
(__mmask8)(U)); })
8818-
8819-
#define _mm256_permutex_pd(X, M) __extension__ ({ \
8820-
(__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(M), \
8821-
(__v4df)_mm256_undefined_pd(), \
8822-
(__mmask8)-1); })
8809+
#define _mm256_permutex_pd(X, C) __extension__ ({ \
8810+
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
8811+
(__v4df)_mm256_undefined_pd(), \
8812+
(C) & 0x3, ((C) & 0xc) >> 2, \
8813+
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
88238814

8824-
#define _mm256_mask_permutex_epi64(W, M, X, I) __extension__ ({ \
8825-
(__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
8826-
(__v4di)(__m256i)(W), \
8827-
(__mmask8)(M)); })
8815+
#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
8816+
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8817+
(__v4df)_mm256_permutex_pd((X), (C)), \
8818+
(__v4df)(__m256d)(W)); })
88288819

8829-
#define _mm256_maskz_permutex_epi64(M, X, I) __extension__ ({ \
8830-
(__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
8831-
(__v4di)_mm256_setzero_si256(), \
8832-
(__mmask8)(M)); })
8820+
#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
8821+
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8822+
(__v4df)_mm256_permutex_pd((X), (C)), \
8823+
(__v4df)_mm256_setzero_pd()); })
88338824

8834-
#define _mm256_permutex_epi64(X, I) __extension__ ({ \
8835-
(__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \
8836-
(__v4di)_mm256_undefined_si256(), \
8837-
(__mmask8)-1); })
8825+
#define _mm256_permutex_epi64(X, C) __extension__ ({ \
8826+
(__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
8827+
(__v4di)_mm256_undefined_si256(), \
8828+
(C) & 0x3, ((C) & 0xc) >> 2, \
8829+
((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
8830+
8831+
#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
8832+
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8833+
(__v4di)_mm256_permutex_epi64((X), (C)), \
8834+
(__v4di)(__m256i)(W)); })
8835+
8836+
#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
8837+
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8838+
(__v4di)_mm256_permutex_epi64((X), (C)), \
8839+
(__v4di)_mm256_setzero_si256()); })
88388840

88398841
static __inline__ __m256d __DEFAULT_FN_ATTRS
88408842
_mm256_permutexvar_pd (__m256i __X, __m256d __Y)

Diff for: test/CodeGen/avx512f-builtins.c

+16 -12 (16 additions, 12 deletions)
Original file line number | Diff line number | Diff line change
@@ -5704,38 +5704,42 @@ __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __m
57045704

57055705
__m512d test_mm512_permutex_pd(__m512d __X) {
57065706
// CHECK-LABEL: @test_mm512_permutex_pd
5707-
// CHECK: @llvm.x86.avx512.mask.perm.df.512
5708-
return _mm512_permutex_pd(__X, 0);
5707+
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5708+
return _mm512_permutex_pd(__X, 0);
57095709
}
57105710

57115711
__m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) {
57125712
// CHECK-LABEL: @test_mm512_mask_permutex_pd
5713-
// CHECK: @llvm.x86.avx512.mask.perm.df.512
5714-
return _mm512_mask_permutex_pd(__W, __U, __X, 0);
5713+
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5714+
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
5715+
return _mm512_mask_permutex_pd(__W, __U, __X, 0);
57155716
}
57165717

57175718
__m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) {
57185719
// CHECK-LABEL: @test_mm512_maskz_permutex_pd
5719-
// CHECK: @llvm.x86.avx512.mask.perm.df.512
5720-
return _mm512_maskz_permutex_pd(__U, __X, 0);
5720+
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5721+
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
5722+
return _mm512_maskz_permutex_pd(__U, __X, 0);
57215723
}
57225724

57235725
__m512i test_mm512_permutex_epi64(__m512i __X) {
57245726
// CHECK-LABEL: @test_mm512_permutex_epi64
5725-
// CHECK: @llvm.x86.avx512.mask.perm.di.512
5726-
return _mm512_permutex_epi64(__X, 0);
5727+
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5728+
return _mm512_permutex_epi64(__X, 0);
57275729
}
57285730

57295731
__m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) {
57305732
// CHECK-LABEL: @test_mm512_mask_permutex_epi64
5731-
// CHECK: @llvm.x86.avx512.mask.perm.di.512
5732-
return _mm512_mask_permutex_epi64(__W, __M, __X, 0);
5733+
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5734+
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
5735+
return _mm512_mask_permutex_epi64(__W, __M, __X, 0);
57335736
}
57345737

57355738
__m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) {
57365739
// CHECK-LABEL: @test_mm512_maskz_permutex_epi64
5737-
// CHECK: @llvm.x86.avx512.mask.perm.di.512
5738-
return _mm512_maskz_permutex_epi64(__M, __X, 0);
5740+
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
5741+
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
5742+
return _mm512_maskz_permutex_epi64(__M, __X, 0);
57395743
}
57405744

57415745
__m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {

0 commit comments

Comments
 (0)