Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit d073eee

Browse files
committed
[X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@320919 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 4a7d78a commit d073eee

File tree

2 files changed

+74
-16
lines changed

2 files changed

+74
-16
lines changed

lib/CodeGen/CGBuiltin.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -7564,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
75647564
return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
75657565
}
75667566

7567+
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
7568+
unsigned NumElts, SmallVectorImpl<Value *> &Ops,
7569+
bool InvertLHS = false) {
7570+
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
7571+
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
7572+
7573+
if (InvertLHS)
7574+
LHS = CGF.Builder.CreateNot(LHS);
7575+
7576+
return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
7577+
CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
7578+
}
7579+
75677580
static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
75687581
SmallVectorImpl<Value *> &Ops,
75697582
llvm::Type *DstTy,
@@ -8217,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
82178230
return EmitX86MaskedCompare(*this, CC, false, Ops);
82188231
}
82198232

8233+
case X86::BI__builtin_ia32_kandhi:
8234+
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
8235+
case X86::BI__builtin_ia32_kandnhi:
8236+
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
8237+
case X86::BI__builtin_ia32_korhi:
8238+
return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
8239+
case X86::BI__builtin_ia32_kxnorhi:
8240+
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
8241+
case X86::BI__builtin_ia32_kxorhi:
8242+
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
8243+
case X86::BI__builtin_ia32_knothi: {
8244+
Ops[0] = getMaskVecValue(*this, Ops[0], 16);
8245+
return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
8246+
Builder.getInt16Ty());
8247+
}
8248+
82208249
case X86::BI__builtin_ia32_vplzcntd_128_mask:
82218250
case X86::BI__builtin_ia32_vplzcntd_256_mask:
82228251
case X86::BI__builtin_ia32_vplzcntd_512_mask:

test/CodeGen/avx512f-builtins.c

+45-16
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,9 @@ __m512d test_mm512_set1_pd(double d)
385385
__mmask16 test_mm512_knot(__mmask16 a)
386386
{
387387
// CHECK-LABEL: @test_mm512_knot
388-
// CHECK: @llvm.x86.avx512.knot.w
388+
// CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1>
389+
// CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
390+
// CHECK: bitcast <16 x i1> [[NOT]] to i16
389391
return _mm512_knot(a);
390392
}
391393

@@ -6211,22 +6213,38 @@ __m512i test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __
62116213
return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y);
62126214
}
62136215

6214-
__mmask16 test_mm512_kand(__mmask16 __A, __mmask16 __B) {
6216+
__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
62156217
// CHECK-LABEL: @test_mm512_kand
6216-
// CHECK: @llvm.x86.avx512.kand.w
6217-
return _mm512_kand(__A, __B);
6218+
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6219+
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6220+
// CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]]
6221+
// CHECK: bitcast <16 x i1> [[RES]] to i16
6222+
return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B),
6223+
_mm512_cmpneq_epu32_mask(__C, __D)),
6224+
__E, __F);
62186225
}
62196226

6220-
__mmask16 test_mm512_kandn(__mmask16 __A, __mmask16 __B) {
6227+
__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
62216228
// CHECK-LABEL: @test_mm512_kandn
6222-
// CHECK: @llvm.x86.avx512.kandn.w
6223-
return _mm512_kandn(__A, __B);
6229+
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6230+
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6231+
// CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
6232+
// CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
6233+
// CHECK: bitcast <16 x i1> [[RES]] to i16
6234+
return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B),
6235+
_mm512_cmpneq_epu32_mask(__C, __D)),
6236+
__E, __F);
62246237
}
62256238

6226-
__mmask16 test_mm512_kor(__mmask16 __A, __mmask16 __B) {
6239+
__mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
62276240
// CHECK-LABEL: @test_mm512_kor
6228-
// CHECK: @llvm.x86.avx512.kor.w
6229-
return _mm512_kor(__A, __B);
6241+
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6242+
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6243+
// CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
6244+
// CHECK: bitcast <16 x i1> [[RES]] to i16
6245+
return _mm512_mask_cmpneq_epu32_mask(_mm512_kor(_mm512_cmpneq_epu32_mask(__A, __B),
6246+
_mm512_cmpneq_epu32_mask(__C, __D)),
6247+
__E, __F);
62306248
}
62316249

62326250
int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
@@ -6254,16 +6272,27 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
62546272
__E, __F);
62556273
}
62566274

6257-
__mmask16 test_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
6275+
__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
62586276
// CHECK-LABEL: @test_mm512_kxnor
6259-
// CHECK: @llvm.x86.avx512.kxnor.w
6260-
return _mm512_kxnor(__A, __B);
6277+
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6278+
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6279+
// CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
6280+
// CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]]
6281+
// CHECK: bitcast <16 x i1> [[RES]] to i16
6282+
return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B),
6283+
_mm512_cmpneq_epu32_mask(__C, __D)),
6284+
__E, __F);
62616285
}
62626286

6263-
__mmask16 test_mm512_kxor(__mmask16 __A, __mmask16 __B) {
6287+
__mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
62646288
// CHECK-LABEL: @test_mm512_kxor
6265-
// CHECK: @llvm.x86.avx512.kxor.w
6266-
return _mm512_kxor(__A, __B);
6289+
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6290+
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
6291+
// CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]]
6292+
// CHECK: bitcast <16 x i1> [[RES]] to i16
6293+
return _mm512_mask_cmpneq_epu32_mask(_mm512_kxor(_mm512_cmpneq_epu32_mask(__A, __B),
6294+
_mm512_cmpneq_epu32_mask(__C, __D)),
6295+
__E, __F);
62676296
}
62686297

62696298
void test_mm512_stream_si512(__m512i * __P, __m512i __A) {

0 commit comments

Comments
 (0)