forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimd-intrinsic-mask-reduce.rs
59 lines (53 loc) · 1.8 KB
/
simd-intrinsic-mask-reduce.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Verify that SIMD mask reductions do not introduce additional bit-shift operations.
//@ add-core-stubs
//@ revisions: x86 aarch64
//@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
// Set the base cpu explicitly, in case the default has been changed.
//@ [x86] compile-flags: -C target-cpu=x86-64
//@ [x86] needs-llvm-components: x86
//@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
//@ [aarch64] needs-llvm-components: aarch64
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort
#![feature(no_core, lang_items, repr_simd, intrinsics)]
#![no_core]
#![allow(non_camel_case_types)]
extern crate minicore;
use minicore::*;
// A vector of sixteen `i8` lanes, given a SIMD layout by `#[repr(simd)]`.
// It is the argument type of `mask_reduce_all` and `mask_reduce_any`.
// NOTE(review): as a mask, each lane is presumably expected to be all-zeros or
// all-ones — confirm against the documentation of the reduction intrinsics.
#[repr(simd)]
pub struct mask8x16([i8; 16]);
// Declarations of the compiler-provided SIMD reduction intrinsics exercised by
// this test. They are declared by hand because the crate is `#![no_core]`.
// Both are generic over the vector type and return a single `bool`.
// NOTE(review): going by the names, `simd_reduce_all` should yield `true` only
// when every lane is set and `simd_reduce_any` when at least one lane is set —
// confirm against the `core::intrinsics::simd` documentation.
extern "rust-intrinsic" {
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
}
// Passes a `mask8x16` to `simd_reduce_all` and pins the assembly expected for
// each revision of this test.
//
// `#[no_mangle]` keeps the emitted symbol equal to the function name, which is
// what the label directive below matches on. The function is `unsafe` so the
// intrinsic from the `extern` block can be called without an inner `unsafe` block.
//
// The comments inside the body are FileCheck directives, not prose: editing
// them changes what the test accepts.
// CHECK-LABEL: mask_reduce_all:
#[no_mangle]
pub unsafe extern "C" fn mask_reduce_all(m: mask8x16) -> bool {
// Expected for the x86 revision: no `psllw` shift ahead of the reduction; the
// mask is moved into `eax` with `pmovmskb`, compared against the all-ones
// 16-bit value (three equivalent instruction spellings are accepted), and the
// result is materialised with `sete`.
// x86-NOT: psllw
// x86: pmovmskb eax, xmm0
// x86-NEXT: {{cmp ax, -1|cmp eax, 65535|xor eax, 65535}}
// x86-NEXT: sete al
//
// Expected for the AArch64 revision: no `shl` ahead of the reduction; a `cmge`
// against zero, then (in either order) a `mov` of the constant 1 and a `umaxv`
// across the lanes, followed by `fmov` into a general register and a `bic` of
// the two captured registers into `w0`.
// aarch64-NOT: shl
// aarch64: cmge v0.16b, v0.16b, #0
// aarch64-DAG: mov [[REG1:[a-z0-9]+]], #1
// aarch64-DAG: umaxv b0, v0.16b
// aarch64-NEXT: fmov [[REG2:[a-z0-9]+]], s0
// aarch64-NEXT: bic w0, [[REG1]], [[REG2]]
simd_reduce_all(m)
}
// Passes a `mask8x16` to `simd_reduce_any` and pins the assembly expected for
// each revision of this test.
//
// `#[no_mangle]` keeps the emitted symbol equal to the function name, which is
// what the label directive below matches on. The function is `unsafe` so the
// intrinsic from the `extern` block can be called without an inner `unsafe` block.
//
// The comments inside the body are FileCheck directives, not prose: editing
// them changes what the test accepts.
// CHECK-LABEL: mask_reduce_any:
#[no_mangle]
pub unsafe extern "C" fn mask_reduce_any(m: mask8x16) -> bool {
// Expected for the x86 revision: no `psllw` shift ahead of the reduction; a
// `pmovmskb` immediately followed by `test eax, eax` and `setne`, i.e. the
// result is whether the extracted mask is non-zero.
// x86-NOT: psllw
// x86: pmovmskb
// x86-NEXT: test eax, eax
// x86-NEXT: setne al
//
// Expected for the AArch64 revision: no `shl` ahead of the reduction; a `cmlt`
// against zero, a `umaxv` across the lanes, an `fmov` into a general register,
// and a final `and` with 1 that leaves the boolean in `w0`.
// aarch64-NOT: shl
// aarch64: cmlt v0.16b, v0.16b, #0
// aarch64-NEXT: umaxv b0, v0.16b
// aarch64-NEXT: fmov [[REG:[a-z0-9]+]], s0
// aarch64-NEXT: and w0, [[REG]], #0x1
simd_reduce_any(m)
}