Skip to content

Commit a0c37c4

Browse files
committed
Auto merge of #126171 - RalfJung:simd_bitmask_multibyte, r=workingjubilee
simd_bitmask intrinsic: add a non-power-of-2 multi-byte example. r? `@calebzulawski` `@workingjubilee`
2 parents 7d97c59 + e9dd39c commit a0c37c4

File tree

4 files changed

+144
-26
lines changed

4 files changed

+144
-26
lines changed

Diff for: compiler/rustc_codegen_llvm/src/intrinsic.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -1121,8 +1121,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
11211121
if name == sym::simd_select_bitmask {
11221122
let (len, _) = require_simd!(arg_tys[1], SimdArgument);
11231123

1124-
let expected_int_bits = (len.max(8) - 1).next_power_of_two();
1125-
let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
1124+
let expected_int_bits = len.max(8).next_power_of_two();
1125+
let expected_bytes = len.div_ceil(8);
11261126

11271127
let mask_ty = arg_tys[0];
11281128
let mask = match mask_ty.kind() {
@@ -1379,17 +1379,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
13791379
}
13801380

13811381
if name == sym::simd_bitmask {
1382-
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
1383-
// vector mask and returns the most significant bit (MSB) of each lane in the form
1384-
// of either:
1382+
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
1383+
// returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
13851384
// * an unsigned integer
13861385
// * an array of `u8`
13871386
// If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
13881387
//
13891388
// The bit order of the result depends on the byte endianness, LSB-first for little
13901389
// endian and MSB-first for big endian.
1391-
let expected_int_bits = in_len.max(8);
1392-
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
1390+
let expected_int_bits = in_len.max(8).next_power_of_two();
1391+
let expected_bytes = in_len.div_ceil(8);
13931392

13941393
// Integer vector <i{in_bitwidth} x in_len>:
13951394
let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
@@ -1409,7 +1408,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
14091408
}),
14101409
};
14111410

1412-
// Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
1411+
// LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
1412+
// `pmovmskb` and similar on x86.
14131413
let shift_indices =
14141414
vec![
14151415
bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);

Diff for: library/core/src/intrinsics/simd.rs

+13-6
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ extern "rust-intrinsic" {
460460
/// `T` must be an integer vector.
461461
///
462462
/// `U` must be either the smallest unsigned integer with at least as many bits as the length
463-
/// of `T`, or the smallest array of `u8` with as many bits as the length of `T`.
463+
/// of `T`, or the smallest array of `u8` with at least as many bits as the length of `T`.
464464
///
465465
/// Each element is truncated to a single bit and packed into the result.
466466
///
@@ -472,12 +472,19 @@ extern "rust-intrinsic" {
472472
/// * On little endian, the least significant bit corresponds to the first vector element.
473473
/// * On big endian, the least significant bit corresponds to the last vector element.
474474
///
475-
/// For example, `[!0, 0, !0, !0]` packs to `0b1101` on little endian and `0b1011` on big
476-
/// endian.
475+
/// For example, `[!0, 0, !0, !0]` packs to
476+
/// - `0b1101u8` or `[0b1101]` on little endian, and
477+
/// - `0b1011u8` or `[0b1011]` on big endian.
477478
///
478-
/// To consider a larger example, `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs
479-
/// to `[0b00000001, 0b01000011]` or `0b0100001100000001` on little endian, and `[0b10000000,
480-
/// 0b11000010]` or `0b1000000011000010` on big endian.
479+
/// To consider a larger example,
480+
/// `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs to
481+
/// - `0b0100001100000001u16` or `[0b00000001, 0b01000011]` on little endian, and
482+
/// - `0b1000000011000010u16` or `[0b10000000, 0b11000010]` on big endian.
483+
///
484+
/// And finally, a non-power-of-2 example with multiple bytes:
485+
/// `[!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]` packs to
486+
/// - `0b0101001011u16` or `[0b01001011, 0b01]` on little endian, and
487+
/// - `0b1101001010u16` or `[0b11, 0b01001010]` on big endian.
481488
///
482489
/// # Safety
483490
/// `x` must contain only `0` and `!0`.

Diff for: tests/ui/simd/simd-bitmask-notpow2.rs

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
//@run-pass
2+
// SEGFAULTS on LLVM 17. This should be merged into `simd-bitmask` once we require LLVM 18.
3+
//@ min-llvm-version: 18
4+
// FIXME: broken codegen on big-endian (https://github.com/rust-lang/rust/issues/127205)
5+
//@ ignore-endian-big
6+
#![feature(repr_simd, intrinsics)]
7+
8+
extern "rust-intrinsic" {
9+
fn simd_bitmask<T, U>(v: T) -> U;
10+
fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
11+
}
12+
13+
fn main() {
14+
// Non-power-of-2 multi-byte mask.
15+
#[repr(simd, packed)]
16+
#[allow(non_camel_case_types)]
17+
#[derive(Copy, Clone, Debug, PartialEq)]
18+
struct i32x10([i32; 10]);
19+
impl i32x10 {
20+
fn splat(x: i32) -> Self {
21+
Self([x; 10])
22+
}
23+
}
24+
unsafe {
25+
let mask = i32x10([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]);
26+
let mask_bits = if cfg!(target_endian = "little") { 0b0101001011 } else { 0b1101001010 };
27+
let mask_bytes =
28+
if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] };
29+
30+
let bitmask1: u16 = simd_bitmask(mask);
31+
let bitmask2: [u8; 2] = simd_bitmask(mask);
32+
assert_eq!(bitmask1, mask_bits);
33+
assert_eq!(bitmask2, mask_bytes);
34+
35+
let selected1 = simd_select_bitmask::<u16, _>(
36+
mask_bits,
37+
i32x10::splat(!0), // yes
38+
i32x10::splat(0), // no
39+
);
40+
let selected2 = simd_select_bitmask::<[u8; 2], _>(
41+
mask_bytes,
42+
i32x10::splat(!0), // yes
43+
i32x10::splat(0), // no
44+
);
45+
assert_eq!(selected1, mask);
46+
assert_eq!(selected2, mask);
47+
}
48+
49+
// Test for a mask where the next multiple of 8 is not a power of two.
50+
#[repr(simd, packed)]
51+
#[allow(non_camel_case_types)]
52+
#[derive(Copy, Clone, Debug, PartialEq)]
53+
struct i32x20([i32; 20]);
54+
impl i32x20 {
55+
fn splat(x: i32) -> Self {
56+
Self([x; 20])
57+
}
58+
}
59+
unsafe {
60+
let mask = i32x20([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0, 0, 0, 0, !0, !0, !0, !0, !0, !0, !0]);
61+
let mask_bits = if cfg!(target_endian = "little") {
62+
0b11111110000101001011
63+
} else {
64+
0b11010010100001111111
65+
};
66+
let mask_bytes = if cfg!(target_endian = "little") {
67+
[0b01001011, 0b11100001, 0b1111]
68+
} else {
69+
[0b1101, 0b00101000, 0b01111111]
70+
};
71+
72+
let bitmask1: u32 = simd_bitmask(mask);
73+
let bitmask2: [u8; 3] = simd_bitmask(mask);
74+
assert_eq!(bitmask1, mask_bits);
75+
assert_eq!(bitmask2, mask_bytes);
76+
77+
let selected1 = simd_select_bitmask::<u32, _>(
78+
mask_bits,
79+
i32x20::splat(!0), // yes
80+
i32x20::splat(0), // no
81+
);
82+
let selected2 = simd_select_bitmask::<[u8; 3], _>(
83+
mask_bytes,
84+
i32x20::splat(!0), // yes
85+
i32x20::splat(0), // no
86+
);
87+
assert_eq!(selected1, mask);
88+
assert_eq!(selected2, mask);
89+
}
90+
}

Diff for: tests/ui/simd/simd-bitmask.rs

+33-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
//@run-pass
2-
//@ignore-endian-big behavior of simd_select_bitmask is endian-specific
32
#![feature(repr_simd, intrinsics)]
43

54
extern "rust-intrinsic" {
@@ -17,36 +16,58 @@ fn main() {
1716
let i: u8 = simd_bitmask(v);
1817
let a: [u8; 1] = simd_bitmask(v);
1918

20-
assert_eq!(i, 0b0101);
21-
assert_eq!(a, [0b0101]);
19+
if cfg!(target_endian = "little") {
20+
assert_eq!(i, 0b0101);
21+
assert_eq!(a, [0b0101]);
22+
} else {
23+
assert_eq!(i, 0b1010);
24+
assert_eq!(a, [0b1010]);
25+
}
2226

2327
let v = Simd::<i8, 16>([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]);
2428
let i: u16 = simd_bitmask(v);
2529
let a: [u8; 2] = simd_bitmask(v);
2630

27-
assert_eq!(i, 0b0101000000001100);
28-
assert_eq!(a, [0b1100, 0b01010000]);
31+
if cfg!(target_endian = "little") {
32+
assert_eq!(i, 0b0101000000001100);
33+
assert_eq!(a, [0b00001100, 0b01010000]);
34+
} else {
35+
assert_eq!(i, 0b0011000000001010);
36+
assert_eq!(a, [0b00110000, 0b00001010]);
37+
}
2938
}
3039

3140
unsafe {
32-
let a = Simd::<i32, 8>([0, 1, 2, 3, 4, 5, 6, 7]);
33-
let b = Simd::<i32, 8>([8, 9, 10, 11, 12, 13, 14, 15]);
34-
let e = [0, 9, 2, 11, 12, 13, 14, 15];
41+
let a = Simd::<i32, 4>([0, 1, 2, 3]);
42+
let b = Simd::<i32, 4>([8, 9, 10, 11]);
43+
let e = [0, 9, 2, 11];
3544

36-
let r = simd_select_bitmask(0b0101u8, a, b);
45+
let mask = if cfg!(target_endian = "little") { 0b0101u8 } else { 0b1010u8 };
46+
let r = simd_select_bitmask(mask, a, b);
3747
assert_eq!(r.0, e);
3848

39-
let r = simd_select_bitmask([0b0101u8], a, b);
49+
let mask = if cfg!(target_endian = "little") { [0b0101u8] } else { [0b1010u8] };
50+
let r = simd_select_bitmask(mask, a, b);
4051
assert_eq!(r.0, e);
4152

4253
let a = Simd::<i32, 16>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
4354
let b = Simd::<i32, 16>([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
4455
let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31];
4556

46-
let r = simd_select_bitmask(0b0101000000001100u16, a, b);
57+
let mask = if cfg!(target_endian = "little") {
58+
0b0101000000001100u16
59+
} else {
60+
0b0011000000001010u16
61+
};
62+
let r = simd_select_bitmask(mask, a, b);
4763
assert_eq!(r.0, e);
4864

49-
let r = simd_select_bitmask([0b1100u8, 0b01010000u8], a, b);
65+
let mask = if cfg!(target_endian = "little") {
66+
[0b00001100u8, 0b01010000u8]
67+
} else {
68+
[0b00110000u8, 0b00001010u8]
69+
};
70+
let r = simd_select_bitmask(mask, a, b);
5071
assert_eq!(r.0, e);
5172
}
5273
}

0 commit comments

Comments
 (0)