Skip to content

Commit 88f4054

Browse files
committed
[X86] Add BSR/BSF/BSWAP intrinsics to ia32intrin.h to match gcc.
Summary: These are all implemented by icc as well. I made bit_scan_forward/reverse forward to the __bsfd/__bsrq since we also have __bsfq/__bsrq. Note, when lzcnt is enabled the bsr intrinsics generates lzcnt+xor instead of bsr. Reviewers: RKSimon, spatel Subscribers: cfe-commits, llvm-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D59682 llvm-svn: 356848
1 parent 4b7bf6a commit 88f4054

File tree

4 files changed

+167
-15
lines changed

4 files changed

+167
-15
lines changed

clang/lib/Headers/ia32intrin.h

+108
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,114 @@
2828
#ifndef __IA32INTRIN_H
2929
#define __IA32INTRIN_H
3030

31+
/** Find the first set bit starting from the lsb. Result is undefined if
32+
* input is 0.
33+
*
34+
* \headerfile <x86intrin.h>
35+
*
36+
* This intrinsic corresponds to the <c> BSF </c> instruction or the
37+
* <c> TZCNT </c> instruction.
38+
*
39+
* \param __A
40+
* A 32-bit integer operand.
41+
* \returns A 32-bit integer containing the bit number.
42+
*/
43+
static __inline__ int __attribute__((__always_inline__, __nodebug__))
44+
__bsfd(int __A) {
45+
return __builtin_ctz(__A);
46+
}
47+
48+
/** Find the first set bit starting from the msb. Result is undefined if
49+
* input is 0.
50+
*
51+
* \headerfile <x86intrin.h>
52+
*
53+
* This intrinsic corresponds to the <c> BSR </c> instruction or the
54+
* <c> LZCNT </c> instruction and an <c> XOR </c>.
55+
*
56+
* \param __A
57+
* A 32-bit integer operand.
58+
* \returns A 32-bit integer containing the bit number.
59+
*/
60+
static __inline__ int __attribute__((__always_inline__, __nodebug__))
61+
__bsrd(int __A) {
62+
return 31 - __builtin_clz(__A);
63+
}
64+
65+
/** Swaps the bytes in the input. Converting little endian to big endian or
66+
* vice versa.
67+
*
68+
* \headerfile <x86intrin.h>
69+
*
70+
* This intrinsic corresponds to the <c> BSWAP </c> instruction.
71+
*
72+
* \param __A
73+
* A 32-bit integer operand.
74+
* \returns A 32-bit integer containing the swapped bytes.
75+
*/
76+
static __inline__ int __attribute__((__always_inline__, __nodebug__))
77+
__bswapd(int __A) {
78+
return __builtin_bswap32(__A);
79+
}
80+
81+
#define _bswap(A) __bswapd((A))
82+
#define _bit_scan_forward(A) __bsfd((A))
83+
#define _bit_scan_reverse(A) __bsrd((A))
84+
85+
#ifdef __x86_64__
86+
/** Find the first set bit starting from the lsb. Result is undefined if
87+
* input is 0.
88+
*
89+
* \headerfile <x86intrin.h>
90+
*
91+
* This intrinsic corresponds to the <c> BSF </c> instruction or the
92+
* <c> TZCNT </c> instruction.
93+
*
94+
* \param __A
95+
* A 64-bit integer operand.
96+
* \returns A 32-bit integer containing the bit number.
97+
*/
98+
static __inline__ int __attribute__((__always_inline__, __nodebug__))
99+
__bsfq(long long __A) {
100+
return __builtin_ctzll(__A);
101+
}
102+
103+
/** Find the first set bit starting from the msb. Result is undefined if
104+
* input is 0.
105+
*
106+
* \headerfile <x86intrin.h>
107+
*
108+
* This intrinsic corresponds to the <c> BSR </c> instruction or the
109+
* <c> LZCNT </c> instruction and an <c> XOR </c>.
110+
*
111+
* \param __A
112+
* A 64-bit integer operand.
113+
* \returns A 32-bit integer containing the bit number.
114+
*/
115+
static __inline__ int __attribute__((__always_inline__, __nodebug__))
116+
__bsrq(long long __A) {
117+
return 63 - __builtin_clzll(__A);
118+
}
119+
120+
/** Swaps the bytes in the input. Converting little endian to big endian or
121+
* vice versa.
122+
*
123+
* \headerfile <x86intrin.h>
124+
*
125+
* This intrinsic corresponds to the <c> BSWAP </c> instruction.
126+
*
127+
* \param __A
128+
* A 64-bit integer operand.
129+
* \returns A 64-bit integer containing the swapped bytes.
130+
*/
131+
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
132+
__bswapq(long long __A) {
133+
return __builtin_bswap64(__A);
134+
}
135+
136+
#define _bswap64(A) __bswapq((A))
137+
#endif
138+
31139
/** Counts the number of bits in the source operand having a value of 1.
32140
*
33141
* \headerfile <x86intrin.h>

clang/lib/Headers/immintrin.h

-12
Original file line numberDiff line numberDiff line change
@@ -241,18 +241,6 @@ _rdrand64_step(unsigned long long *__p)
241241
#endif
242242
#endif /* __RDRND__ */
243243

244-
/* __bit_scan_forward */
245-
static __inline__ int __attribute__((__always_inline__, __nodebug__))
246-
_bit_scan_forward(int __A) {
247-
return __builtin_ctz(__A);
248-
}
249-
250-
/* __bit_scan_reverse */
251-
static __inline__ int __attribute__((__always_inline__, __nodebug__))
252-
_bit_scan_reverse(int __A) {
253-
return 31 - __builtin_clz(__A);
254-
}
255-
256244
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
257245
#ifdef __x86_64__
258246
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

clang/test/CodeGen/bitscan-builtins.c

+30-3
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,45 @@
33
// PR33722
44
// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -fms-extensions -fms-compatibility-version=19.00 -emit-llvm -o - %s | FileCheck %s
55

6-
#include <immintrin.h>
6+
#include <x86intrin.h>
77

88
int test_bit_scan_forward(int a) {
99
return _bit_scan_forward(a);
1010
// CHECK: @test_bit_scan_forward
11-
// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(
11+
// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
1212
// CHECK: ret i32 %[[call]]
1313
}
1414

1515
int test_bit_scan_reverse(int a) {
1616
return _bit_scan_reverse(a);
17-
// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(
17+
// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
1818
// CHECK: %[[sub:.*]] = sub nsw i32 31, %[[call]]
1919
// CHECK: ret i32 %[[sub]]
2020
}
21+
22+
int test__bsfd(int X) {
23+
// CHECK: @test__bsfd
24+
// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
25+
return __bsfd(X);
26+
}
27+
28+
int test__bsfq(long long X) {
29+
// CHECK: @test__bsfq
30+
// CHECK: %[[call:.*]] = call i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
31+
return __bsfq(X);
32+
}
33+
34+
int test__bsrd(int X) {
35+
// CHECK: @test__bsrd
36+
// CHECK: %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
37+
// CHECK: %[[sub:.*]] = sub nsw i32 31, %[[call]]
38+
return __bsrd(X);
39+
}
40+
41+
int test__bsrq(long long X) {
42+
// CHECK: @test__bsrq
43+
// CHECK: %[[call:.*]] = call i64 @llvm.ctlz.i64(i64 %{{.*}}, i1 true)
44+
// CHECK: %[[cast:.*]] = trunc i64 %[[call]] to i32
45+
// CHECK: %[[sub:.*]] = sub nsw i32 63, %[[cast]]
46+
return __bsrq(X);
47+
}

clang/test/CodeGen/x86-bswap.c

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
2+
3+
#include <x86intrin.h>
4+
5+
int test__bswapd(int X) {
6+
// CHECK-LABEL: @test__bswapd
7+
// CHECK: call i32 @llvm.bswap.i32
8+
return __bswapd(X);
9+
}
10+
11+
int test_bswap(int X) {
12+
// CHECK-LABEL: @test_bswap
13+
// CHECK: call i32 @llvm.bswap.i32
14+
return _bswap(X);
15+
}
16+
17+
long test__bswapq(long long X) {
18+
// CHECK-LABEL: @test__bswapq
19+
// CHECK: call i64 @llvm.bswap.i64
20+
return __bswapq(X);
21+
}
22+
23+
long test_bswap64(long long X) {
24+
// CHECK-LABEL: @test_bswap64
25+
// CHECK: call i64 @llvm.bswap.i64
26+
return _bswap64(X);
27+
}
28+
29+

0 commit comments

Comments
 (0)