Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit daee7ce

Browse files
committed
[Altivec] Change vec_sl to a << (b % (sizeof(a) * 8))
For a << b (as the original vec_sl does), the behavior is undefined if b >= sizeof(a) * 8, where sizeof(a) here means the size of one element of a. However, the Power instructions do define the behavior, which is equivalent to a << (b % (sizeof(a) * 8)). This patch changes altivec.h to use a << (b % (sizeof(a) * 8)), to ensure semantics consistent with the instructions. It then combines the multiple generated instructions back into a single shift. This patch handles left shift only. Right shift is more complicated, because arithmetic and logical right shifts must be handled differently. Differential Revision: https://reviews.llvm.org/D28037 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@292659 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 32c7998 commit daee7ce

File tree

2 files changed

+49
-31
lines changed

2 files changed

+49
-31
lines changed

Diff for: lib/Headers/altivec.h

+25-19
Original file line numberDiff line numberDiff line change
@@ -8045,45 +8045,51 @@ static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a,
80458045

80468046
/* vec_sl */
80478047

8048-
static __inline__ vector signed char __ATTRS_o_ai
8049-
vec_sl(vector signed char __a, vector unsigned char __b) {
8050-
return __a << (vector signed char)__b;
8051-
}
8052-
8048+
// vec_sl does modulo arithmetic on __b first, so __b is allowed to be more
8049+
// than the length of __a.
80538050
static __inline__ vector unsigned char __ATTRS_o_ai
80548051
vec_sl(vector unsigned char __a, vector unsigned char __b) {
8055-
return __a << __b;
8052+
return __a << (__b %
8053+
(vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
80568054
}
80578055

8058-
static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
8059-
vector unsigned short __b) {
8060-
return __a << (vector short)__b;
8056+
static __inline__ vector signed char __ATTRS_o_ai
8057+
vec_sl(vector signed char __a, vector unsigned char __b) {
8058+
return (vector signed char)vec_sl((vector unsigned char)__a, __b);
80618059
}
80628060

80638061
static __inline__ vector unsigned short __ATTRS_o_ai
80648062
vec_sl(vector unsigned short __a, vector unsigned short __b) {
8065-
return __a << __b;
8063+
return __a << (__b % (vector unsigned short)(sizeof(unsigned short) *
8064+
__CHAR_BIT__));
80668065
}
80678066

8068-
static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
8069-
vector unsigned int __b) {
8070-
return __a << (vector int)__b;
8067+
static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
8068+
vector unsigned short __b) {
8069+
return (vector short)vec_sl((vector unsigned short)__a, __b);
80718070
}
80728071

80738072
static __inline__ vector unsigned int __ATTRS_o_ai
80748073
vec_sl(vector unsigned int __a, vector unsigned int __b) {
8075-
return __a << __b;
8074+
return __a << (__b %
8075+
(vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
80768076
}
80778077

8078-
#ifdef __POWER8_VECTOR__
8079-
static __inline__ vector signed long long __ATTRS_o_ai
8080-
vec_sl(vector signed long long __a, vector unsigned long long __b) {
8081-
return __a << (vector long long)__b;
8078+
static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
8079+
vector unsigned int __b) {
8080+
return (vector int)vec_sl((vector unsigned int)__a, __b);
80828081
}
80838082

8083+
#ifdef __POWER8_VECTOR__
80848084
static __inline__ vector unsigned long long __ATTRS_o_ai
80858085
vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
8086-
return __a << __b;
8086+
return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) *
8087+
__CHAR_BIT__));
8088+
}
8089+
8090+
static __inline__ vector long long __ATTRS_o_ai
8091+
vec_sl(vector long long __a, vector unsigned long long __b) {
8092+
return (vector long long)vec_sl((vector unsigned long long)__a, __b);
80878093
}
80888094
#endif
80898095

Diff for: test/CodeGen/builtins-ppc-altivec.c

+24-12
Original file line numberDiff line numberDiff line change
@@ -3419,28 +3419,40 @@ void test6() {
34193419

34203420
/* vec_sl */
34213421
res_vsc = vec_sl(vsc, vuc);
3422-
// CHECK: shl <16 x i8>
3423-
// CHECK-LE: shl <16 x i8>
3422+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
3423+
// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3424+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
3425+
// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34243426

34253427
res_vuc = vec_sl(vuc, vuc);
3426-
// CHECK: shl <16 x i8>
3427-
// CHECK-LE: shl <16 x i8>
3428+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
3429+
// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3430+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
3431+
// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34283432

34293433
res_vs = vec_sl(vs, vus);
3430-
// CHECK: shl <8 x i16>
3431-
// CHECK-LE: shl <8 x i16>
3434+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
3435+
// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3436+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
3437+
// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34323438

34333439
res_vus = vec_sl(vus, vus);
3434-
// CHECK: shl <8 x i16>
3435-
// CHECK-LE: shl <8 x i16>
3440+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
3441+
// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3442+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
3443+
// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34363444

34373445
res_vi = vec_sl(vi, vui);
3438-
// CHECK: shl <4 x i32>
3439-
// CHECK-LE: shl <4 x i32>
3446+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
3447+
// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3448+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
3449+
// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34403450

34413451
res_vui = vec_sl(vui, vui);
3442-
// CHECK: shl <4 x i32>
3443-
// CHECK-LE: shl <4 x i32>
3452+
// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
3453+
// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
3454+
// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
3455+
// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
34443456

34453457
res_vsc = vec_vslb(vsc, vuc);
34463458
// CHECK: shl <16 x i8>

0 commit comments

Comments
 (0)