@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 declare double @llvm.fabs.f64(double %Val)
 declare double @llvm.floor.f64(double) nounwind readnone
@@ -20,7 +20,7 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
 ; SI: v_add_f64
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
-  %y = call double @llvm.floor.f64(double %x) nounwind readnone
+  %y = call fast double @llvm.floor.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -35,8 +35,8 @@ define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
-  %neg = fsub double 0.0, %x
-  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+  %neg = fsub nsz double 0.0, %x
+  %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -51,9 +51,9 @@ define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x)
 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
 ; SI: s_endpgm
 define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
-  %abs = call double @llvm.fabs.f64(double %x)
-  %neg = fsub double 0.0, %abs
-  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+  %abs = call fast double @llvm.fabs.f64(double %x)
+  %neg = fsub nsz double 0.0, %abs
+  %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
@@ -62,7 +62,7 @@ define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
-  %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
+  %y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
   store <2 x double> %y, <2 x double> addrspace(1)* %out
   ret void
 }
@@ -73,7 +73,7 @@ define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x do
 ; CI: v_floor_f64_e32
 ; CI-NOT: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-  %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
+  %y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
   store <3 x double> %y, <3 x double> addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@ define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
-  %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+  %y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
   store <4 x double> %y, <4 x double> addrspace(1)* %out
   ret void
 }
@@ -99,7 +99,7 @@ define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
-  %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
+  %y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
   store <8 x double> %y, <8 x double> addrspace(1)* %out
   ret void
 }
@@ -122,7 +122,7 @@ define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
-  %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
+  %y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
   store <16 x double> %y, <16 x double> addrspace(1)* %out
   ret void
 }