Skip to content

Commit 005d705

Browse files
author
Michael Berg
committed
Migrate some more fadd and fsub cases away from UnsafeFPMath control to utilize NoSignedZerosFPMath options control
Summary: Honoring no signed zeros is also available as a user control through clang, separately and regardless of fastmath or UnsafeFPMath context; the DAG guards should reflect this context. Reviewers: spatel, arsenm, hfinkel, wristow, craig.topper Reviewed By: spatel Subscribers: rampitec, foad, nhaehnle, wuzish, nemanjai, jvesely, wdng, javed.absar, MaskRay, jsji Differential Revision: https://reviews.llvm.org/D65170 llvm-svn: 367486
1 parent f8e7b53 commit 005d705

14 files changed

+534
-455
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
839839
});
840840
}
841841
case ISD::FADD:
842-
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
842+
if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
843843
return 0;
844844

845845
// After operation legalization, it might not be legal to create new FSUBs.
@@ -912,7 +912,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
912912
return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
913913
}
914914
case ISD::FADD:
915-
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
915+
assert(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros());
916916

917917
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
918918
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -12017,7 +12017,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
1201712017
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
1201812018
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
1201912019
if (N1C && N1C->isZero())
12020-
if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
12020+
if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
1202112021
return N0;
1202212022

1202312023
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -12075,7 +12075,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
1207512075
// If 'unsafe math' or reassoc and nsz, fold lots of things.
1207612076
// TODO: break out portions of the transformations below for which Unsafe is
1207712077
// considered and which do not require both nsz and reassoc
12078-
if ((Options.UnsafeFPMath ||
12078+
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
1207912079
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
1208012080
AllowNewConst) {
1208112081
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -12194,7 +12194,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1219412194

1219512195
// (fsub A, 0) -> A
1219612196
if (N1CFP && N1CFP->isZero()) {
12197-
if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
12197+
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
1219812198
Flags.hasNoSignedZeros()) {
1219912199
return N0;
1220012200
}
@@ -12221,7 +12221,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
1222112221
}
1222212222
}
1222312223

12224-
if ((Options.UnsafeFPMath ||
12224+
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
1222512225
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
1222612226
&& N1.getOpcode() == ISD::FADD) {
1222712227
// X - (X + Y) -> -Y

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4630,7 +4630,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
46304630
return getUNDEF(VT);
46314631

46324632
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
4633-
if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
4633+
if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
46344634
OpOpcode == ISD::FSUB)
46354635
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
46364636
Operand.getOperand(0), Flags);

llvm/test/CodeGen/AArch64/fadd-combines.ll

+10-15
Original file line numberDiff line numberDiff line change
@@ -146,28 +146,23 @@ define float @fadd_const_multiuse_fmf(float %x) {
146146
ret float %a3
147147
}
148148

149-
; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).
150-
; The machine combiner transforms this into a chain of 3 dependent adds:
151-
; ((x + 59.0) + 17.0) + x
152-
153-
define float @fadd_const_multiuse_attr(float %x) #0 {
149+
; DAGCombiner transforms this into: (x + 17.0) + (x + 59.0).
150+
define float @fadd_const_multiuse_attr(float %x) {
154151
; CHECK-LABEL: fadd_const_multiuse_attr:
155152
; CHECK: // %bb.0:
156-
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
157153
; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696
158-
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
154+
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
159155
; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]]
160-
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]]
161-
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]]
162-
; CHECK-NEXT: fadd s0, s0, [[TMP2]]
156+
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
157+
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]]
158+
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
159+
; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
163160
; CHECK-NEXT: ret
164-
%a1 = fadd float %x, 42.0
165-
%a2 = fadd float %a1, 17.0
166-
%a3 = fadd float %a1, %a2
161+
%a1 = fadd fast float %x, 42.0
162+
%a2 = fadd fast float %a1, 17.0
163+
%a3 = fadd fast float %a1, %a2
167164
ret float %a3
168165
}
169166

170-
attributes #0 = { "unsafe-fp-math"="true" }
171-
172167
declare void @use(double)
173168

llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll

+19-7
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1-
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
2-
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
3-
; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
1+
; RUN: llc -march=amdgcn < %s | FileCheck --check-prefixes=GCN,GCN-FMF,GCN-SAFE %s
42

53
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
64

75
; Test that the -enable-no-signed-zeros-fp-math flag works
86

9-
; GCN-LABEL: {{^}}fneg_fsub_f32:
7+
; GCN-LABEL: {{^}}fneg_fsub_f32_fmf:
108
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
11-
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
9+
; GCN-FMF-NOT: xor
10+
define amdgpu_kernel void @fneg_fsub_f32_fmf(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
11+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
12+
%add = add i32 %tid, 1
13+
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
14+
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add
15+
%a = load float, float addrspace(1)* %gep, align 4
16+
%b = load float, float addrspace(1)* %b_ptr, align 4
17+
%result = fsub fast float %a, %b
18+
%neg.result = fsub fast float -0.0, %result
19+
store float %neg.result, float addrspace(1)* %out, align 4
20+
ret void
21+
}
1222

13-
; GCN-UNSAFE-NOT: xor
14-
define amdgpu_kernel void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
23+
; GCN-LABEL: {{^}}fneg_fsub_f32_safe:
24+
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
25+
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
26+
define amdgpu_kernel void @fneg_fsub_f32_safe(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
1527
%tid = call i32 @llvm.amdgcn.workitem.id.x()
1628
%add = add i32 %tid, 1
1729
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid

llvm/test/CodeGen/AMDGPU/ffloor.f64.ll

+14-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2-
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
3-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
1+
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2+
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
3+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
44

55
declare double @llvm.fabs.f64(double %Val)
66
declare double @llvm.floor.f64(double) nounwind readnone
@@ -20,7 +20,7 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
2020
; SI: v_add_f64
2121
; SI: s_endpgm
2222
define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
23-
%y = call double @llvm.floor.f64(double %x) nounwind readnone
23+
%y = call fast double @llvm.floor.f64(double %x) nounwind readnone
2424
store double %y, double addrspace(1)* %out
2525
ret void
2626
}
@@ -35,8 +35,8 @@ define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
3535
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
3636
; SI: s_endpgm
3737
define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
38-
%neg = fsub double 0.0, %x
39-
%y = call double @llvm.floor.f64(double %neg) nounwind readnone
38+
%neg = fsub nsz double 0.0, %x
39+
%y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
4040
store double %y, double addrspace(1)* %out
4141
ret void
4242
}
@@ -51,9 +51,9 @@ define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x)
5151
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
5252
; SI: s_endpgm
5353
define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
54-
%abs = call double @llvm.fabs.f64(double %x)
55-
%neg = fsub double 0.0, %abs
56-
%y = call double @llvm.floor.f64(double %neg) nounwind readnone
54+
%abs = call fast double @llvm.fabs.f64(double %x)
55+
%neg = fsub nsz double 0.0, %abs
56+
%y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
5757
store double %y, double addrspace(1)* %out
5858
ret void
5959
}
@@ -62,7 +62,7 @@ define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double
6262
; CI: v_floor_f64_e32
6363
; CI: v_floor_f64_e32
6464
define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
65-
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
65+
%y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
6666
store <2 x double> %y, <2 x double> addrspace(1)* %out
6767
ret void
6868
}
@@ -73,7 +73,7 @@ define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x do
7373
; CI: v_floor_f64_e32
7474
; CI-NOT: v_floor_f64_e32
7575
define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
76-
%y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
76+
%y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
7777
store <3 x double> %y, <3 x double> addrspace(1)* %out
7878
ret void
7979
}
@@ -84,7 +84,7 @@ define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x do
8484
; CI: v_floor_f64_e32
8585
; CI: v_floor_f64_e32
8686
define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
87-
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
87+
%y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
8888
store <4 x double> %y, <4 x double> addrspace(1)* %out
8989
ret void
9090
}
@@ -99,7 +99,7 @@ define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x do
9999
; CI: v_floor_f64_e32
100100
; CI: v_floor_f64_e32
101101
define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
102-
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
102+
%y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
103103
store <8 x double> %y, <8 x double> addrspace(1)* %out
104104
ret void
105105
}
@@ -122,7 +122,7 @@ define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x do
122122
; CI: v_floor_f64_e32
123123
; CI: v_floor_f64_e32
124124
define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
125-
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
125+
%y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
126126
store <16 x double> %y, <16 x double> addrspace(1)* %out
127127
ret void
128128
}

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

+5-2
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,11 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %
219219
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
220220
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
221221
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
222-
; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]],
223-
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]]
222+
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
223+
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
224+
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
225+
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
226+
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
224227

225228
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
226229
.entry:

llvm/test/CodeGen/PowerPC/fma-mutate.ll

+13-6
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,26 @@
33
; same as the FMA target register. The second one is legal. The third
44
; one doesn't fit the feeding-copy pattern.
55

6-
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
6+
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
77
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
88
target triple = "powerpc64-unknown-linux-gnu"
99

1010
declare double @llvm.sqrt.f64(double)
1111

12-
define double @foo3(double %a) nounwind {
13-
%r = call double @llvm.sqrt.f64(double %a)
14-
ret double %r
15-
16-
; CHECK: @foo3
12+
define double @foo3_fmf(double %a) nounwind {
13+
; CHECK: @foo3_fmf
1714
; CHECK-NOT: fmr
1815
; CHECK: xsmaddmdp
1916
; CHECK: xsmaddadp
17+
%r = call fast double @llvm.sqrt.f64(double %a)
18+
ret double %r
19+
}
20+
21+
define double @foo3_safe(double %a) nounwind {
22+
; CHECK: @foo3_safe
23+
; CHECK-NOT: fmr
24+
; CHECK: xssqrtdp
25+
%r = call double @llvm.sqrt.f64(double %a)
26+
ret double %r
2027
}
2128

llvm/test/CodeGen/PowerPC/fmf-propagation.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
44
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
55
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
6-
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL
6+
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
77

88
; Test FP transforms using instruction/node-level fast-math-flags.
99
; We're also checking debug output to verify that FMF is propagated to the newly created nodes.

0 commit comments

Comments
 (0)