Skip to content

Commit 5a4f7cf

Browse files
committed
[IR] allow fast-math-flags on select of FP values
This is a minimal start to correcting a problem most directly discussed in PR38086:
https://bugs.llvm.org/show_bug.cgi?id=38086

We have been hacking around a limitation for FP select patterns by using the fast-math-flags on the condition of the select rather than the select itself.

This patch just allows FMF to appear with the 'select' opcode. No changes are needed to "FPMathOperator" because it already includes select-of-FP because that definition is based on the (return) value type.

Once we have this ability, we can start correcting and adding IR transforms to use the FMF on a 'select' instruction. The instcombine and vectorizer test diffs only show that the IRBuilder change is behaving as expected by applying an FMF guard value to 'select'.

For reference:
rL241901 - allowed FMF with fcmp
rL255555 - allowed FMF with FP calls

Differential Revision: https://reviews.llvm.org/D61917

llvm-svn: 361401
1 parent 63305c8 commit 5a4f7cf

File tree

11 files changed

+92
-36
lines changed

11 files changed

+92
-36
lines changed

llvm/docs/LangRef.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -9931,7 +9931,7 @@ Syntax:
99319931

99329932
::
99339933

9934-
<result> = select selty <cond>, <ty> <val1>, <ty> <val2> ; yields ty
9934+
<result> = select [fast-math flags] selty <cond>, <ty> <val1>, <ty> <val2> ; yields ty
99359935

99369936
selty is either i1 or {<N x i1>}
99379937

@@ -9948,6 +9948,11 @@ The '``select``' instruction requires an 'i1' value or a vector of 'i1'
99489948
values indicating the condition, and two values of the same :ref:`first
99499949
class <t_firstclass>` type.
99509950

9951+
#. The optional ``fast-math flags`` marker indicates that the select has one or more
9952+
:ref:`fast-math flags <fastmath>`. These are optimization hints to enable
9953+
otherwise unsafe floating-point optimizations. Fast-math flags are only valid
9954+
for selects that return a floating-point scalar or vector type.
9955+
99519956
Semantics:
99529957
""""""""""
99539958

llvm/include/llvm/IR/IRBuilder.h

+2
Original file line numberDiff line numberDiff line change
@@ -2067,6 +2067,8 @@ class IRBuilder : public IRBuilderBase, public Inserter {
20672067
MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable);
20682068
Sel = addBranchMetadata(Sel, Prof, Unpred);
20692069
}
2070+
if (isa<FPMathOperator>(Sel))
2071+
Sel = cast<SelectInst>(setFPAttrs(Sel, nullptr /* MDNode* */, FMF));
20702072
return Insert(Sel, Name);
20712073
}
20722074

llvm/lib/AsmParser/LLParser.cpp

+13-1
Original file line numberDiff line numberDiff line change
@@ -5701,7 +5701,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
57015701
case lltok::kw_inttoptr:
57025702
case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
57035703
// Other.
5704-
case lltok::kw_select: return ParseSelect(Inst, PFS);
5704+
case lltok::kw_select: {
5705+
FastMathFlags FMF = EatFastMathFlagsIfPresent();
5706+
int Res = ParseSelect(Inst, PFS);
5707+
if (Res != 0)
5708+
return Res;
5709+
if (FMF.any()) {
5710+
if (!Inst->getType()->isFPOrFPVectorTy())
5711+
return Error(Loc, "fast-math-flags specified for select without "
5712+
"floating-point scalar or vector return type");
5713+
Inst->setFastMathFlags(FMF);
5714+
}
5715+
return 0;
5716+
}
57055717
case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
57065718
case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
57075719
case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -3835,6 +3835,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
38353835

38363836
I = SelectInst::Create(Cond, TrueVal, FalseVal);
38373837
InstructionList.push_back(I);
3838+
if (OpNum < Record.size() && isa<FPMathOperator>(I)) {
3839+
FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
3840+
if (FMF.any())
3841+
I->setFastMathFlags(FMF);
3842+
}
38383843
break;
38393844
}
38403845

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -2636,12 +2636,16 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
26362636
Vals.append(IVI->idx_begin(), IVI->idx_end());
26372637
break;
26382638
}
2639-
case Instruction::Select:
2639+
case Instruction::Select: {
26402640
Code = bitc::FUNC_CODE_INST_VSELECT;
26412641
pushValueAndType(I.getOperand(1), InstID, Vals);
26422642
pushValue(I.getOperand(2), InstID, Vals);
26432643
pushValueAndType(I.getOperand(0), InstID, Vals);
2644+
uint64_t Flags = getOptimizationFlags(&I);
2645+
if (Flags != 0)
2646+
Vals.push_back(Flags);
26442647
break;
2648+
}
26452649
case Instruction::ExtractElement:
26462650
Code = bitc::FUNC_CODE_INST_EXTRACTELT;
26472651
pushValueAndType(I.getOperand(0), InstID, Vals);

llvm/test/Bitcode/compatibility.ll

+28
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,34 @@ define void @fastmathflags_binops(float %op1, float %op2) {
815815
ret void
816816
}
817817

818+
define void @fastmathflags_select(i1 %cond, float %op1, float %op2) {
819+
%f.nnan = select nnan i1 %cond, float %op1, float %op2
820+
; CHECK: %f.nnan = select nnan i1 %cond, float %op1, float %op2
821+
%f.ninf = select ninf i1 %cond, float %op1, float %op2
822+
; CHECK: %f.ninf = select ninf i1 %cond, float %op1, float %op2
823+
%f.nsz = select nsz i1 %cond, float %op1, float %op2
824+
; CHECK: %f.nsz = select nsz i1 %cond, float %op1, float %op2
825+
%f.arcp = select arcp i1 %cond, float %op1, float %op2
826+
; CHECK: %f.arcp = select arcp i1 %cond, float %op1, float %op2
827+
%f.contract = select contract i1 %cond, float %op1, float %op2
828+
; CHECK: %f.contract = select contract i1 %cond, float %op1, float %op2
829+
%f.afn = select afn i1 %cond, float %op1, float %op2
830+
; CHECK: %f.afn = select afn i1 %cond, float %op1, float %op2
831+
%f.reassoc = select reassoc i1 %cond, float %op1, float %op2
832+
; CHECK: %f.reassoc = select reassoc i1 %cond, float %op1, float %op2
833+
%f.fast = select fast i1 %cond, float %op1, float %op2
834+
; CHECK: %f.fast = select fast i1 %cond, float %op1, float %op2
835+
ret void
836+
}
837+
838+
define void @fastmathflags_vector_select(<2 x i1> %cond, <2 x double> %op1, <2 x double> %op2) {
839+
%f.nnan.nsz = select nnan nsz <2 x i1> %cond, <2 x double> %op1, <2 x double> %op2
840+
; CHECK: %f.nnan.nsz = select nnan nsz <2 x i1> %cond, <2 x double> %op1, <2 x double> %op2
841+
%f.fast = select fast <2 x i1> %cond, <2 x double> %op1, <2 x double> %op2
842+
; CHECK: %f.fast = select fast <2 x i1> %cond, <2 x double> %op1, <2 x double> %op2
843+
ret void
844+
}
845+
818846
; Check various fast math flags and floating-point types on calls.
819847

820848
declare float @fmf1()

llvm/test/CodeGen/Generic/expand-experimental-reductions.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ define double @fmax_f64(<2 x double> %vec) {
277277
; CHECK-NEXT: entry:
278278
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
279279
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
280-
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
280+
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
281281
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
282282
; CHECK-NEXT: ret double [[TMP0]]
283283
;
@@ -291,7 +291,7 @@ define double @fmin_f64(<2 x double> %vec) {
291291
; CHECK-NEXT: entry:
292292
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
293293
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
294-
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
294+
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
295295
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
296296
; CHECK-NEXT: ret double [[TMP0]]
297297
;

llvm/test/Transforms/InstCombine/fast-math.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ declare fp128 @fminl(fp128, fp128)
820820
define float @max1(float %a, float %b) {
821821
; CHECK-LABEL: @max1(
822822
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
823-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
823+
; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
824824
; CHECK-NEXT: ret float [[TMP2]]
825825
;
826826
%c = fpext float %a to double
@@ -833,7 +833,7 @@ define float @max1(float %a, float %b) {
833833
define float @max2(float %a, float %b) {
834834
; CHECK-LABEL: @max2(
835835
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
836-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
836+
; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
837837
; CHECK-NEXT: ret float [[TMP2]]
838838
;
839839
%c = call nnan float @fmaxf(float %a, float %b)
@@ -844,7 +844,7 @@ define float @max2(float %a, float %b) {
844844
define double @max3(double %a, double %b) {
845845
; CHECK-LABEL: @max3(
846846
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
847-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
847+
; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], double [[A]], double [[B]]
848848
; CHECK-NEXT: ret double [[TMP2]]
849849
;
850850
%c = call fast double @fmax(double %a, double %b)
@@ -854,7 +854,7 @@ define double @max3(double %a, double %b) {
854854
define fp128 @max4(fp128 %a, fp128 %b) {
855855
; CHECK-LABEL: @max4(
856856
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
857-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
857+
; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
858858
; CHECK-NEXT: ret fp128 [[TMP2]]
859859
;
860860
%c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
@@ -865,7 +865,7 @@ define fp128 @max4(fp128 %a, fp128 %b) {
865865
define float @min1(float %a, float %b) {
866866
; CHECK-LABEL: @min1(
867867
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
868-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
868+
; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]]
869869
; CHECK-NEXT: ret float [[TMP2]]
870870
;
871871
%c = fpext float %a to double
@@ -878,7 +878,7 @@ define float @min1(float %a, float %b) {
878878
define float @min2(float %a, float %b) {
879879
; CHECK-LABEL: @min2(
880880
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
881-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
881+
; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]]
882882
; CHECK-NEXT: ret float [[TMP2]]
883883
;
884884
%c = call fast float @fminf(float %a, float %b)
@@ -888,7 +888,7 @@ define float @min2(float %a, float %b) {
888888
define double @min3(double %a, double %b) {
889889
; CHECK-LABEL: @min3(
890890
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
891-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
891+
; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], double [[A]], double [[B]]
892892
; CHECK-NEXT: ret double [[TMP2]]
893893
;
894894
%c = call nnan double @fmin(double %a, double %b)
@@ -898,7 +898,7 @@ define double @min3(double %a, double %b) {
898898
define fp128 @min4(fp128 %a, fp128 %b) {
899899
; CHECK-LABEL: @min4(
900900
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
901-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
901+
; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
902902
; CHECK-NEXT: ret fp128 [[TMP2]]
903903
;
904904
%c = call fast fp128 @fminl(fp128 %a, fp128 %b)

llvm/test/Transforms/InstCombine/pow-sqrt.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define double @pow_libcall_half_approx(double %x) {
3636
; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]])
3737
; CHECK-NEXT: [[ABS:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT]])
3838
; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq double [[X]], 0xFFF0000000000000
39-
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
39+
; CHECK-NEXT: [[TMP1:%.*]] = select afn i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
4040
; CHECK-NEXT: ret double [[TMP1]]
4141
;
4242
%pow = call afn double @pow(double %x, double 5.0e-01)
@@ -48,7 +48,7 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) {
4848
; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
4949
; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
5050
; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
51-
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[ISINF]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[ABS]]
51+
; CHECK-NEXT: [[TMP1:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[ABS]]
5252
; CHECK-NEXT: ret <2 x double> [[TMP1]]
5353
;
5454
%pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>)
@@ -92,7 +92,7 @@ define double @pow_libcall_half_nsz(double %x) {
9292
; CHECK-LABEL: @pow_libcall_half_nsz(
9393
; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]])
9494
; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
95-
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
95+
; CHECK-NEXT: [[TMP1:%.*]] = select nsz i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
9696
; CHECK-NEXT: ret double [[TMP1]]
9797
;
9898
%pow = call nsz double @pow(double %x, double 5.0e-01)
@@ -103,7 +103,7 @@ define double @pow_intrinsic_half_nsz(double %x) {
103103
; CHECK-LABEL: @pow_intrinsic_half_nsz(
104104
; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @llvm.sqrt.f64(double [[X:%.*]])
105105
; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
106-
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
106+
; CHECK-NEXT: [[TMP1:%.*]] = select nsz i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
107107
; CHECK-NEXT: ret double [[TMP1]]
108108
;
109109
%pow = call nsz double @llvm.pow.f64(double %x, double 5.0e-01)

llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ define float @minloopattr(float* nocapture readonly %arg) #0 {
7474
; CHECK: middle.block:
7575
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
7676
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP6]], [[RDX_SHUF]]
77-
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP6]], <4 x float> [[RDX_SHUF]]
77+
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP6]], <4 x float> [[RDX_SHUF]]
7878
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
7979
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
80-
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
80+
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
8181
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
8282
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 65536, 65536
8383
; CHECK-NEXT: br i1 [[CMP_N]], label [[OUT:%.*]], label [[SCALAR_PH]]

0 commit comments

Comments
 (0)