Skip to content

Commit 0ba8db4

Browse files
committed
Begin fleshing out an interface in TTI for modelling the costs of
generic function calls and intrinsics. This is somewhat overlapping with an existing intrinsic cost method, but that one seems targeted at vector intrinsics. I'll merge them or separate their names and use cases in a separate commit. This sinks the test of 'callIsSmall' down into TTI where targets can control it. The whole thing feels very hack-ish to me though. I've left a FIXME comment about the fundamental design problem this presents. It isn't yet clear to me what the users of this function *really* care about. I'll have to do more analysis to figure that out. Putting this here at least provides it access to proper analysis pass tools and other such. It also allows us to more cleanly implement the baseline cost interfaces in TTI. With this commit, it is now theoretically possible to simplify much of the inline cost analysis's handling of calls by calling through to this interface. That conversion will have to happen in subsequent commits as it requires more extensive restructuring of the inline cost analysis. The CodeMetrics class is now really only in the business of running over a block of code and aggregating the metrics on that block of code, with the actual cost evaluation done entirely in terms of TTI. llvm-svn: 173148
1 parent 1e63b08 commit 0ba8db4

File tree

5 files changed

+225
-66
lines changed

5 files changed

+225
-66
lines changed

Diff for: llvm/include/llvm/Analysis/TargetTransformInfo.h

+49
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,41 @@ class TargetTransformInfo {
117117
virtual unsigned getGEPCost(const Value *Ptr,
118118
ArrayRef<const Value *> Operands) const;
119119

120+
/// \brief Estimate the cost of a function call when lowered.
121+
///
122+
/// The contract for this is the same as \c getOperationCost except that it
123+
/// supports an interface that provides extra information specific to call
124+
/// instructions.
125+
///
126+
/// This is the most basic query for estimating call cost: it only knows the
127+
/// function type and (potentially) the number of arguments at the call site.
128+
/// The latter is only interesting for varargs function types.
129+
virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;
130+
131+
/// \brief Estimate the cost of calling a specific function when lowered.
132+
///
133+
/// This overload adds the ability to reason about the particular function
134+
/// being called in the event it is a library call with special lowering.
135+
virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const;
136+
137+
/// \brief Estimate the cost of calling a specific function when lowered.
138+
///
139+
/// This overload allows specifying a set of candidate argument values.
140+
virtual unsigned getCallCost(const Function *F,
141+
ArrayRef<const Value *> Arguments) const;
142+
143+
/// \brief Estimate the cost of an intrinsic when lowered.
144+
///
145+
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
146+
virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
147+
ArrayRef<Type *> ParamTys) const;
148+
149+
/// \brief Estimate the cost of an intrinsic when lowered.
150+
///
151+
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
152+
virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
153+
ArrayRef<const Value *> Arguments) const;
154+
120155
/// \brief Estimate the cost of a given IR user when lowered.
121156
///
122157
/// This can estimate the cost of either a ConstantExpr or Instruction when
@@ -134,6 +169,20 @@ class TargetTransformInfo {
134169
/// comments for a detailed explanation of the cost values.
135170
virtual unsigned getUserCost(const User *U) const;
136171

172+
/// \brief Test whether calls to a function lower to actual program function
173+
/// calls.
174+
///
175+
/// The idea is to test whether the program is likely to require a 'call'
176+
/// instruction or equivalent in order to call the given function.
177+
///
178+
/// FIXME: It's not clear that this is a good or useful query API. Clients
179+
/// should probably move to simpler cost metrics using the above.
180+
/// Alternatively, we could split the cost interface into distinct code-size
181+
/// and execution-speed costs. This would allow modelling the core of this
182+
/// query more accurately as a call is a single small instruction, but
183+
/// incurs significant execution cost.
184+
virtual bool isLoweredToCall(const Function *F) const;
185+
137186
/// @}
138187

139188
/// \name Scalar Target Information

Diff for: llvm/lib/Analysis/CodeMetrics.cpp

+4-44
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,6 @@
2020

2121
using namespace llvm;
2222

23-
/// callIsSmall - If a call is likely to lower to a single target instruction,
24-
/// or is otherwise deemed small return true.
25-
/// TODO: Perhaps calls like memcpy, strcpy, etc?
26-
bool llvm::callIsSmall(ImmutableCallSite CS) {
27-
if (isa<IntrinsicInst>(CS.getInstruction()))
28-
return true;
29-
30-
const Function *F = CS.getCalledFunction();
31-
if (!F) return false;
32-
33-
if (F->hasLocalLinkage()) return false;
34-
35-
if (!F->hasName()) return false;
36-
37-
StringRef Name = F->getName();
38-
39-
// These will all likely lower to a single selection DAG node.
40-
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
41-
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
42-
Name == "sin" || Name == "sinf" || Name == "sinl" ||
43-
Name == "cos" || Name == "cosf" || Name == "cosl" ||
44-
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
45-
return true;
46-
47-
// These are all likely to be optimized into something smaller.
48-
if (Name == "pow" || Name == "powf" || Name == "powl" ||
49-
Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
50-
Name == "floor" || Name == "floorf" || Name == "ceil" ||
51-
Name == "round" || Name == "ffs" || Name == "ffsl" ||
52-
Name == "abs" || Name == "labs" || Name == "llabs")
53-
return true;
54-
55-
return false;
56-
}
57-
5823
/// analyzeBasicBlock - Fill in the current structure with information gleaned
5924
/// from the specified block.
6025
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
@@ -63,9 +28,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
6328
unsigned NumInstsBeforeThisBB = NumInsts;
6429
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
6530
II != E; ++II) {
66-
if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&*II))
67-
continue;
68-
6931
// Special handling for calls.
7032
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
7133
ImmutableCallSite CS(cast<Instruction>(II));
@@ -83,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
8345
// for that case.
8446
if (F == BB->getParent())
8547
isRecursive = true;
86-
}
87-
88-
if (!callIsSmall(CS)) {
89-
// Each argument to a call takes on average one instruction to set up.
90-
NumInsts += CS.arg_size();
9148

49+
if (TTI.isLoweredToCall(F))
50+
++NumCalls;
51+
} else {
9252
// We don't want inline asm to count as a call - that would prevent loop
9353
// unrolling. The argument setup cost is still real, though.
9454
if (!isa<InlineAsm>(CS.getCalledValue()))
@@ -112,7 +72,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
11272
if (InvI->hasFnAttr(Attribute::NoDuplicate))
11373
notDuplicatable = true;
11474

115-
++NumInsts;
75+
NumInsts += TTI.getUserCost(&*II);
11676
}
11777

11878
if (isa<ReturnInst>(BB->getTerminator()))

Diff for: llvm/lib/Analysis/IPA/InlineCost.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
736736
return false;
737737
}
738738

739-
if (!callIsSmall(CS)) {
739+
if (TTI.isLoweredToCall(F)) {
740740
// We account for the average 1 instruction per call argument setup
741741
// here.
742742
Cost += CS.arg_size() * InlineConstants::InstrCost;

Diff for: llvm/lib/Analysis/TargetTransformInfo.cpp

+154-18
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/IR/Instruction.h"
1515
#include "llvm/IR/IntrinsicInst.h"
1616
#include "llvm/IR/Instructions.h"
17+
#include "llvm/Support/CallSite.h"
1718
#include "llvm/Support/ErrorHandling.h"
1819

1920
using namespace llvm;
@@ -58,10 +59,39 @@ unsigned TargetTransformInfo::getGEPCost(
5859
return PrevTTI->getGEPCost(Ptr, Operands);
5960
}
6061

62+
// Base-class implementation of the function-type call cost query: forwards
// both arguments unchanged to PrevTTI and returns its answer.
unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
63+
int NumArgs) const {
64+
return PrevTTI->getCallCost(FTy, NumArgs);
65+
}
66+
67+
// Base-class implementation of the concrete-function call cost query:
// forwards the function and argument count unchanged to PrevTTI.
unsigned TargetTransformInfo::getCallCost(const Function *F,
68+
int NumArgs) const {
69+
return PrevTTI->getCallCost(F, NumArgs);
70+
}
71+
72+
// Base-class implementation of the call cost query that takes candidate
// argument values: forwards the function and argument list to PrevTTI.
unsigned TargetTransformInfo::getCallCost(
73+
const Function *F, ArrayRef<const Value *> Arguments) const {
74+
return PrevTTI->getCallCost(F, Arguments);
75+
}
76+
77+
// Base-class implementation of the type-based intrinsic cost query: forwards
// the intrinsic ID, return type and parameter types unchanged to PrevTTI.
unsigned TargetTransformInfo::getIntrinsicCost(
78+
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
79+
return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
80+
}
81+
82+
// Base-class implementation of the value-based intrinsic cost query: forwards
// the intrinsic ID, return type and argument values unchanged to PrevTTI.
unsigned TargetTransformInfo::getIntrinsicCost(
83+
Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
84+
return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
85+
}
86+
6187
// Base-class implementation of the IR user cost query: forwards the user
// unchanged to PrevTTI and returns its answer.
unsigned TargetTransformInfo::getUserCost(const User *U) const {
6288
return PrevTTI->getUserCost(U);
6389
}
6490

91+
// Base-class implementation of the "does this lower to a real call" query:
// forwards the function unchanged to PrevTTI and returns its answer.
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
92+
return PrevTTI->isLoweredToCall(F);
93+
}
94+
6595
// Base-class implementation of the add-immediate legality query: forwards
// the immediate unchanged to PrevTTI and returns its answer.
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
6696
return PrevTTI->isLegalAddImmediate(Imm);
6797
}
@@ -179,6 +209,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
179209
// Sets up the three pieces of state this pass uses: TopTTI, PrevTTI and DL.
virtual void initializePass() {
180210
// Note that this subclass is special, and must *not* call initializeTTI as
181211
// it does not chain.
212+
// Point TopTTI at this object; the cost methods of this struct dispatch
// through TopTTI.
TopTTI = this;
182213
// No previous implementation to forward to.
PrevTTI = 0;
183214
// NOTE(review): DL presumably may be null when no DataLayout pass is
// registered (the call is the "IfAvailable" variant) -- confirm at use sites.
DL = getAnalysisIfAvailable<DataLayout>();
184215
}
@@ -257,6 +288,84 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
257288
return TCC_Free;
258289
}
259290

291+
unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const {
  assert(FTy && "FunctionType must be provided to this routine.");

  // Target-independent size estimate: one basic instruction for the call
  // itself plus, on average, one per argument that has to be set up.
  //
  // A negative NumArgs means the caller did not supply a call-site argument
  // count, so fall back to the number of explicit parameters in the type.
  int ArgCount = NumArgs;
  if (ArgCount < 0)
    ArgCount = static_cast<int>(FTy->getNumParams());

  return TCC_Basic * (ArgCount + 1);
}
305+
306+
unsigned getCallCost(const Function *F, int NumArgs = -1) const {
307+
assert(F && "A concrete function must be provided to this routine.");
308+
309+
if (NumArgs < 0)
310+
// Set the argument number to the number of explicit arguments in the
311+
// function.
312+
NumArgs = F->arg_size();
313+
314+
if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
315+
FunctionType *FTy = F->getFunctionType();
316+
SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
317+
return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
318+
}
319+
320+
if (!TopTTI->isLoweredToCall(F))
321+
return TCC_Basic; // Give a basic cost if it will be lowered directly.
322+
323+
return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
324+
}
325+
326+
// Baseline cost of calling \p F with the given candidate argument values.
// Only the number of arguments is used here; their values are not inspected.
unsigned getCallCost(const Function *F,
327+
ArrayRef<const Value *> Arguments) const {
328+
// Simply delegate to generic handling of the call.
329+
// FIXME: We should use instsimplify or something else to catch calls which
330+
// will constant fold with these arguments.
331+
return TopTTI->getCallCost(F, Arguments.size());
332+
}
333+
334+
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<Type *> ParamTys) const {
  // Baseline model keyed purely on the intrinsic ID; RetTy and ParamTys are
  // not consulted here.
  //
  // Intrinsics rarely (if ever) have normal argument setup constraints, so
  // start from a basic instruction cost.
  // FIXME: This is wrong for libc intrinsics.
  unsigned Cost = TCC_Basic;

  switch (IID) {
  case Intrinsic::dbg_declare:
  case Intrinsic::dbg_value:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::objectsize:
  case Intrinsic::ptr_annotation:
  case Intrinsic::var_annotation:
    // These intrinsics don't actually represent code after lowering.
    Cost = TCC_Free;
    break;

  default:
    break;
  }

  return Cost;
}
356+
357+
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                          ArrayRef<const Value *> Arguments) const {
  // Reduce the argument values to their types and hand off to the type-based
  // overload. Routing through TopTTI gives targets the opportunity to (for
  // example) special case the cost of certain intrinsics based on constants
  // used as arguments.
  SmallVector<Type *, 8> ArgTys;
  ArgTys.reserve(Arguments.size());
  for (ArrayRef<const Value *>::iterator AI = Arguments.begin(),
                                         AE = Arguments.end();
       AI != AE; ++AI)
    ArgTys.push_back((*AI)->getType());

  return TopTTI->getIntrinsicCost(IID, RetTy, ArgTys);
}
368+
260369
unsigned getUserCost(const User *U) const {
261370
if (isa<PHINode>(U))
262371
return TCC_Free; // Model all PHI nodes as free.
@@ -266,25 +375,21 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
266375
// folded into their uses via addressing modes.
267376
return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
268377

269-
// If we have a call of an intrinsic we can provide more detailed analysis
270-
// by inspecting the particular intrinsic called.
271-
// FIXME: Hoist this out into a getIntrinsicCost routine.
272-
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
273-
switch (II->getIntrinsicID()) {
274-
default:
275-
return TCC_Basic;
276-
case Intrinsic::dbg_declare:
277-
case Intrinsic::dbg_value:
278-
case Intrinsic::invariant_start:
279-
case Intrinsic::invariant_end:
280-
case Intrinsic::lifetime_start:
281-
case Intrinsic::lifetime_end:
282-
case Intrinsic::objectsize:
283-
case Intrinsic::ptr_annotation:
284-
case Intrinsic::var_annotation:
285-
// These intrinsics don't count as size.
286-
return TCC_Free;
378+
if (ImmutableCallSite CS = U) {
379+
const Function *F = CS.getCalledFunction();
380+
if (!F) {
381+
// Just use the called value type.
382+
Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
383+
return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
287384
}
385+
386+
SmallVector<const Value *, 8> Arguments;
387+
for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
388+
AE = CS.arg_end();
389+
AI != AE; ++AI)
390+
Arguments.push_back(*AI);
391+
392+
return TopTTI->getCallCost(F, Arguments);
288393
}
289394

290395
if (const CastInst *CI = dyn_cast<CastInst>(U)) {
@@ -301,6 +406,37 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
301406
U->getOperand(0)->getType() : 0);
302407
}
303408

409+
bool isLoweredToCall(const Function *F) const {
410+
// FIXME: These should almost certainly not be handled here, and instead
411+
// handled with the help of TLI or the target itself. This was largely
412+
// ported from existing analysis heuristics here so that such refactorings
413+
// can take place in the future.
414+
415+
if (F->isIntrinsic())
416+
return false;
417+
418+
if (F->hasLocalLinkage() || !F->hasName())
419+
return true;
420+
421+
StringRef Name = F->getName();
422+
423+
// These will all likely lower to a single selection DAG node.
424+
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
425+
Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
426+
Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
427+
Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
428+
return false;
429+
430+
// These are all likely to be optimized into something smaller.
431+
if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
432+
Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
433+
"floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
434+
Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
435+
return false;
436+
437+
return true;
438+
}
439+
304440
// Baseline answer for the add-immediate legality query: always false,
// regardless of the value of Imm.
bool isLegalAddImmediate(int64_t Imm) const {
305441
return false;
306442
}

0 commit comments

Comments
 (0)