Skip to content

Commit 201b191

Browse files
author
Haicheng Wu
committed
Recommit "[InlineCost] Use TTI to check if GEP is free." llvm#3
This is the third attempt to recommit r292526. The original summary: Currently, a GEP is considered free only if its indices are all constant. TTI::getGEPCost() can give a more accurate, target-specific analysis. TTI is already used for the cost of many other instructions. llvm-svn: 292633
1 parent 91fb1f4 commit 201b191

File tree

3 files changed

+50
-2
lines changed

3 files changed

+50
-2
lines changed

llvm/lib/Analysis/InlineCost.cpp

+18-2
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
134134
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
135135
int InstructionCost);
136136
bool isGEPOffsetConstant(GetElementPtrInst &GEP);
137+
bool isGEPFree(GetElementPtrInst &GEP);
137138
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
138139
bool simplifyCallSite(Function *F, CallSite CS);
139140
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@@ -331,6 +332,21 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
331332
return true;
332333
}
333334

335+
/// \brief Use TTI to check whether a GEP is free.
336+
///
337+
/// Respects any simplified values known during the analysis of this callsite.
///
/// \param GEP the getelementptr instruction whose cost is queried.
/// \returns true when TTI.getGEPCost() reports TargetTransformInfo::TCC_Free
/// for the GEP's source element type, pointer operand, and the index list
/// assembled below; false for any other cost.
338+
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
339+
// Index list handed to TTI, built in the same order as the GEP's indices.
SmallVector<Value *, 4> Indices;
340+
for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
341+
// An index with a non-null entry in SimplifiedValues is replaced by that
// constant, so TTI sees the value already known for this callsite.
if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
342+
Indices.push_back(SimpleOp);
343+
else
344+
// No simplified constant recorded: pass the original index through.
Indices.push_back(*I);
345+
// The GEP counts as free only if the target reports exactly TCC_Free.
return TargetTransformInfo::TCC_Free ==
346+
TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
347+
Indices);
348+
}
349+
334350
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
335351
// Check whether inlining will turn a dynamic alloca into a static
336352
// alloca and handle that case.
@@ -396,7 +412,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
396412
// Non-constant GEPs aren't folded, and disable SROA.
397413
if (SROACandidate)
398414
disableSROA(CostIt);
399-
return false;
415+
return isGEPFree(I);
400416
}
401417

402418
// Add the result as a new mapping to Base + Offset.
@@ -422,7 +438,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
422438
// Variable GEPs will require math and will disable SROA.
423439
if (SROACandidate)
424440
disableSROA(CostIt);
425-
return false;
441+
return isGEPFree(I);
426442
}
427443

428444
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
3+
4+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5+
target triple = "aarch64--linux-gnu"
6+
7+
; Caller for the test: it forwards the same %ptr and %i to @inner1 and @inner2,
; giving the inliner one call site per callee to analyze.
define void @outer([4 x i32]* %ptr, i32 %i) {
8+
call void @inner1([4 x i32]* %ptr, i32 %i)
9+
call void @inner2([4 x i32]* %ptr, i32 %i)
10+
ret void
11+
}
12+
; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64.
13+
; Thus, both the gep and ret can be simplified.
14+
; CHECK: Analyzing call of inner1
15+
; CHECK: NumInstructionsSimplified: 2
16+
; CHECK: NumInstructions: 2
17+
define void @inner1([4 x i32]* %ptr, i32 %i) {
18+
; The first index is the constant 0, so the address is %ptr plus a scaled %i
; with no additional constant offset.
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i
19+
ret void
20+
}
21+
22+
; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for
23+
; AArch64. Thus, only the ret can be simplified and not the gep.
24+
; CHECK: Analyzing call of inner2
25+
; CHECK: NumInstructionsSimplified: 1
26+
; CHECK: NumInstructions: 2
27+
define void @inner2([4 x i32]* %ptr, i32 %i) {
28+
; The first index is the constant 1, which adds a fixed offset of one
; [4 x i32] element on top of the scaled %i.
%G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i
29+
ret void
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Mark the tests covered by this config as unsupported when 'AArch64' is not
# listed in config.root.targets.
if not 'AArch64' in config.root.targets:
2+
config.unsupported = True

0 commit comments

Comments
 (0)