Skip to content

Commit c56ffed

Browse files
committed
[SystemZ] Bugfix in isFusableLoadOpStorePattern()
This function is responsible for checking the legality of fusing an instance of load -> op -> store into a single operation. In the SystemZ backend the check was incomplete and a test case emerged with a cycle in the instruction selection DAG as a result. Instead of using the NodeIds to determine node relationships, hasPredecessorHelper() now is used just like in the X86 backend. This handled the failing tests and as well gave a few additional transformations on benchmarks. The SystemZ isFusableLoadOpStorePattern() is now a very near copy of the X86 function, and it seems this could be made a utility function in common code instead. Review: Ulrich Weigand https://reviews.llvm.org/D60255 llvm-svn: 357688
1 parent 5776f66 commit c56ffed

File tree

2 files changed

+50
-15
lines changed

2 files changed

+50
-15
lines changed

llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,9 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
12751275
InputChain = LoadNode->getChain();
12761276
} else if (Chain.getOpcode() == ISD::TokenFactor) {
12771277
SmallVector<SDValue, 4> ChainOps;
1278+
SmallVector<const SDNode *, 4> LoopWorklist;
1279+
SmallPtrSet<const SDNode *, 16> Visited;
1280+
const unsigned int Max = 1024;
12781281
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
12791282
SDValue Op = Chain.getOperand(i);
12801283
if (Op == Load.getValue(1)) {
@@ -1283,28 +1286,26 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
12831286
ChainOps.push_back(Load.getOperand(0));
12841287
continue;
12851288
}
1286-
1287-
// Make sure using Op as part of the chain would not cause a cycle here.
1288-
// In theory, we could check whether the chain node is a predecessor of
1289-
// the load. But that can be very expensive. Instead visit the uses and
1290-
// make sure they all have smaller node id than the load.
1291-
int LoadId = LoadNode->getNodeId();
1292-
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1293-
UE = UI->use_end(); UI != UE; ++UI) {
1294-
if (UI.getUse().getResNo() != 0)
1295-
continue;
1296-
if (UI->getNodeId() > LoadId)
1297-
return false;
1298-
}
1299-
1289+
LoopWorklist.push_back(Op.getNode());
13001290
ChainOps.push_back(Op);
13011291
}
13021292

1303-
if (ChainCheck)
1293+
if (ChainCheck) {
1294+
// Add the other operand of StoredVal to worklist.
1295+
for (SDValue Op : StoredVal->ops())
1296+
if (Op.getNode() != LoadNode)
1297+
LoopWorklist.push_back(Op.getNode());
1298+
1299+
// Check if Load is reachable from any of the nodes in the worklist.
1300+
if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
1301+
true))
1302+
return false;
1303+
13041304
// Make a new TokenFactor with all the other input chains except
13051305
// for the load.
13061306
InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
13071307
MVT::Other, ChainOps);
1308+
}
13081309
}
13091310
if (!ChainCheck)
13101311
return false;
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; Test that this test case does not abort after the folding of load -> add ->
2+
; store into an alsi. This folding is suppose to not happen as it would
3+
; introduce a loop in the DAG.
4+
;
5+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa -consthoist-gep | FileCheck %s
6+
7+
@g_295 = external dso_local unnamed_addr global i32, align 4
8+
@g_672 = external dso_local unnamed_addr global i64, align 8
9+
@g_1484 = external dso_local global <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, align 2
10+
11+
define void @fun() {
12+
; CHECK-LABEL: fun:
13+
14+
bb:
15+
br label %bb1
16+
17+
bb1: ; preds = %bb1, %bb
18+
store i32 2, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 16)
19+
%tmp = icmp slt i32 undef, 3
20+
br i1 %tmp, label %bb1, label %bb2
21+
22+
bb2: ; preds = %bb1
23+
%tmp3 = load i32, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 28)
24+
%tmp4 = load i64, i64* @g_672
25+
%tmp5 = add i64 %tmp4, 1
26+
store i64 %tmp5, i64* @g_672
27+
%tmp6 = icmp eq i64 %tmp5, 0
28+
%tmp7 = zext i1 %tmp6 to i32
29+
%tmp8 = icmp ult i32 %tmp3, %tmp7
30+
%tmp9 = zext i1 %tmp8 to i32
31+
store i32 %tmp9, i32* @g_295
32+
ret void
33+
}
34+

0 commit comments

Comments
 (0)