Skip to content

Commit 8bb46b0

Browse files
committed
[SystemZ] Make better use of VGEF/VGEG
The current code in SystemZDAGToDAGISel::tryGather refuses to perform any transformation if the Load SDNode has more than one use. This check erroneously counts uses of the chain result as well, which unnecessarily prevents the optimization in many cases. This patch fixes the check so that only uses of the loaded value (result 0) are counted. llvm-svn: 349748
1 parent 36a3480 commit 8bb46b0

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1147,7 +1147,7 @@ bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
11471147
return false;
11481148

11491149
auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
1150-
if (!Load || !Load->hasOneUse())
1150+
if (!Load || !Load->hasNUsesOfValue(1, 0))
11511151
return false;
11521152
if (Load->getMemoryVT().getSizeInBits() !=
11531153
Load->getValueType(0).getSizeInBits())

llvm/test/CodeGen/SystemZ/vec-move-08.ll

+32
Original file line numberDiff line numberDiff line change
@@ -442,3 +442,35 @@ define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
442442
%ret = insertelement <2 x double> %val, double %element, i32 1
443443
ret <2 x double> %ret
444444
}
445+
446+
; Test a v4i32 gather where the load is chained.
447+
define void @f40(<4 x i32> %val, <4 x i32> %index, i64 %base, <4 x i32> *%res) {
448+
; CHECK-LABEL: f40:
449+
; CHECK: vgef %v24, 0(%v26,%r2), 1
450+
; CHECK: vst %v24, 0(%r3)
451+
; CHECK: br %r14
452+
%elem = extractelement <4 x i32> %index, i32 1
453+
%ext = zext i32 %elem to i64
454+
%add = add i64 %base, %ext
455+
%ptr = inttoptr i64 %add to i32 *
456+
%element = load i32, i32 *%ptr
457+
%ret = insertelement <4 x i32> %val, i32 %element, i32 1
458+
store <4 x i32> %ret, <4 x i32> *%res
459+
ret void
460+
}
461+
462+
; Test a v2i64 gather where the load is chained.
463+
define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base, <2 x i64> *%res) {
464+
; CHECK-LABEL: f41:
465+
; CHECK: vgeg %v24, 0(%v26,%r2), 1
466+
; CHECK: vst %v24, 0(%r3)
467+
; CHECK: br %r14
468+
%elem = extractelement <2 x i64> %index, i32 1
469+
%add = add i64 %base, %elem
470+
%ptr = inttoptr i64 %add to i64 *
471+
%element = load i64, i64 *%ptr
472+
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
473+
store <2 x i64> %ret, <2 x i64> *%res
474+
ret void
475+
}
476+

0 commit comments

Comments
 (0)