Skip to content

Commit 5ec1845

Browse files
committed
[AArch64][GlobalISel] Add a new reassociation for G_PTR_ADDs.
G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)

Improves CTMark -Os on AArch64:

Program             before  after   diff
sqlite3             286932  287024   0.0%
kc                  432512  432508  -0.0%
SPASS               412788  412764  -0.0%
pairlocalalign      249460  249416  -0.0%
bullet              475740  475512  -0.0%
7zip-benchmark      568864  568356  -0.1%
consumer-typeset    419088  418648  -0.1%
tramp3d-v4          367628  367224  -0.1%
clamscan            383184  382732  -0.1%
lencod              430028  429284  -0.2%
Geomean difference                  -0.1%

Differential Revision: https://reviews.llvm.org/D109528
1 parent 1ac209e commit 5ec1845

File tree

4 files changed

+217
-87
lines changed

4 files changed

+217
-87
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,14 @@ class CombinerHelper {
577577
/// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
578578
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
579579

580+
// Helpers for reassociation:
581+
/// Match: G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
/// \p RHS is the instruction checked for being the G_ADD; on a match
/// \p MatchInfo receives the rewrite to apply.
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
582+
BuildFnTy &MatchInfo);
583+
/// Match: G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
/// \p LHS is the instruction checked for being the inner G_PTR_ADD; on a
/// match \p MatchInfo receives the rewrite to apply.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS,
584+
MachineInstr *RHS,
585+
BuildFnTy &MatchInfo);
586+
/// Match: G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
/// if and only if the inner G_PTR_ADD has one use. On a match \p MatchInfo
/// receives the rewrite to apply.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS,
587+
MachineInstr *RHS, BuildFnTy &MatchInfo);
580588
/// Reassociate pointer calculations with G_ADD involved, to allow better
581589
/// addressing mode usage.
582590
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 102 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4090,9 +4090,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
40904090
return false;
40914091
}
40924092

4093-
bool CombinerHelper::matchReassocPtrAdd(
4094-
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4095-
assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
4093+
// Tries to reassociate a G_PTR_ADD whose offset is a G_ADD with a constant
// second operand, so that the constant ends up as the outer offset.
//
// Returns false when RHS is not a G_ADD, when getConstantVRegVal finds no
// constant for RHS's operand 2, or when
// reassociationCanBreakAddressingModePattern(MI) returns true. Note that
// MatchInfo is assigned before that last check, so it can be overwritten even
// when this function returns false.
bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4094+
MachineInstr *RHS,
4095+
BuildFnTy &MatchInfo) {
4096+
// G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4097+
Register Src1Reg = MI.getOperand(1).getReg();
4098+
if (RHS->getOpcode() != TargetOpcode::G_ADD)
4099+
return false;
4100+
// Only the G_ADD's second source operand is checked for a constant.
auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4101+
if (!C2)
4102+
return false;
4103+
4104+
// The rewrite builds a new G_PTR_ADD of the original base and the G_ADD's
// first operand, then repoints MI at that new base and at the G_ADD's
// second (constant) operand register.
MatchInfo = [=, &MI](MachineIRBuilder &B) {
4105+
LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4106+
4107+
// NOTE(review): this uses the member `Builder`, not the `B` parameter
// (which is unused here) -- presumably the same builder; confirm.
auto NewBase =
4108+
Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4109+
Observer.changingInstr(MI);
4110+
MI.getOperand(1).setReg(NewBase.getReg(0));
4111+
MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4112+
Observer.changedInstr(MI);
4113+
};
4114+
return !reassociationCanBreakAddressingModePattern(MI);
4115+
}
4116+
4117+
// Tries to move a constant offset from an inner G_PTR_ADD (the base of MI) to
// the outer G_PTR_ADD by swapping the two offset registers.
//
// The pattern is matched on MI.getBaseReg(); LHS is then cast to GPtrAdd, so
// it is expected to be the instruction defining that register (the caller in
// this file passes MRI.getVRegDef(PtrAdd.getBaseReg())). RHS is not used in
// this function.
//
// Returns false when the pattern does not match or when
// reassociationCanBreakAddressingModePattern(MI) returns true; in the latter
// case MatchInfo has already been assigned.
bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4118+
MachineInstr *LHS,
4119+
MachineInstr *RHS,
4120+
BuildFnTy &MatchInfo) {
4121+
// G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4122+
// if and only if (G_PTR_ADD X, C) has one use.
4123+
Register LHSBase;
4124+
Register LHSCstOff;
4125+
if (!mi_match(MI.getBaseReg(), MRI,
4126+
m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_ICst(LHSCstOff)))))
4127+
return false;
4128+
4129+
auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4130+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
4131+
// When we change LHSPtrAdd's offset register we might cause it to use a reg
4132+
// before its def. Sink the instruction to just before the outer PTR_ADD to
4133+
// ensure this doesn't happen.
4134+
LHSPtrAdd->moveBefore(&MI);
4135+
// Read the outer offset before MI is modified; it becomes the inner offset.
Register RHSReg = MI.getOffsetReg();
4136+
Observer.changingInstr(MI);
4137+
MI.getOperand(2).setReg(LHSCstOff);
4138+
Observer.changedInstr(MI);
4139+
Observer.changingInstr(*LHSPtrAdd);
4140+
LHSPtrAdd->getOperand(2).setReg(RHSReg);
4141+
Observer.changedInstr(*LHSPtrAdd);
4142+
};
4143+
return !reassociationCanBreakAddressingModePattern(MI);
4144+
}
4145+
4146+
// Tries to fold the constant offsets of two nested G_PTR_ADDs into a single
// constant offset applied to the inner base.
//
// Returns false when LHS is not a GPtrAdd, when getConstantVRegVal finds no
// constant for either offset register, or when
// reassociationCanBreakAddressingModePattern(MI) returns true; in the latter
// case MatchInfo has already been assigned. RHS is not used in this function.
bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
4147+
MachineInstr *LHS,
4148+
MachineInstr *RHS,
4149+
BuildFnTy &MatchInfo) {
4150+
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4151+
auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4152+
if (!LHSPtrAdd)
4153+
return false;
4154+
4155+
Register Src2Reg = MI.getOperand(2).getReg();
4156+
Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4157+
Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4158+
// C1 is the inner offset and C2 the outer offset; both must be constants.
auto C1 = getConstantVRegVal(LHSSrc2, MRI);
4159+
if (!C1)
4160+
return false;
4161+
auto C2 = getConstantVRegVal(Src2Reg, MRI);
4162+
if (!C2)
4163+
return false;
4164+
4165+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
4166+
// The folded constant is built with the type of the outer offset register.
auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4167+
Observer.changingInstr(MI);
4168+
MI.getOperand(1).setReg(LHSSrc1);
4169+
MI.getOperand(2).setReg(NewCst.getReg(0));
4170+
Observer.changedInstr(MI);
4171+
};
4172+
return !reassociationCanBreakAddressingModePattern(MI);
4173+
}
4174+
4175+
bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
4176+
BuildFnTy &MatchInfo) {
4177+
auto &PtrAdd = cast<GPtrAdd>(MI);
40964178
// We're trying to match a few pointer computation patterns here for
40974179
// re-association opportunities.
40984180
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4101,49 +4183,26 @@ bool CombinerHelper::matchReassocPtrAdd(
41014183
// 2) Folding two constants in each sub-tree as long as such folding
41024184
// doesn't break a legal addressing mode.
41034185
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4104-
Register Src1Reg = MI.getOperand(1).getReg();
4105-
Register Src2Reg = MI.getOperand(2).getReg();
4106-
MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
4107-
MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
4108-
4109-
if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
4110-
// Try to match example 1).
4111-
if (RHS->getOpcode() != TargetOpcode::G_ADD)
4112-
return false;
4113-
auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4114-
if (!C2)
4115-
return false;
4186+
//
4187+
// 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4188+
// G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4189+
// iff (G_PTR_ADD X, C) has one use.
4190+
MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4191+
MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4192+
4193+
// Try to match example 2.
4194+
if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4195+
return true;
41164196

4117-
MatchInfo = [=,&MI](MachineIRBuilder &B) {
4118-
LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4197+
// Try to match example 3.
4198+
if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4199+
return true;
41194200

4120-
auto NewBase =
4121-
Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4122-
Observer.changingInstr(MI);
4123-
MI.getOperand(1).setReg(NewBase.getReg(0));
4124-
MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4125-
Observer.changedInstr(MI);
4126-
};
4127-
} else {
4128-
// Try to match example 2.
4129-
Register LHSSrc1 = LHS->getOperand(1).getReg();
4130-
Register LHSSrc2 = LHS->getOperand(2).getReg();
4131-
auto C1 = getConstantVRegVal(LHSSrc2, MRI);
4132-
if (!C1)
4133-
return false;
4134-
auto C2 = getConstantVRegVal(Src2Reg, MRI);
4135-
if (!C2)
4136-
return false;
4201+
// Try to match example 1.
4202+
if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4203+
return true;
41374204

4138-
MatchInfo = [=, &MI](MachineIRBuilder &B) {
4139-
auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4140-
Observer.changingInstr(MI);
4141-
MI.getOperand(1).setReg(LHSSrc1);
4142-
MI.getOperand(2).setReg(NewCst.getReg(0));
4143-
Observer.changedInstr(MI);
4144-
};
4145-
}
4146-
return !reassociationCanBreakAddressingModePattern(MI);
4205+
return false;
41474206
}
41484207

41494208
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {

llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,82 @@ body: |
184184
G_STORE %ptr_to_int(s64), %10(p0) :: (store 8)
185185
$w0 = COPY %7(s32)
186186
RET_ReallyLR implicit $w0
187+
...
188+
---
189+
name: reassoc_cst_inner_lhs
190+
alignment: 4
191+
tracksRegLiveness: true
192+
liveins:
193+
- { reg: '$w0' }
194+
- { reg: '$x1' }
195+
- { reg: '$x2' }
196+
- { reg: '$x3' }
197+
body: |
198+
bb.1:
199+
liveins: $w0, $x1, $x2, $x3
200+
201+
; CHECK-LABEL: name: reassoc_cst_inner_lhs
202+
; CHECK: liveins: $w0, $x1, $x2, $x3
203+
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
204+
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
205+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
206+
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
207+
; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
208+
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SHL]](s64)
209+
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
210+
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
211+
; CHECK: $w0 = COPY [[LOAD]](s32)
212+
; CHECK: RET_ReallyLR
213+
%1:_(p0) = COPY $x1
214+
%2:_(p0) = COPY $x2
215+
%3:_(s64) = COPY $x3
216+
%8:_(s64) = G_CONSTANT i64 40
217+
%9:_(p0) = G_PTR_ADD %2, %8(s64)
218+
%10:_(s64) = G_CONSTANT i64 2
219+
%11:_(s64) = G_SHL %3, %10
220+
%12:_(p0) = G_PTR_ADD %9, %11(s64)
221+
%14:_(s32) = G_LOAD %12(p0) :: (load (s32))
222+
$w0 = COPY %14
223+
RET_ReallyLR
224+
225+
...
226+
---
227+
name: reassoc_cst_inner_lhs_multiuse
228+
alignment: 4
229+
tracksRegLiveness: true
230+
liveins:
231+
- { reg: '$w0' }
232+
- { reg: '$x1' }
233+
- { reg: '$x2' }
234+
- { reg: '$x3' }
235+
body: |
236+
bb.1:
237+
liveins: $w0, $x1, $x2, $x3
238+
239+
; CHECK-LABEL: name: reassoc_cst_inner_lhs_multiuse
240+
; CHECK: liveins: $w0, $x1, $x2, $x3
241+
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
242+
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3
243+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
244+
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
245+
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
246+
; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64)
247+
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[SHL]](s64)
248+
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32))
249+
; CHECK: $w0 = COPY [[LOAD]](s32)
250+
; CHECK: $x0 = COPY [[PTR_ADD]](p0)
251+
; CHECK: RET_ReallyLR
252+
%1:_(p0) = COPY $x1
253+
%2:_(p0) = COPY $x2
254+
%3:_(s64) = COPY $x3
255+
%8:_(s64) = G_CONSTANT i64 40
256+
%9:_(p0) = G_PTR_ADD %2, %8(s64)
257+
%10:_(s64) = G_CONSTANT i64 2
258+
%11:_(s64) = G_SHL %3, %10
259+
%12:_(p0) = G_PTR_ADD %9, %11(s64)
260+
%14:_(s32) = G_LOAD %12(p0) :: (load (s32))
261+
$w0 = COPY %14
262+
$x0 = COPY %9
263+
RET_ReallyLR
264+
265+
...

0 commit comments

Comments
 (0)