-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[ELF][LoongArch] -r: Synthesize R_LARCH_ALIGN at input section start #153935
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Similar to llvm@94655dc. The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4 (instead of >=4), and the number of bytes inserted is `sec->addralign - 4`.
|
@llvm/pr-subscribers-backend-loongarch @llvm/pr-subscribers-lld Author: Zhaoxin Yang (ylzsx) Changes: Similar to llvm@94655dc. The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4 (instead of >=4), and the number of bytes inserted is `sec->addralign - 4`. Full diff: https://github.com/llvm/llvm-project/pull/153935.diff 4 Files Affected:
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 838ca4d242c7b..db2c71c3b42b9 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
bool relaxOnce(int pass) const override;
+ bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
void finalizeRelax(int passes) const override;
@@ -48,6 +49,19 @@ class LoongArch final : public TargetInfo {
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const Relocation &rLo12, uint64_t secAddr) const;
+ template <class ELFT, class RelTy>
+ bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
+ Relocs<RelTy> rels);
+ template <class ELFT, class RelTy>
+ void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
+ Relocs<RelTy> rels);
+ template <class ELFT>
+ bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
+
+ // The following two variables are used by synthesized ALIGN relocations.
+ InputSection *baseSec = nullptr;
+ // r_offset and r_addend pairs.
+ SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
};
} // end anonymous namespace
@@ -766,6 +780,117 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}
+// If the section alignment is > 4, advance `dot` to insert NOPs and synthesize
+// an ALIGN relocation. Otherwise, return false to use default handling.
+template <class ELFT, class RelTy>
+bool LoongArch::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
+ Relocs<RelTy> rels) {
+ if (!baseSec) {
+ // Record the first input section with RELAX relocations. We will synthesize
+ // ALIGN relocations here.
+ for (auto rel : rels) {
+ if (rel.getType(false) == R_LARCH_RELAX) {
+ baseSec = sec;
+ break;
+ }
+ }
+ } else if (sec->addralign > 4) {
+ // If the alignment is > 4 and the section does not start with an ALIGN
+ // relocation, synthesize one.
+ bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
+ return rel.r_offset == 0 && rel.getType(false) == R_LARCH_ALIGN;
+ });
+ if (!hasAlignRel) {
+ synthesizedAligns.emplace_back(dot - baseSec->getVA(),
+ sec->addralign - 4);
+ dot += sec->addralign - 4;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Finalize the relocation section by appending synthesized ALIGN relocations
+// after processing all input sections.
+template <class ELFT, class RelTy>
+void LoongArch::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
+ Relocs<RelTy> rels) {
+ auto *f = cast<ObjFile<ELFT>>(baseSec->file);
+ auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
+ // Create a copy of InputSection.
+ sec = make<InputSection>(*f, shdr, baseSec->name);
+ auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
+ *sec = *baseRelSec;
+ baseSec = nullptr;
+
+ // Allocate buffer for original and synthesized relocations in RELA format.
+ // If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
+ // CREL.
+ auto newSize = rels.size() + synthesizedAligns.size();
+ auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
+ sec->size = newSize * sizeof(typename ELFT::Rela);
+ sec->content_ = reinterpret_cast<uint8_t *>(relas);
+ sec->type = SHT_RELA;
+ // Copy original relocations to the new buffer, potentially converting CREL to
+ // RELA.
+ for (auto [i, r] : llvm::enumerate(rels)) {
+ relas[i].r_offset = r.r_offset;
+ relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
+ if constexpr (RelTy::HasAddend)
+ relas[i].r_addend = r.r_addend;
+ }
+ // Append synthesized ALIGN relocations to the buffer.
+ for (auto [i, r] : llvm::enumerate(synthesizedAligns)) {
+ auto &rela = relas[rels.size() + i];
+ rela.r_offset = r.first;
+ rela.setSymbolAndType(0, R_LARCH_ALIGN, false);
+ rela.r_addend = r.second;
+ }
+ synthesizedAligns.clear();
+ // Replace the old relocation section with the new one in the output section.
+ // addOrphanSections ensures that the output relocation section is processed
+ // after osec.
+ for (SectionCommand *cmd : sec->getParent()->commands) {
+ auto *isd = dyn_cast<InputSectionDescription>(cmd);
+ if (!isd)
+ continue;
+ for (auto *&isec : isd->sections)
+ if (isec == baseRelSec)
+ isec = sec;
+ }
+}
+
+template <class ELFT>
+bool LoongArch::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
+ bool ret = false;
+ if (sec) {
+ invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
+ } else if (baseSec) {
+ invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
+ }
+ return ret;
+}
+
+// Without linker relaxation enabled for a particular relocatable file or
+// section, the assembler will not generate R_LARCH_ALIGN relocations for
+// alignment directives. This becomes problematic in a two-stage linking
+// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
+// R_LARCH_ALIGN relocation at section start when needed.
+//
+// When called with an input section (`sec` is not null): If the section
+// alignment is > 4, advance `dot` to insert NOPs and synthesize an ALIGN
+// relocation.
+//
+// When called after all input sections are processed (`sec` is null): The
+// output relocation section is updated with all the newly synthesized ALIGN
+// relocations.
+bool LoongArch::synthesizeAlign(uint64_t &dot, InputSection *sec) {
+ assert(ctx.arg.relocatable);
+ if (ctx.arg.is64)
+ return synthesizeAlignAux<ELF64LE>(dot, sec);
+ return synthesizeAlignAux<ELF32LE>(dot, sec);
+}
+
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
return i + 1 < relocs.size() && relocs[i + 1].type == R_LARCH_RELAX;
}
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index e9ce4e26f3ed9..7460fbdea2a91 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -1230,9 +1230,9 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
if (sec->firstInOverlay)
state->overlaySize = 0;
- bool synthesizeAlign = ctx.arg.relocatable && ctx.arg.relax &&
- (sec->flags & SHF_EXECINSTR) &&
- ctx.arg.emachine == EM_RISCV;
+ bool synthesizeAlign =
+ ctx.arg.relocatable && ctx.arg.relax && (sec->flags & SHF_EXECINSTR) &&
+ (ctx.arg.emachine == EM_RISCV || ctx.arg.emachine == EM_LOONGARCH);
// We visited SectionsCommands from processSectionCommands to
// layout sections. Now, we visit SectionsCommands again to fix
// section offsets.
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 0b50f6d68ce5d..9b97fe5bab2bd 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -899,6 +899,9 @@ std::array<uint8_t, 4> OutputSection::getFiller(Ctx &ctx) {
return {1, 0, 1, 0};
return {0x13, 0, 0, 0};
}
+ if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH) {
+ return {0, 0, 0x40, 0x03};
+ }
return ctx.target->trapInstr;
}
diff --git a/lld/test/ELF/loongarch-relocatable-align.s b/lld/test/ELF/loongarch-relocatable-align.s
new file mode 100644
index 0000000000000..747a58ed61eaa
--- /dev/null
+++ b/lld/test/ELF/loongarch-relocatable-align.s
@@ -0,0 +1,147 @@
+# REQUIRES: loongarch
+
+## Test LA64.
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --defsym ELF64=1 b.s -o b.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax b1.s -o b1.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 d.s -o d.o
+
+## No RELAX. Don't synthesize ALIGN.
+# RUN: ld.lld -r b.o d.o -o bd.ro
+# RUN: llvm-readelf -r bd.ro | FileCheck %s --check-prefix=NOREL
+
+# NOREL: no relocations
+
+# RUN: ld.lld -r b.o b.o a.o b.o b1.o c.o d.o -o out.ro
+# RUN: llvm-objdump -dr --no-show-raw-insn out.ro | FileCheck %s
+# RUN: llvm-readelf -r out.ro | FileCheck %s --check-prefix=CHECK-REL
+
+# CHECK: <b0>:
+# CHECK-NEXT: 0: addi.d $a0, $a1, 1
+# CHECK-NEXT: 4: nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <b0>:
+# CHECK-NEXT: 8: addi.d $a0, $a1, 1
+# CHECK-EMPTY:
+# CHECK-NEXT: <_start>:
+# CHECK-NEXT: c: pcalau12i $a0, 0
+# CHECK-NEXT: 000000000000000c: R_LARCH_PCALA_HI20 .Ltext1_start
+# CHECK-NEXT: 000000000000000c: R_LARCH_RELAX *ABS*
+# CHECK-NEXT: 10: addi.d $a0, $a0, 0
+# CHECK-NEXT: 0000000000000010: R_LARCH_PCALA_LO12 .Ltext1_start
+# CHECK-NEXT: 0000000000000010: R_LARCH_RELAX *ABS*
+# CHECK-NEXT: 14: nop
+# CHECK-NEXT: 0000000000000014: R_LARCH_ALIGN *ABS*+0x4
+# CHECK-EMPTY:
+# CHECK-NEXT: <b0>:
+# CHECK-NEXT: 18: addi.d $a0, $a1, 1
+# CHECK-NEXT: 1c: nop
+# CHECK-NEXT: 20: nop
+# CHECK-NEXT: 0000000000000020: R_LARCH_ALIGN *ABS*+0x4
+# CHECK-NEXT: 24: nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <b1>:
+# CHECK-NEXT: 28: addi.d $a0, $a1, 3
+# CHECK-EMPTY:
+# CHECK-NEXT: <c0>:
+# CHECK-NEXT: 2c: addi.d $a0, $a1, 4
+# CHECK-NEXT: 30: nop
+# CHECK-NEXT: 0000000000000030: R_LARCH_ALIGN *ABS*+0x4
+# CHECK-EMPTY:
+# CHECK-NEXT: <d0>:
+# CHECK-NEXT: 34: addi.d $a0, $a1, 5
+
+# CHECK-REL: Relocation section '.rela.text' at offset {{.*}} contains 7 entries:
+# CHECK-REL: Relocation section '.rela.text1' at offset {{.*}} contains 5 entries:
+
+## Test LA32.
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax a.s -o a.32.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax b.s -o b.32.o
+# RUN: ld.lld -r a.32.o b.32.o -o out.32.ro
+# RUN: ld.lld -Ttext=0x10000 out.32.ro -o out32
+# RUN: llvm-objdump -dr --no-show-raw-insn out32 | FileCheck %s --check-prefix=CHECK32
+
+# CHECK32: <_start>:
+# CHECK32-NEXT: 10000: pcaddi $a0, 4
+# CHECK32-NEXT: 10004: nop
+# CHECK32-EMPTY:
+# CHECK32-NEXT: <b0>:
+# CHECK32-NEXT: 10008: addi.w $a0, $a1, 1
+# CHECK32: <.Ltext1_start>:
+# CHECK32-NEXT: 10010: pcaddi $a1, 0
+# CHECK32-NEXT: 10014: nop
+# CHECK32-NEXT: 10018: addi.w $a0, $a1, 2
+
+## Test CREL.
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --crel a.s -o acrel.o
+# RUN: ld.lld -r acrel.o b.o -o out.crel.ro
+# RUN: llvm-objdump -dr --no-show-raw-insn out.crel.ro | FileCheck %s --check-prefix=CHECKC
+
+# CHECKC: <_start>:
+# CHECKC-NEXT: 0: pcalau12i $a0, 0
+# CHECKC-NEXT: 0000000000000000: R_LARCH_PCALA_HI20 .Ltext1_start
+# CHECKC-NEXT: 0000000000000000: R_LARCH_RELAX *ABS*
+# CHECKC-NEXT: 4: addi.d $a0, $a0, 0
+# CHECKC-NEXT: 0000000000000004: R_LARCH_PCALA_LO12 .Ltext1_start
+# CHECKC-NEXT: 0000000000000004: R_LARCH_RELAX *ABS*
+# CHECKC-NEXT: 8: nop
+# CHECKC-NEXT: 0000000000000008: R_LARCH_ALIGN *ABS*+0x4
+# CHECKC-EMPTY:
+# CHECKC-NEXT: <b0>:
+# CHECKC-NEXT: c: addi.d $a0, $a1, 1
+
+#--- a.s
+.globl _start
+_start:
+ la.pcrel $a0, .Ltext1_start
+
+.section .text1,"ax"
+.Ltext1_start:
+ la.pcrel $a1, .Ltext1_start
+
+#--- b.s
+.macro addi dst, src1, src2
+.ifdef ELF64
+ addi.d \dst, \src1, \src2
+.else
+ addi.w \dst, \src1, \src2
+.endif
+.endm
+
+## Needs synthesized ALIGN.
+.option push
+.option norelax
+.balign 8
+b0:
+ addi $a0, $a1, 1
+
+.section .text1,"ax"
+.balign 8
+ addi $a0, $a1, 2
+
+.option pop
+
+#--- b1.s
+# Starts with an ALIGN relocation, don't need synthesized ALIGN.
+.option push
+.option norelax
+ .reloc ., R_LARCH_ALIGN, 4
+ nop
+.balign 8
+b1:
+ addi.d $a0, $a1, 3
+.option pop
+
+#--- c.s
+## Alignment == 4, don't need synthesized ALIGN.
+.balign 4
+c0:
+ addi.d $a0, $a1, 4
+
+#--- d.s
+## Needs synthesized ALIGN.
+.balign 8
+d0:
+ addi.d $a0, $a1, 5
|
| return {0x13, 0, 0, 0}; | ||
| } | ||
| if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH) { | ||
| return {0, 0, 0x40, 0x03}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
delete braces
SixWeining
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Similar to
94655dc
The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4, (instead of >=4), and the number of bytes inserted is
sec->addralign - 4.