Skip to content

Conversation

@ylzsx
Copy link
Contributor

@ylzsx ylzsx commented Aug 16, 2025

Similar to
94655dc

The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4 (instead of >=4), and the number of bytes inserted is sec->addralign - 4.

Similar to
llvm@94655dc

The difference is that in LoongArch, the ALIGN is synthesized when the
alignment is >4 (instead of >=4), and the number of bytes inserted is
`sec->addralign - 4`.
@llvmbot
Copy link
Member

llvmbot commented Aug 16, 2025

@llvm/pr-subscribers-backend-loongarch
@llvm/pr-subscribers-lld-elf

@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)

Changes

Similar to
94655dc

The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4 (instead of >=4), and the number of bytes inserted is sec->addralign - 4.


Full diff: https://github.com/llvm/llvm-project/pull/153935.diff

4 Files Affected:

  • (modified) lld/ELF/Arch/LoongArch.cpp (+125)
  • (modified) lld/ELF/LinkerScript.cpp (+3-3)
  • (modified) lld/ELF/OutputSections.cpp (+3)
  • (added) lld/test/ELF/loongarch-relocatable-align.s (+147)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 838ca4d242c7b..db2c71c3b42b9 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
   bool relaxOnce(int pass) const override;
+  bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   void finalizeRelax(int passes) const override;
@@ -48,6 +49,19 @@ class LoongArch final : public TargetInfo {
   void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
   bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
                      const Relocation &rLo12, uint64_t secAddr) const;
+  template <class ELFT, class RelTy>
+  bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
+                               Relocs<RelTy> rels);
+  template <class ELFT, class RelTy>
+  void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
+                                Relocs<RelTy> rels);
+  template <class ELFT>
+  bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
+
+  // The following two variables are used by synthesized ALIGN relocations.
+  InputSection *baseSec = nullptr;
+  // r_offset and r_addend pairs.
+  SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
 };
 } // end anonymous namespace
 
@@ -766,6 +780,117 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
   }
 }
 
+// If the section alignment is > 4, advance `dot` to insert NOPs and synthesize
+// an ALIGN relocation. Otherwise, return false to use default handling.
+template <class ELFT, class RelTy>
+bool LoongArch::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
+                                        Relocs<RelTy> rels) {
+  if (!baseSec) {
+    // Record the first input section with RELAX relocations. We will synthesize
+    // ALIGN relocations here.
+    for (auto rel : rels) {
+      if (rel.getType(false) == R_LARCH_RELAX) {
+        baseSec = sec;
+        break;
+      }
+    }
+  } else if (sec->addralign > 4) {
+    // If the alignment is > 4 and the section does not start with an ALIGN
+    // relocation, synthesize one.
+    bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
+      return rel.r_offset == 0 && rel.getType(false) == R_LARCH_ALIGN;
+    });
+    if (!hasAlignRel) {
+      synthesizedAligns.emplace_back(dot - baseSec->getVA(),
+                                     sec->addralign - 4);
+      dot += sec->addralign - 4;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Finalize the relocation section by appending synthesized ALIGN relocations
+// after processing all input sections.
+template <class ELFT, class RelTy>
+void LoongArch::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
+                                         Relocs<RelTy> rels) {
+  auto *f = cast<ObjFile<ELFT>>(baseSec->file);
+  auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
+  // Create a copy of InputSection.
+  sec = make<InputSection>(*f, shdr, baseSec->name);
+  auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
+  *sec = *baseRelSec;
+  baseSec = nullptr;
+
+  // Allocate buffer for original and synthesized relocations in RELA format.
+  // If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
+  // CREL.
+  auto newSize = rels.size() + synthesizedAligns.size();
+  auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
+  sec->size = newSize * sizeof(typename ELFT::Rela);
+  sec->content_ = reinterpret_cast<uint8_t *>(relas);
+  sec->type = SHT_RELA;
+  // Copy original relocations to the new buffer, potentially converting CREL to
+  // RELA.
+  for (auto [i, r] : llvm::enumerate(rels)) {
+    relas[i].r_offset = r.r_offset;
+    relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
+    if constexpr (RelTy::HasAddend)
+      relas[i].r_addend = r.r_addend;
+  }
+  // Append synthesized ALIGN relocations to the buffer.
+  for (auto [i, r] : llvm::enumerate(synthesizedAligns)) {
+    auto &rela = relas[rels.size() + i];
+    rela.r_offset = r.first;
+    rela.setSymbolAndType(0, R_LARCH_ALIGN, false);
+    rela.r_addend = r.second;
+  }
+  synthesizedAligns.clear();
+  // Replace the old relocation section with the new one in the output section.
+  // addOrphanSections ensures that the output relocation section is processed
+  // after osec.
+  for (SectionCommand *cmd : sec->getParent()->commands) {
+    auto *isd = dyn_cast<InputSectionDescription>(cmd);
+    if (!isd)
+      continue;
+    for (auto *&isec : isd->sections)
+      if (isec == baseRelSec)
+        isec = sec;
+  }
+}
+
+template <class ELFT>
+bool LoongArch::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
+  bool ret = false;
+  if (sec) {
+    invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
+  } else if (baseSec) {
+    invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
+  }
+  return ret;
+}
+
+// Without linker relaxation enabled for a particular relocatable file or
+// section, the assembler will not generate R_LARCH_ALIGN relocations for
+// alignment directives. This becomes problematic in a two-stage linking
+// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
+// R_LARCH_ALIGN relocation at section start when needed.
+//
+// When called with an input section (`sec` is not null): If the section
+// alignment is > 4, advance `dot` to insert NOPs and synthesize an ALIGN
+// relocation.
+//
+// When called after all input sections are processed (`sec` is null): The
+// output relocation section is updated with all the newly synthesized ALIGN
+// relocations.
+bool LoongArch::synthesizeAlign(uint64_t &dot, InputSection *sec) {
+  assert(ctx.arg.relocatable);
+  if (ctx.arg.is64)
+    return synthesizeAlignAux<ELF64LE>(dot, sec);
+  return synthesizeAlignAux<ELF32LE>(dot, sec);
+}
+
 static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
   return i + 1 < relocs.size() && relocs[i + 1].type == R_LARCH_RELAX;
 }
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index e9ce4e26f3ed9..7460fbdea2a91 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -1230,9 +1230,9 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
   if (sec->firstInOverlay)
     state->overlaySize = 0;
 
-  bool synthesizeAlign = ctx.arg.relocatable && ctx.arg.relax &&
-                         (sec->flags & SHF_EXECINSTR) &&
-                         ctx.arg.emachine == EM_RISCV;
+  bool synthesizeAlign =
+      ctx.arg.relocatable && ctx.arg.relax && (sec->flags & SHF_EXECINSTR) &&
+      (ctx.arg.emachine == EM_RISCV || ctx.arg.emachine == EM_LOONGARCH);
   // We visited SectionsCommands from processSectionCommands to
   // layout sections. Now, we visit SectionsCommands again to fix
   // section offsets.
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 0b50f6d68ce5d..9b97fe5bab2bd 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -899,6 +899,9 @@ std::array<uint8_t, 4> OutputSection::getFiller(Ctx &ctx) {
       return {1, 0, 1, 0};
     return {0x13, 0, 0, 0};
   }
+  if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH) {
+    return {0, 0, 0x40, 0x03};
+  }
   return ctx.target->trapInstr;
 }
 
diff --git a/lld/test/ELF/loongarch-relocatable-align.s b/lld/test/ELF/loongarch-relocatable-align.s
new file mode 100644
index 0000000000000..747a58ed61eaa
--- /dev/null
+++ b/lld/test/ELF/loongarch-relocatable-align.s
@@ -0,0 +1,147 @@
+# REQUIRES: loongarch
+
+## Test LA64.
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --defsym ELF64=1 b.s -o b.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax b1.s -o b1.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 d.s -o d.o
+
+## No RELAX. Don't synthesize ALIGN.
+# RUN: ld.lld -r b.o d.o -o bd.ro
+# RUN: llvm-readelf -r bd.ro | FileCheck %s --check-prefix=NOREL
+
+# NOREL: no relocations
+
+# RUN: ld.lld -r b.o b.o a.o b.o b1.o c.o d.o -o out.ro
+# RUN: llvm-objdump -dr --no-show-raw-insn out.ro | FileCheck %s
+# RUN: llvm-readelf -r out.ro | FileCheck %s --check-prefix=CHECK-REL
+
+# CHECK:      <b0>:
+# CHECK-NEXT:   0:    addi.d $a0, $a1, 1
+# CHECK-NEXT:   4:    nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <b0>:
+# CHECK-NEXT:   8:    addi.d $a0, $a1, 1
+# CHECK-EMPTY:
+# CHECK-NEXT: <_start>:
+# CHECK-NEXT:   c:    pcalau12i $a0, 0
+# CHECK-NEXT:           000000000000000c:  R_LARCH_PCALA_HI20   .Ltext1_start
+# CHECK-NEXT:           000000000000000c:  R_LARCH_RELAX        *ABS*
+# CHECK-NEXT:   10:   addi.d $a0, $a0, 0
+# CHECK-NEXT:           0000000000000010:  R_LARCH_PCALA_LO12   .Ltext1_start
+# CHECK-NEXT:           0000000000000010:  R_LARCH_RELAX        *ABS*
+# CHECK-NEXT:   14:   nop
+# CHECK-NEXT:           0000000000000014:  R_LARCH_ALIGN        *ABS*+0x4
+# CHECK-EMPTY:
+# CHECK-NEXT: <b0>:
+# CHECK-NEXT:   18:   addi.d $a0, $a1, 1
+# CHECK-NEXT:   1c:   nop
+# CHECK-NEXT:   20:   nop
+# CHECK-NEXT:           0000000000000020:  R_LARCH_ALIGN        *ABS*+0x4
+# CHECK-NEXT:   24:   nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <b1>:
+# CHECK-NEXT:   28:   addi.d $a0, $a1, 3
+# CHECK-EMPTY:
+# CHECK-NEXT: <c0>:
+# CHECK-NEXT:   2c:   addi.d $a0, $a1, 4
+# CHECK-NEXT:   30:   nop
+# CHECK-NEXT:           0000000000000030:  R_LARCH_ALIGN        *ABS*+0x4
+# CHECK-EMPTY:
+# CHECK-NEXT: <d0>:
+# CHECK-NEXT:   34:   addi.d $a0, $a1, 5
+
+# CHECK-REL:  Relocation section '.rela.text' at offset {{.*}} contains 7 entries:
+# CHECK-REL:  Relocation section '.rela.text1' at offset {{.*}} contains 5 entries:
+
+## Test LA32.
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax a.s -o a.32.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax b.s -o b.32.o
+# RUN: ld.lld -r a.32.o b.32.o -o out.32.ro
+# RUN: ld.lld -Ttext=0x10000 out.32.ro -o out32
+# RUN: llvm-objdump -dr --no-show-raw-insn out32 | FileCheck %s --check-prefix=CHECK32
+
+# CHECK32:      <_start>:
+# CHECK32-NEXT:   10000:    pcaddi $a0, 4
+# CHECK32-NEXT:   10004:    nop
+# CHECK32-EMPTY:
+# CHECK32-NEXT: <b0>:
+# CHECK32-NEXT:   10008:    addi.w $a0, $a1, 1
+# CHECK32:      <.Ltext1_start>:
+# CHECK32-NEXT:   10010:    pcaddi $a1, 0
+# CHECK32-NEXT:   10014:    nop
+# CHECK32-NEXT:   10018:    addi.w $a0, $a1, 2
+
+## Test CREL.
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --crel a.s -o acrel.o
+# RUN: ld.lld -r acrel.o b.o -o out.crel.ro
+# RUN: llvm-objdump -dr --no-show-raw-insn out.crel.ro | FileCheck %s --check-prefix=CHECKC
+
+# CHECKC:      <_start>:
+# CHECKC-NEXT:   0:    pcalau12i $a0, 0
+# CHECKC-NEXT:           0000000000000000:  R_LARCH_PCALA_HI20   .Ltext1_start
+# CHECKC-NEXT:           0000000000000000:  R_LARCH_RELAX        *ABS*
+# CHECKC-NEXT:   4:    addi.d $a0, $a0, 0
+# CHECKC-NEXT:           0000000000000004:  R_LARCH_PCALA_LO12   .Ltext1_start
+# CHECKC-NEXT:           0000000000000004:  R_LARCH_RELAX        *ABS*
+# CHECKC-NEXT:   8:    nop
+# CHECKC-NEXT:           0000000000000008:  R_LARCH_ALIGN        *ABS*+0x4
+# CHECKC-EMPTY:
+# CHECKC-NEXT: <b0>:
+# CHECKC-NEXT:   c:    addi.d $a0, $a1, 1
+
+#--- a.s
+.globl _start
+_start:
+  la.pcrel $a0, .Ltext1_start
+
+.section .text1,"ax"
+.Ltext1_start:
+  la.pcrel $a1, .Ltext1_start
+
+#--- b.s
+.macro addi dst, src1, src2
+.ifdef ELF64
+  addi.d \dst, \src1, \src2
+.else
+  addi.w \dst, \src1, \src2
+.endif
+.endm
+
+## Needs synthesized ALIGN.
+.option push
+.option norelax
+.balign 8
+b0:
+  addi $a0, $a1, 1
+
+.section .text1,"ax"
+.balign 8
+  addi $a0, $a1, 2
+
+.option pop
+
+#--- b1.s
+# Starts with an ALIGN relocation, don't need synthesized ALIGN.
+.option push
+.option norelax
+  .reloc ., R_LARCH_ALIGN, 4
+  nop
+.balign 8
+b1:
+  addi.d $a0, $a1, 3
+.option pop
+
+#--- c.s
+## Alignment == 4, don't need synthesized ALIGN.
+.balign 4
+c0:
+  addi.d $a0, $a1, 4
+
+#--- d.s
+## Needs synthesized ALIGN.
+.balign 8
+d0:
+  addi.d $a0, $a1, 5

return {0x13, 0, 0, 0};
}
if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH) {
return {0, 0, 0x40, 0x03};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete braces

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@ylzsx ylzsx merged commit 149d9a3 into llvm:main Aug 22, 2025
9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants