Skip to content

Commit 149d9a3

Browse files
authored
[ELF][LoongArch] -r: Synthesize R_LARCH_ALIGN at input section start (llvm#153935)
Similar to llvm@94655dc. The difference is that in LoongArch, the ALIGN is synthesized when the alignment is >4 (instead of >=4), and the number of bytes inserted is `sec->addralign - 4`.
1 parent 6560adb commit 149d9a3

File tree

4 files changed

+277
-3
lines changed

4 files changed

+277
-3
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
4243
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
4344
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4445
void finalizeRelax(int passes) const override;
@@ -48,6 +49,19 @@ class LoongArch final : public TargetInfo {
4849
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4950
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
5051
const Relocation &rLo12, uint64_t secAddr) const;
52+
template <class ELFT, class RelTy>
53+
bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
54+
Relocs<RelTy> rels);
55+
template <class ELFT, class RelTy>
56+
void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
57+
Relocs<RelTy> rels);
58+
template <class ELFT>
59+
bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
60+
61+
// The following two variables are used by synthesized ALIGN relocations.
62+
InputSection *baseSec = nullptr;
63+
// r_offset and r_addend pairs.
64+
SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
5165
};
5266
} // end anonymous namespace
5367

@@ -766,6 +780,117 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
766780
}
767781
}
768782

783+
// If the section alignment is > 4, advance `dot` to insert NOPs and synthesize
784+
// an ALIGN relocation. Otherwise, return false to use default handling.
//
// Returns true only when an ALIGN relocation was recorded and `dot` was
// advanced. While `baseSec` is still unset, this function only looks for the
// first section carrying an R_LARCH_RELAX relocation; the call that records
// that section returns false, so the base section itself never gets a
// synthesized ALIGN.
785+
template <class ELFT, class RelTy>
786+
bool LoongArch::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
787+
Relocs<RelTy> rels) {
788+
if (!baseSec) {
789+
// Record the first input section with RELAX relocations. We will synthesize
790+
// ALIGN relocations here.
791+
for (auto rel : rels) {
792+
if (rel.getType(false) == R_LARCH_RELAX) {
793+
baseSec = sec;
794+
break;
795+
}
796+
}
797+
} else if (sec->addralign > 4) {
798+
// If the alignment is > 4 and the section does not start with an ALIGN
799+
// relocation, synthesize one.
800+
bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
801+
return rel.r_offset == 0 && rel.getType(false) == R_LARCH_ALIGN;
802+
});
803+
if (!hasAlignRel) {
// Queue an (r_offset, r_addend) pair: the offset is the current `dot`
// expressed relative to baseSec's address, and the addend is the number of
// padding bytes, `addralign - 4`.
804+
synthesizedAligns.emplace_back(dot - baseSec->getVA(),
805+
sec->addralign - 4);
// Reserve the same number of bytes in the output by moving `dot` forward.
806+
dot += sec->addralign - 4;
807+
return true;
808+
}
809+
}
810+
return false;
811+
}
812+
813+
// Finalize the relocation section by appending synthesized ALIGN relocations
814+
// after processing all input sections.
815+
template <class ELFT, class RelTy>
816+
void LoongArch::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
817+
Relocs<RelTy> rels) {
818+
auto *f = cast<ObjFile<ELFT>>(baseSec->file);
819+
auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
820+
// Create a copy of InputSection.
821+
sec = make<InputSection>(*f, shdr, baseSec->name);
822+
auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
823+
*sec = *baseRelSec;
824+
baseSec = nullptr;
825+
826+
// Allocate buffer for original and synthesized relocations in RELA format.
827+
// If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
828+
// CREL.
829+
auto newSize = rels.size() + synthesizedAligns.size();
830+
auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
831+
sec->size = newSize * sizeof(typename ELFT::Rela);
832+
sec->content_ = reinterpret_cast<uint8_t *>(relas);
833+
sec->type = SHT_RELA;
834+
// Copy original relocations to the new buffer, potentially converting CREL to
835+
// RELA.
836+
for (auto [i, r] : llvm::enumerate(rels)) {
837+
relas[i].r_offset = r.r_offset;
838+
relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
839+
if constexpr (RelTy::HasAddend)
840+
relas[i].r_addend = r.r_addend;
841+
}
842+
// Append synthesized ALIGN relocations to the buffer.
843+
for (auto [i, r] : llvm::enumerate(synthesizedAligns)) {
844+
auto &rela = relas[rels.size() + i];
845+
rela.r_offset = r.first;
846+
rela.setSymbolAndType(0, R_LARCH_ALIGN, false);
847+
rela.r_addend = r.second;
848+
}
849+
synthesizedAligns.clear();
850+
// Replace the old relocation section with the new one in the output section.
851+
// addOrphanSections ensures that the output relocation section is processed
852+
// after osec.
853+
for (SectionCommand *cmd : sec->getParent()->commands) {
854+
auto *isd = dyn_cast<InputSectionDescription>(cmd);
855+
if (!isd)
856+
continue;
857+
for (auto *&isec : isd->sections)
858+
if (isec == baseRelSec)
859+
isec = sec;
860+
}
861+
}
862+
863+
template <class ELFT>
864+
bool LoongArch::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
865+
bool ret = false;
866+
if (sec) {
867+
invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
868+
} else if (baseSec) {
869+
invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
870+
}
871+
return ret;
872+
}
873+
874+
// Without linker relaxation enabled for a particular relocatable file or
875+
// section, the assembler will not generate R_LARCH_ALIGN relocations for
876+
// alignment directives. This becomes problematic in a two-stage linking
877+
// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
878+
// R_LARCH_ALIGN relocation at section start when needed.
879+
//
880+
// When called with an input section (`sec` is not null): If the section
881+
// alignment is > 4, advance `dot` to insert NOPs and synthesize an ALIGN
882+
// relocation.
883+
//
884+
// When called after all input sections are processed (`sec` is null): The
885+
// output relocation section is updated with all the newly synthesized ALIGN
886+
// relocations.
//
// Returns the result of synthesizeAlignAux, instantiated for ELF64LE when
// ctx.arg.is64 is set and for ELF32LE otherwise. Only valid for relocatable
// links, which the assertion checks.
887+
bool LoongArch::synthesizeAlign(uint64_t &dot, InputSection *sec) {
888+
assert(ctx.arg.relocatable);
889+
if (ctx.arg.is64)
890+
return synthesizeAlignAux<ELF64LE>(dot, sec);
891+
return synthesizeAlignAux<ELF32LE>(dot, sec);
892+
}
893+
769894
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
770895
return i + 1 < relocs.size() && relocs[i + 1].type == R_LARCH_RELAX;
771896
}

lld/ELF/LinkerScript.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,9 +1230,9 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
12301230
if (sec->firstInOverlay)
12311231
state->overlaySize = 0;
12321232

1233-
bool synthesizeAlign = ctx.arg.relocatable && ctx.arg.relax &&
1234-
(sec->flags & SHF_EXECINSTR) &&
1235-
ctx.arg.emachine == EM_RISCV;
1233+
bool synthesizeAlign =
1234+
ctx.arg.relocatable && ctx.arg.relax && (sec->flags & SHF_EXECINSTR) &&
1235+
(ctx.arg.emachine == EM_LOONGARCH || ctx.arg.emachine == EM_RISCV);
12361236
// We visited SectionsCommands from processSectionCommands to
12371237
// layout sections. Now, we visit SectionsCommands again to fix
12381238
// section offsets.

lld/ELF/OutputSections.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,8 @@ std::array<uint8_t, 4> OutputSection::getFiller(Ctx &ctx) {
899899
return {1, 0, 1, 0};
900900
return {0x13, 0, 0, 0};
901901
}
902+
if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH)
903+
return {0, 0, 0x40, 0x03};
902904
return ctx.target->trapInstr;
903905
}
904906

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# REQUIRES: loongarch
2+
3+
## Test LA64.
4+
# RUN: rm -rf %t && split-file %s %t && cd %t
5+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
6+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --defsym ELF64=1 b.s -o b.o
7+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax b1.s -o b1.o
8+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.o
9+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 d.s -o d.o
10+
11+
## No RELAX. Don't synthesize ALIGN.
12+
# RUN: ld.lld -r b.o d.o -o bd.ro
13+
# RUN: llvm-readelf -r bd.ro | FileCheck %s --check-prefix=NOREL
14+
15+
# NOREL: no relocations
16+
17+
# RUN: ld.lld -r b.o b.o a.o b.o b1.o c.o d.o -o out.ro
18+
# RUN: llvm-objdump -dr --no-show-raw-insn out.ro | FileCheck %s
19+
# RUN: llvm-readelf -r out.ro | FileCheck %s --check-prefix=CHECK-REL
20+
21+
# CHECK: <b0>:
22+
# CHECK-NEXT: 0: addi.d $a0, $a1, 1
23+
# CHECK-NEXT: 4: nop
24+
# CHECK-EMPTY:
25+
# CHECK-NEXT: <b0>:
26+
# CHECK-NEXT: 8: addi.d $a0, $a1, 1
27+
# CHECK-EMPTY:
28+
# CHECK-NEXT: <_start>:
29+
# CHECK-NEXT: c: pcalau12i $a0, 0
30+
# CHECK-NEXT: 000000000000000c: R_LARCH_PCALA_HI20 .Ltext1_start
31+
# CHECK-NEXT: 000000000000000c: R_LARCH_RELAX *ABS*
32+
# CHECK-NEXT: 10: addi.d $a0, $a0, 0
33+
# CHECK-NEXT: 0000000000000010: R_LARCH_PCALA_LO12 .Ltext1_start
34+
# CHECK-NEXT: 0000000000000010: R_LARCH_RELAX *ABS*
35+
# CHECK-NEXT: 14: nop
36+
# CHECK-NEXT: 0000000000000014: R_LARCH_ALIGN *ABS*+0x4
37+
# CHECK-EMPTY:
38+
# CHECK-NEXT: <b0>:
39+
# CHECK-NEXT: 18: addi.d $a0, $a1, 1
40+
# CHECK-NEXT: 1c: nop
41+
# CHECK-NEXT: 20: nop
42+
# CHECK-NEXT: 0000000000000020: R_LARCH_ALIGN *ABS*+0x4
43+
# CHECK-NEXT: 24: nop
44+
# CHECK-EMPTY:
45+
# CHECK-NEXT: <b1>:
46+
# CHECK-NEXT: 28: addi.d $a0, $a1, 3
47+
# CHECK-EMPTY:
48+
# CHECK-NEXT: <c0>:
49+
# CHECK-NEXT: 2c: addi.d $a0, $a1, 4
50+
# CHECK-NEXT: 30: nop
51+
# CHECK-NEXT: 0000000000000030: R_LARCH_ALIGN *ABS*+0x4
52+
# CHECK-EMPTY:
53+
# CHECK-NEXT: <d0>:
54+
# CHECK-NEXT: 34: addi.d $a0, $a1, 5
55+
56+
# CHECK-REL: Relocation section '.rela.text' at offset {{.*}} contains 7 entries:
57+
# CHECK-REL: Relocation section '.rela.text1' at offset {{.*}} contains 5 entries:
58+
59+
## Test LA32.
60+
# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax a.s -o a.32.o
61+
# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax b.s -o b.32.o
62+
# RUN: ld.lld -r a.32.o b.32.o -o out.32.ro
63+
# RUN: ld.lld -Ttext=0x10000 out.32.ro -o out32
64+
# RUN: llvm-objdump -dr --no-show-raw-insn out32 | FileCheck %s --check-prefix=CHECK32
65+
66+
# CHECK32: <_start>:
67+
# CHECK32-NEXT: 10000: pcaddi $a0, 4
68+
# CHECK32-NEXT: 10004: nop
69+
# CHECK32-EMPTY:
70+
# CHECK32-NEXT: <b0>:
71+
# CHECK32-NEXT: 10008: addi.w $a0, $a1, 1
72+
# CHECK32: <.Ltext1_start>:
73+
# CHECK32-NEXT: 10010: pcaddi $a1, 0
74+
# CHECK32-NEXT: 10014: nop
75+
# CHECK32-NEXT: 10018: addi.w $a0, $a1, 2
76+
77+
## Test CREL.
78+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --crel a.s -o acrel.o
79+
# RUN: ld.lld -r acrel.o b.o -o out.crel.ro
80+
# RUN: llvm-objdump -dr --no-show-raw-insn out.crel.ro | FileCheck %s --check-prefix=CHECKC
81+
82+
# CHECKC: <_start>:
83+
# CHECKC-NEXT: 0: pcalau12i $a0, 0
84+
# CHECKC-NEXT: 0000000000000000: R_LARCH_PCALA_HI20 .Ltext1_start
85+
# CHECKC-NEXT: 0000000000000000: R_LARCH_RELAX *ABS*
86+
# CHECKC-NEXT: 4: addi.d $a0, $a0, 0
87+
# CHECKC-NEXT: 0000000000000004: R_LARCH_PCALA_LO12 .Ltext1_start
88+
# CHECKC-NEXT: 0000000000000004: R_LARCH_RELAX *ABS*
89+
# CHECKC-NEXT: 8: nop
90+
# CHECKC-NEXT: 0000000000000008: R_LARCH_ALIGN *ABS*+0x4
91+
# CHECKC-EMPTY:
92+
# CHECKC-NEXT: <b0>:
93+
# CHECKC-NEXT: c: addi.d $a0, $a1, 1
94+
95+
#--- a.s
96+
.globl _start
97+
_start:
98+
la.pcrel $a0, .Ltext1_start
99+
100+
.section .text1,"ax"
101+
.Ltext1_start:
102+
la.pcrel $a1, .Ltext1_start
103+
104+
#--- b.s
105+
.macro addi dst, src1, src2
106+
.ifdef ELF64
107+
addi.d \dst, \src1, \src2
108+
.else
109+
addi.w \dst, \src1, \src2
110+
.endif
111+
.endm
112+
113+
## Needs synthesized ALIGN.
114+
.option push
115+
.option norelax
116+
.balign 8
117+
b0:
118+
addi $a0, $a1, 1
119+
120+
.section .text1,"ax"
121+
.balign 8
122+
addi $a0, $a1, 2
123+
124+
.option pop
125+
126+
#--- b1.s
127+
# Starts with an ALIGN relocation, don't need synthesized ALIGN.
128+
.option push
129+
.option norelax
130+
.reloc ., R_LARCH_ALIGN, 4
131+
nop
132+
.balign 8
133+
b1:
134+
addi.d $a0, $a1, 3
135+
.option pop
136+
137+
#--- c.s
138+
## Alignment == 4, don't need synthesized ALIGN.
139+
.balign 4
140+
c0:
141+
addi.d $a0, $a1, 4
142+
143+
#--- d.s
144+
## Needs synthesized ALIGN.
145+
.balign 8
146+
d0:
147+
addi.d $a0, $a1, 5

0 commit comments

Comments
 (0)