Skip to content

Commit ffc48e3

Browse files
committed
Auto merge of rust-lang#115641 - durin42:llvm-18-fatlto-take-2, r=nikic
lto: load bitcode sections by name. Upstream change llvm/llvm-project@6b539f5 changed how `isSectionBitcode` works, and it now only respects `.llvm.lto` sections instead of also `.llvmbc`, which it says was never intended to be used for LTO. We instead load sections by name, and sniff for raw bitcode by hand. This is an alternative approach to rust-lang#115136, where we tried the same thing using the `object` crate, but it got too fraught to continue. r? `@nikic` `@rustbot` label: +llvm-main
2 parents 26f4b72 + 0db6602 commit ffc48e3

File tree

4 files changed

+89
-16
lines changed

4 files changed

+89
-16
lines changed

compiler/rustc_codegen_llvm/src/back/lto.rs

+26-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::back::write::{self, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers};
1+
use crate::back::write::{
2+
self, bitcode_section_name, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers,
3+
};
24
use crate::errors::{
35
DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib,
46
};
@@ -120,6 +122,7 @@ fn prepare_lto(
120122
info!("adding bitcode from {}", name);
121123
match get_bitcode_slice_from_object_data(
122124
child.data(&*archive_data).expect("corrupt rlib"),
125+
cgcx,
123126
) {
124127
Ok(data) => {
125128
let module = SerializedModule::FromRlib(data.to_vec());
@@ -141,10 +144,29 @@ fn prepare_lto(
141144
Ok((symbols_below_threshold, upstream_modules))
142145
}
143146

144-
fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFromRlib> {
147+
fn get_bitcode_slice_from_object_data<'a>(
148+
obj: &'a [u8],
149+
cgcx: &CodegenContext<LlvmCodegenBackend>,
150+
) -> Result<&'a [u8], LtoBitcodeFromRlib> {
151+
// We're about to assume the data here is an object file with sections, but if it's raw LLVM IR that
152+
// won't work. Fortunately, if that's what we have we can just return the object directly, so we sniff
153+
// the relevant magic strings here and return.
154+
if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") {
155+
return Ok(obj);
156+
}
157+
// We drop the "__LLVM," prefix here because on Apple platforms there's a notion of "segment name"
158+
// which in the public API for sections gets treated as part of the section name, but internally
159+
// in MachOObjectFile.cpp gets treated separately.
160+
let section_name = bitcode_section_name(cgcx).trim_start_matches("__LLVM,");
145161
let mut len = 0;
146-
let data =
147-
unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
162+
let data = unsafe {
163+
llvm::LLVMRustGetSliceFromObjectDataByName(
164+
obj.as_ptr(),
165+
obj.len(),
166+
section_name.as_ptr(),
167+
&mut len,
168+
)
169+
};
148170
if !data.is_null() {
149171
assert!(len != 0);
150172
let bc = unsafe { slice::from_raw_parts(data, len) };

compiler/rustc_codegen_llvm/src/back/write.rs

+24-12
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,27 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data:
873873
asm
874874
}
875875

876+
fn target_is_apple(cgcx: &CodegenContext<LlvmCodegenBackend>) -> bool {
877+
cgcx.opts.target_triple.triple().contains("-ios")
878+
|| cgcx.opts.target_triple.triple().contains("-darwin")
879+
|| cgcx.opts.target_triple.triple().contains("-tvos")
880+
|| cgcx.opts.target_triple.triple().contains("-watchos")
881+
}
882+
883+
fn target_is_aix(cgcx: &CodegenContext<LlvmCodegenBackend>) -> bool {
884+
cgcx.opts.target_triple.triple().contains("-aix")
885+
}
886+
887+
pub(crate) fn bitcode_section_name(cgcx: &CodegenContext<LlvmCodegenBackend>) -> &'static str {
888+
if target_is_apple(cgcx) {
889+
"__LLVM,__bitcode\0"
890+
} else if target_is_aix(cgcx) {
891+
".ipa\0"
892+
} else {
893+
".llvmbc\0"
894+
}
895+
}
896+
876897
/// Embed the bitcode of an LLVM module in the LLVM module itself.
877898
///
878899
/// This is done primarily for iOS where it appears to be standard to compile C
@@ -933,11 +954,8 @@ unsafe fn embed_bitcode(
933954
// Unfortunately, LLVM provides no way to set custom section flags. For ELF
934955
// and COFF we emit the sections using module level inline assembly for that
935956
// reason (see issue #90326 for historical background).
936-
let is_aix = cgcx.opts.target_triple.triple().contains("-aix");
937-
let is_apple = cgcx.opts.target_triple.triple().contains("-ios")
938-
|| cgcx.opts.target_triple.triple().contains("-darwin")
939-
|| cgcx.opts.target_triple.triple().contains("-tvos")
940-
|| cgcx.opts.target_triple.triple().contains("-watchos");
957+
let is_aix = target_is_aix(cgcx);
958+
let is_apple = target_is_apple(cgcx);
941959
if is_apple
942960
|| is_aix
943961
|| cgcx.opts.target_triple.triple().starts_with("wasm")
@@ -952,13 +970,7 @@ unsafe fn embed_bitcode(
952970
);
953971
llvm::LLVMSetInitializer(llglobal, llconst);
954972

955-
let section = if is_apple {
956-
"__LLVM,__bitcode\0"
957-
} else if is_aix {
958-
".ipa\0"
959-
} else {
960-
".llvmbc\0"
961-
};
973+
let section = bitcode_section_name(cgcx);
962974
llvm::LLVMSetSection(llglobal, section.as_ptr().cast());
963975
llvm::LLVMRustSetLinkage(llglobal, llvm::Linkage::PrivateLinkage);
964976
llvm::LLVMSetGlobalConstant(llglobal, llvm::True);

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+6
Original file line numberDiff line numberDiff line change
@@ -2322,6 +2322,12 @@ extern "C" {
23222322
len: usize,
23232323
out_len: &mut usize,
23242324
) -> *const u8;
2325+
pub fn LLVMRustGetSliceFromObjectDataByName(
2326+
data: *const u8,
2327+
len: usize,
2328+
name: *const u8,
2329+
out_len: &mut usize,
2330+
) -> *const u8;
23252331

23262332
pub fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>;
23272333
pub fn LLVMRustLinkerAdd(

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

+33
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <stdio.h>
22

3+
#include <cstddef>
34
#include <iomanip>
45
#include <vector>
56
#include <set>
@@ -1558,6 +1559,38 @@ LLVMRustGetBitcodeSliceFromObjectData(const char *data,
15581559
return BitcodeOrError->getBufferStart();
15591560
}
15601561

1562+
// Find a section of an object file by name. Fail if the section is missing or
1563+
// empty.
1564+
extern "C" const char *LLVMRustGetSliceFromObjectDataByName(const char *data,
1565+
size_t len,
1566+
const char *name,
1567+
size_t *out_len) {
1568+
*out_len = 0;
1569+
StringRef Data(data, len);
1570+
MemoryBufferRef Buffer(Data, ""); // The id is unused.
1571+
file_magic Type = identify_magic(Buffer.getBuffer());
1572+
Expected<std::unique_ptr<object::ObjectFile>> ObjFileOrError =
1573+
object::ObjectFile::createObjectFile(Buffer, Type);
1574+
if (!ObjFileOrError) {
1575+
LLVMRustSetLastError(toString(ObjFileOrError.takeError()).c_str());
1576+
return nullptr;
1577+
}
1578+
for (const object::SectionRef &Sec : (*ObjFileOrError)->sections()) {
1579+
Expected<StringRef> Name = Sec.getName();
1580+
if (Name && *Name == name) {
1581+
Expected<StringRef> SectionOrError = Sec.getContents();
1582+
if (!SectionOrError) {
1583+
LLVMRustSetLastError(toString(SectionOrError.takeError()).c_str());
1584+
return nullptr;
1585+
}
1586+
*out_len = SectionOrError->size();
1587+
return SectionOrError->data();
1588+
}
1589+
}
1590+
LLVMRustSetLastError("could not find requested section");
1591+
return nullptr;
1592+
}
1593+
15611594
// Computes the LTO cache key for the provided 'ModId' in the given 'Data',
15621595
// storing the result in 'KeyOut'.
15631596
// Currently, this cache key is a SHA-1 hash of anything that could affect

0 commit comments

Comments
 (0)