Skip to content

Commit a1532ed

Browse files
committed
[InstrProfiling] Make CountersPtr in __profd_ relative
Change `CounterPtr` in `__profd_` to a label difference, which is a link-time constant.
On ELF, when linking a shared object, this requires that `__profc_` is either private or linkonce/linkonce_odr hidden.
On COFF, we need D104564 so that `.quad a-b` (64-bit label difference) can lower to a 32-bit PC-relative relocation.

```
# ELF: R_X86_64_PC64 (PC-relative)
.quad .L__profc_foo-.L__profd_foo

# Mach-O: a pair of 8-byte X86_64_RELOC_UNSIGNED and X86_64_RELOC_SUBTRACTOR
.quad l___profc_foo-l___profd_foo

# COFF: we actually use IMAGE_REL_AMD64_REL32/IMAGE_REL_ARM64_REL32 so
# the high 32-bit value is zero even if .L__profc_foo < .L__profd_foo
# As compensation, we truncate CountersDelta in the header so that
# __llvm_profile_merge_from_buffer and llvm-profdata reader keep working.
.quad .L__profc_foo-.L__profd_foo
```

(Note: link.exe sorts `.lprfc` before `.lprfd` even if the object writer has `.lprfd` before `.lprfc`, so we cannot work around by reordering `.lprfc` and `.lprfd`.)

With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`) `ld -pie` linked clang is 1.74% smaller due to fewer R_X86_64_RELATIVE relocations.

```
% readelf -r pie | awk '$3~/R.*/{s[$3]++} END {for (k in s) print k, s[k]}'
R_X86_64_JUMP_SLO 331
R_X86_64_TPOFF64 2
R_X86_64_RELATIVE 476059 # was: 607712
R_X86_64_64 2616
R_X86_64_GLOB_DAT 31
```

The absolute function address (used by llvm-profdata to collect indirect call targets) can be converted to relative as well, but is not done in this patch.

Differential Revision: https://reviews.llvm.org/D104556
1 parent 69cdadd commit a1532ed

File tree

17 files changed

+78
-34
lines changed

17 files changed

+78
-34
lines changed

clang/test/Profile/c-linkage-available_externally.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// RUN: %clang_cc1 -O2 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage-available_externally.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s
44

55
// CHECK: @__profc_foo = linkonce_odr hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
6-
// CHECK: @__profd_foo = linkonce_odr hidden global {{.*}} i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i32 0, i32 0){{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
6+
// CHECK: @__profd_foo = linkonce_odr hidden global {{.*}} i64 sub (i64 ptrtoint ([1 x i64]* @__profc_foo to i64), i64 ptrtoint ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i64)), {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
77
inline int foo(void) { return 1; }
88

99
int main(void) {

compiler-rt/include/profile/InstrProfData.inc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
7575
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
7676
ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
7777
Inc->getHash()->getZExtValue()))
78-
INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
79-
ConstantExpr::getBitCast(CounterPtr, \
80-
llvm::Type::getInt64PtrTy(Ctx)))
78+
INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
8179
/* This is used to map function pointers for the indirect call targets to
8280
* function name hashes during the conversion from raw to merged profile
8381
* data.
@@ -134,7 +132,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCo
134132
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
135133
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
136134
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
137-
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
135+
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
136+
(uintptr_t)CountersBegin - (uintptr_t)DataBegin)
138137
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
139138
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
140139
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))

compiler-rt/lib/profile/InstrProfilingMerge.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
8181
return 0;
8282
}
8383

84+
static uintptr_t signextIfWin64(void *V) {
85+
#ifdef _WIN64
86+
return (uintptr_t)(int32_t)(uintptr_t)V;
87+
#else
88+
return (uintptr_t)V;
89+
#endif
90+
}
91+
8492
COMPILER_RT_VISIBILITY
8593
int __llvm_profile_merge_from_buffer(const char *ProfileData,
8694
uint64_t ProfileSize) {
@@ -89,6 +97,7 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
8997
uint64_t *SrcCountersStart;
9098
const char *SrcNameStart;
9199
const char *SrcValueProfDataStart, *SrcValueProfData;
100+
uintptr_t CountersDelta = Header->CountersDelta;
92101

93102
SrcDataStart =
94103
(__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
@@ -105,15 +114,30 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
105114
DstData = (__llvm_profile_data *)__llvm_profile_begin_data(),
106115
SrcValueProfData = SrcValueProfDataStart;
107116
SrcData < SrcDataEnd; ++SrcData, ++DstData) {
108-
uint64_t *DstCounters = (uint64_t *)DstData->CounterPtr;
117+
// For the in-memory destination, CounterPtr is the distance from the start
118+
// address of the data to the start address of the counter. On WIN64,
119+
// CounterPtr is a truncated 32-bit value due to COFF limitation. Sign
120+
// extend CounterPtr to get the original value.
121+
uint64_t *DstCounters =
122+
(uint64_t *)((uintptr_t)DstData + signextIfWin64(DstData->CounterPtr));
109123
unsigned NVK = 0;
110124

125+
// SrcData is a serialized representation of the memory image. We need to
126+
// compute the in-buffer counter offset from the in-memory address distance.
127+
// The initial CountersDelta is the in-memory address difference
128+
// start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr -
129+
// CountersDelta computes the offset into the in-buffer counter section.
130+
//
131+
// On WIN64, CountersDelta is truncated as well, so no need for signext.
132+
uint64_t *SrcCounters =
133+
SrcCountersStart +
134+
((uintptr_t)SrcData->CounterPtr - CountersDelta) / sizeof(uint64_t);
135+
// CountersDelta needs to be decreased as we advance to the next data
136+
// record.
137+
CountersDelta -= sizeof(*SrcData);
111138
unsigned NC = SrcData->NumCounters;
112139
if (NC == 0)
113140
return 1;
114-
uint64_t *SrcCounters = SrcCountersStart + ((size_t)SrcData->CounterPtr -
115-
Header->CountersDelta) /
116-
sizeof(uint64_t);
117141
if (SrcCounters < SrcCountersStart ||
118142
(const char *)SrcCounters >= SrcNameStart ||
119143
(const char *)(SrcCounters + NC) > SrcNameStart)

compiler-rt/lib/profile/InstrProfilingWriter.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
283283
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
284284
#include "profile/InstrProfData.inc"
285285

286+
/* On WIN64, label differences are truncated 32-bit values. Truncate
287+
* CountersDelta to match. */
288+
#ifdef _WIN64
289+
Header.CountersDelta = (uint32_t)Header.CountersDelta;
290+
#endif
291+
286292
/* Write the profile header. */
287293
ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1, 0}};
288294
if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))

llvm/include/llvm/ProfileData/InstrProfData.inc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
7575
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
7676
ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
7777
Inc->getHash()->getZExtValue()))
78-
INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
79-
ConstantExpr::getBitCast(CounterPtr, \
80-
llvm::Type::getInt64PtrTy(Ctx)))
78+
INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
8179
/* This is used to map function pointers for the indirect call targets to
8280
* function name hashes during the conversion from raw to merged profile
8381
* data.
@@ -134,7 +132,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCo
134132
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
135133
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
136134
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
137-
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
135+
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
136+
(uintptr_t)CountersBegin - (uintptr_t)DataBegin)
138137
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
139138
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
140139
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,15 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
437437
// may itself be corrupt.
438438
if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
439439
return error(instrprof_error::malformed);
440+
441+
// We need to compute the in-buffer counter offset from the in-memory address
442+
// distance. The initial CountersDelta is the in-memory address difference
443+
// start(__llvm_prf_cnts)-start(__llvm_prf_data), so Data->CounterPtr -
444+
// CountersDelta computes the offset into the in-buffer counter section.
445+
//
446+
// CountersDelta decreases as we advance to the next data record.
440447
ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
448+
CountersDelta -= sizeof(*Data);
441449
if (CounterOffset < 0 || CounterOffset > MaxNumCounters ||
442450
((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
443451
return error(instrprof_error::malformed);

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
920920
}
921921

922922
// Create data variable.
923+
auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
923924
auto *Int16Ty = Type::getInt16Ty(Ctx);
924925
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
925926
Type *DataTypes[] = {
@@ -936,10 +937,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
936937
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
937938
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
938939

939-
Constant *DataVals[] = {
940-
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
941-
#include "llvm/ProfileData/InstrProfData.inc"
942-
};
943940
// If the data variable is not referenced by code (if we don't emit
944941
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
945942
// data variable live under linker GC, the data variable can be private. This
@@ -953,8 +950,19 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
953950
Visibility = GlobalValue::DefaultVisibility;
954951
}
955952
auto *Data =
956-
new GlobalVariable(*M, DataTy, false, Linkage,
957-
ConstantStruct::get(DataTy, DataVals), DataVarName);
953+
new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName);
954+
// Reference the counter variable with a label difference (link-time
955+
// constant).
956+
auto *RelativeCounterPtr =
957+
ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
958+
ConstantExpr::getPtrToInt(Data, IntPtrTy));
959+
960+
Constant *DataVals[] = {
961+
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
962+
#include "llvm/ProfileData/InstrProfData.inc"
963+
};
964+
Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
965+
958966
Data->setVisibility(Visibility);
959967
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
960968
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));

llvm/test/Instrumentation/InstrProfiling/icall.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ attributes #0 = { nounwind }
5656
; STATIC-SAME: @__llvm_prf_vnodes
5757
; STATIC-SAME: @__llvm_prf_nm
5858

59-
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
60-
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
61-
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)
59+
; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0)
60+
; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0)
61+
; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0)
6262

6363
; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32)
6464
; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext)

llvm/test/Instrumentation/InstrProfiling/profiling.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
; CHECK-NOT: __profn_baz
1919

2020
; ELF: @__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
21-
; ELF: @__profd_foo = private {{.*}}, section "__llvm_prf_data", comdat($__profc_foo), align 8
21+
; ELF: @__profd_foo = private global { i64, i64, i64, i8*, i8*, i32, [2 x i16] } { i64 [[#]], i64 0, i64 sub (i64 ptrtoint ([1 x i64]* @__profc_foo to i64), i64 ptrtoint ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i64)), i8* null, i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc_foo), align 8
2222
; MACHO: @__profc_foo = private global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
2323
; MACHO: @__profd_foo = private {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8
2424
; WIN: @__profc_foo = private global [1 x i64] zeroinitializer, section ".lprfc$M", align 8
@@ -59,9 +59,9 @@ declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
5959
; WIN: @llvm.compiler.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz
6060

6161
; ELF_GENERIC: define internal void @__llvm_profile_register_functions() unnamed_addr {
62-
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*))
63-
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_bar to i8*))
64-
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_baz to i8*))
62+
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*))
63+
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_bar to i8*))
64+
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd_baz to i8*))
6565
; ELF_GENERIC-NEXT: call void @__llvm_profile_register_names_function(i8* getelementptr inbounds {{.*}} @__llvm_prf_nm
6666
; ELF_GENERIC-NEXT: ret void
6767
; ELF_GENERIC-NEXT: }

llvm/test/Transforms/PGOProfile/comdat_internal.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ $foo = comdat any
1414
; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
1515
; CHECK-NOT: __profn__stdin__foo
1616
; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
17-
; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[#FOO_HASH]], i32 0, i32 0), i8* null
17+
; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint ([1 x i64]* @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd__stdin__foo.742261418966908927 to i64)), i8* null
1818
; CHECK-NOT: bitcast (i32 ()* @foo to i8*)
1919
; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
2020
; CHECK: @__llvm_prf_nm

0 commit comments

Comments
 (0)