-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
/
Copy pathMakefile
136 lines (116 loc) · 5.6 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Targets that are commands rather than files. Every other target in this
# Makefile is either a build directory or a stamp file created with `touch $@`.
.PHONY: clean clean_profiles restore_originals delete_originals finish_stage1
# Optimization flags passed to llvm-bolt by the `bolt` target.
# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
BOLT_ARGS :=
# Reorder basic blocks within functions
BOLT_ARGS += -reorder-blocks=ext-tsp
# Reorder functions within the binary
BOLT_ARGS += -reorder-functions=cdsort
# Split function code into hot and cold regions
BOLT_ARGS += -split-functions
# Split as many basic blocks as possible
BOLT_ARGS += -split-all-cold
# Move jump tables to a separate section
BOLT_ARGS += -jump-tables=move
# Use regular size pages for code alignment
BOLT_ARGS += -no-huge-pages
# Fold functions with identical code
BOLT_ARGS += -icf=1
# Split using best available strategy (three-way splitting, Cache-Directed Sort)
# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed,
# so it is not part of the common flags; the `bolt` target adds it for every other library.
# BOLT_ARGS += -split-strategy=cdsplit
# Update DWARF debug info in the final binary
BOLT_ARGS += -update-debug-sections
# Print optimization statistics
BOLT_ARGS += -dyno-stats
# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
# This doesn't actually seem to do anything; the actual mitigation is leaving out --use-old-text
# for libjulia-internal, which the `bolt` target takes care of.
BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
# Variable assignments handed to the stage1 sub-make on its command line.
# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
BOLT_FLAGS := "BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition"
# Relocations are requested from the linker via --emit-relocs.
BOLT_FLAGS += "BOLT_LDFLAGS=-Wl,--emit-relocs"
# Root of the tree in which `configure` is run to create the build directories.
JULIA_ROOT      := $(CURDIR)/../..
# Out-of-tree build directories: stage0 provides the BOLT tools,
# stage1 holds the libraries that get instrumented and optimized.
STAGE0_BUILD    := $(CURDIR)/toolchain
STAGE1_BUILD    := $(CURDIR)/optimized.build
# Raw and merged BOLT profiles are kept here.
PROFILE_DIR     := $(CURDIR)/profiles-bolt
# Tools taken from the stage0 build (STAGE0_BINARIES ends with a slash).
STAGE0_BINARIES := $(STAGE0_BUILD)/usr/bin/
LLVM_BOLT       := $(STAGE0_BINARIES)llvm-bolt
LLVM_MERGEFDATA := $(STAGE0_BINARIES)merge-fdata
# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
# Names the symlinks above point to inside $(STAGE1_BUILD)/usr/lib.
# Deliberately a recursive (`=`) variable: the symlinks only exist once stage1
# has been built, so they must be resolved when a recipe is expanded, not when
# the Makefile is parsed. With `:=` the list is empty whenever stage1 is built
# in the same invocation, every loop over it does nothing, and the stamp files
# are still created.
FILES_TO_OPTIMIZE = $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink "$(STAGE1_BUILD)/usr/lib/$$file"; done)
# Hint echoed at the end of `bolt_instrument`. The value carries its own single
# quotes so the shell prints the backticks literally. Each backslash-newline
# collapses to a single space, so the message is one line.
AFTER_INSTRUMENT_MESSAGE := 'Run `make finish_stage1` to finish off the build. \
  You can now optionally collect more profiling data by running Julia with an appropriate workload, \
  if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage1`. \
  You should end up with some data in $(PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
# Create either build directory by running the `configure` target of
# $(JULIA_ROOT) with O set to the directory being made.
$(STAGE0_BUILD) $(STAGE1_BUILD):
	$(MAKE) --directory=$(JULIA_ROOT) configure O=$@
# Run the `install-BOLT` target of the stage0 deps build, then record success
# in the `stage0` stamp file. The build directory is an order-only
# prerequisite so its changing mtime never forces a rebuild.
# The stamp is touched on its own recipe line: make executes any line that
# references $(MAKE) even under `make -n`, so chaining `touch` onto that line
# would create the stamp during a dry run.
stage0: | $(STAGE0_BUILD)
	$(MAKE) -C $(STAGE0_BUILD)/deps install-BOLT
	touch $@
# Build with our custom flags, binary builder doesn't use them so we need to build LLVM for now.
# We manually skip package image creation so that we can profile it
# (only the targets listed below are built; `finish_stage1` runs the default target later).
# `stage1` is a stamp file. It is touched on its own recipe line because make
# executes any line that references $(MAKE) even under `make -n`; chaining
# `touch` onto that line would create the stamp during a dry run.
$(STAGE1_BUILD): stage0
stage1: export USE_BINARYBUILDER_LLVM=0
stage1: | $(STAGE1_BUILD)
	$(MAKE) -C $(STAGE1_BUILD) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
		julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest
	touch $@
# Keep a pristine copy of every library as `<name>.original`; llvm-bolt later
# reads these copies and overwrites the library itself.
# `copy_originals` is a stamp file. Each `cp` is checked explicitly: without
# `|| exit 1` the loop's status is that of the last iteration only, so a failed
# copy of an earlier file would go unnoticed and the stamp would still be made.
copy_originals: stage1
	for file in $(FILES_TO_OPTIMIZE); do \
		abs_file="$(STAGE1_BUILD)/usr/lib/$$file"; \
		cp "$$abs_file" "$$abs_file.original" || exit 1; \
	done
	touch $@
# Replace every library with an instrumented build made from its `.original` copy.
# The instrumented libraries write their profiles to $(PROFILE_DIR)/<name>-prof,
# with the PID appended to the file name.
# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
# as the final build uses -no-huge-pages
# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
# Every step is checked with `|| exit 1`: the loop body used to end in `printf`,
# which always succeeds, so a failing llvm-bolt run still produced the stamp.
bolt_instrument: copy_originals
	for file in $(FILES_TO_OPTIMIZE); do \
		abs_file="$(STAGE1_BUILD)/usr/lib/$$file"; \
		old_time=$$(stat -c %Y "$$abs_file") || exit 1; \
		$(LLVM_BOLT) "$$abs_file.original" -o "$$abs_file" --instrument --instrumentation-file-append-pid --instrumentation-file="$(PROFILE_DIR)/$$file-prof" -no-huge-pages || exit 1; \
		mkdir -p "$$(dirname "$(PROFILE_DIR)/$$file-prof")" || exit 1; \
		touch -d "@$$old_time" "$$abs_file" || exit 1; \
		printf "\n"; \
	done
	touch $@
	@echo $(AFTER_INSTRUMENT_MESSAGE)
# Run the default target of the stage1 build directory, i.e. everything that
# the `stage1` target left out.
finish_stage1: stage1
	$(MAKE) --directory=$(STAGE1_BUILD)
# Merge all raw profiles of each library into
# $(PROFILE_DIR)/<name>-prof.merged.fdata. `merge_data` is a stamp file.
# A merged file left over from an earlier run is removed first: it matches the
# `-prof.*.fdata` glob and would otherwise be fed back in as input while the
# redirection truncates it. The result is written to a temporary name (which
# the glob cannot match) and moved into place only on success.
# Failures abort the recipe instead of being masked by later iterations.
merge_data: bolt_instrument
	for file in $(FILES_TO_OPTIMIZE); do \
		merged="$(PROFILE_DIR)/$$file-prof.merged.fdata"; \
		rm -f "$$merged" || exit 1; \
		$(LLVM_MERGEFDATA) "$(PROFILE_DIR)/$$file"-prof.*.fdata > "$$merged.tmp" || { rm -f "$$merged.tmp"; exit 1; }; \
		mv -f "$$merged.tmp" "$$merged" || exit 1; \
	done
	touch $@
# Rebuild every library from its `.original` copy using the merged profile and
# $(BOLT_ARGS). `bolt` is a stamp file.
# The --use-old-text saves about 16 MiB of libLLVM.so size.
# However, the rust folk found it succeeds very non-deterministically for them.
# It tries to reuse old text segments to reduce binary size
# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
# That flag saves less than 1 MiB for libjulia-internal so oh well.
# We reset the mtime of the files to prevent make from rebuilding targets depending on them.
# The libjulia-internal file name is resolved once, up front, and compared
# quoted, so an unresolved symlink aborts instead of breaking the `[` test.
# Every step is checked with `|| exit 1`: the loop body used to end in `touch -d`,
# so a failing llvm-bolt run still produced the stamp.
bolt: merge_data
	internal_lib=$$(readlink "$(STAGE1_BUILD)/usr/lib/libjulia-internal.so") || exit 1; \
	for file in $(FILES_TO_OPTIMIZE); do \
		abs_file="$(STAGE1_BUILD)/usr/lib/$$file"; \
		old_time=$$(stat -c %Y "$$abs_file") || exit 1; \
		extra_args=""; \
		if [ "$$file" != "$$internal_lib" ]; then extra_args="--use-old-text -split-strategy=cdsplit"; fi; \
		$(LLVM_BOLT) "$$abs_file.original" -data "$(PROFILE_DIR)/$$file-prof.merged.fdata" -o "$$abs_file" $(BOLT_ARGS) $$extra_args || exit 1; \
		touch -d "@$$old_time" "$$abs_file" || exit 1; \
	done
	touch $@
# Throw away the whole profile directory, i.e. all collected BOLT profiles.
clean_profiles:
	rm -r -f $(PROFILE_DIR)
# Remove the stamp files (listed in pipeline order) so every step runs again.
# Build directories and profiles are left alone.
clean:
	rm -f stage0 stage1 copy_originals bolt_instrument merge_data bolt
# Put the `.original` copy of every library back in place, undoing
# instrumentation or optimization.
# Each `cp` is checked explicitly: without `|| exit 1` only the last
# iteration's status reaches make, so a library that failed to restore would
# go unreported.
restore_originals: copy_originals
	for file in $(FILES_TO_OPTIMIZE); do \
		abs_file="$(STAGE1_BUILD)/usr/lib/$$file"; \
		cp -P "$$abs_file.original" "$$abs_file" || exit 1; \
	done
# Delete the `.original` copy kept next to every library.
delete_originals: copy_originals
	for lib in $(FILES_TO_OPTIMIZE); do \
		backup=$(STAGE1_BUILD)/usr/lib/$$lib.original; \
		rm "$$backup"; \
	done