Skip to content

Commit 94faada

Browse files
committed
[llvm][CodeGen] Machine Function Splitter
We introduce a codegen optimization pass which splits functions into hot and cold parts. This pass leverages the basic block sections feature recently introduced in LLVM from the Propeller project. The pass targets functions with profile coverage, identifies cold blocks and moves them to a separate section. The linker groups all cold blocks across functions together, decreasing fragmentation and improving icache and itlb utilization.

We evaluated the Machine Function Splitter pass on clang bootstrap and SPECInt 2017.

For clang bootstrap we observe a mean 2.33% runtime improvement with a ~32% reduction in itlb and stlb misses. Additionally, L1 icache misses reduced by 9.5% while L2 instruction misses reduced by 20%.

For SPECInt we report the change in IntRate for the C/C++ benchmarks. All benchmarks apart from mcf and x264 improve, on average by 0.6% with the max for deepsjeng at 1.6%.

| Benchmark       | % Change |
|-----------------|----------|
| 500.perlbench_r | 0.78     |
| 502.gcc_r       | 0.82     |
| 505.mcf_r       | -0.30    |
| 520.omnetpp_r   | 0.18     |
| 523.xalancbmk_r | 0.37     |
| 525.x264_r      | -0.46    |
| 531.deepsjeng_r | 1.61     |
| 541.leela_r     | 0.83     |
| 557.xz_r        | 0.15     |

Differential Revision: https://reviews.llvm.org/D85368
1 parent 064981f commit 94faada

12 files changed

+483
-46
lines changed

Diff for: llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
10+
#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
11+
12+
#include "llvm/ADT/STLExtras.h"
13+
14+
namespace llvm {
15+
16+
class MachineFunction;
17+
class MachineBasicBlock;
18+
19+
using MachineBasicBlockComparator =
20+
function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
21+
22+
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
23+
MachineBasicBlockComparator MBBCmp);
24+
25+
} // end namespace llvm
26+
27+
#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H

Diff for: llvm/include/llvm/CodeGen/CommandFlags.h

+2
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ bool getEnableAddrsig();
114114

115115
bool getEmitCallSiteInfo();
116116

117+
bool getEnableMachineFunctionSplitter();
118+
117119
bool getEnableDebugEntryValues();
118120

119121
bool getValueTrackingVariableLocations();

Diff for: llvm/include/llvm/CodeGen/MachineFunction.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,8 @@ class MachineFunction {
494494
/// Returns true if this function has basic block sections enabled.
495495
bool hasBBSections() const {
496496
return (BBSectionsType == BasicBlockSection::All ||
497-
BBSectionsType == BasicBlockSection::List);
497+
BBSectionsType == BasicBlockSection::List ||
498+
BBSectionsType == BasicBlockSection::Preset);
498499
}
499500

500501
/// Returns true if basic block labels are to be generated for this function.

Diff for: llvm/include/llvm/CodeGen/Passes.h

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ namespace llvm {
5050
/// selectively enable basic block sections.
5151
MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
5252

53+
/// createMachineFunctionSplitterPass - This pass splits machine functions
54+
/// using profile information.
55+
MachineFunctionPass *createMachineFunctionSplitterPass();
56+
5357
/// MachineFunctionPrinter pass - This pass prints out the machine function to
5458
/// the given stream as a debugging tool.
5559
MachineFunctionPass *

Diff for: llvm/include/llvm/InitializePasses.h

+1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ void initializeMachineCopyPropagationPass(PassRegistry&);
280280
void initializeMachineDominanceFrontierPass(PassRegistry&);
281281
void initializeMachineDominatorTreePass(PassRegistry&);
282282
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
283+
void initializeMachineFunctionSplitterPass(PassRegistry &);
283284
void initializeMachineLICMPass(PassRegistry&);
284285
void initializeMachineLoopInfoPass(PassRegistry&);
285286
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);

Diff for: llvm/include/llvm/Target/TargetOptions.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ namespace llvm {
6767
Labels, // Do not use Basic Block Sections but label basic blocks. This
6868
// is useful when associating profile counts from virtual addresses
6969
// to basic blocks.
70+
Preset, // Similar to list but the blocks are identified by passes which
71+
// seek to use Basic Block Sections, e.g. MachineFunctionSplitter.
72+
// This option cannot be set via the command line.
7073
None // Do not use Basic Block Sections.
7174
};
7275

@@ -124,10 +127,11 @@ namespace llvm {
124127
TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
125128
EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
126129
EmitStackSizeSection(false), EnableMachineOutliner(false),
127-
SupportsDefaultOutlining(false), EmitAddrsig(false),
128-
EmitCallSiteInfo(false), SupportsDebugEntryValues(false),
129-
EnableDebugEntryValues(false), ValueTrackingVariableLocations(false),
130-
ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
130+
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
131+
EmitAddrsig(false), EmitCallSiteInfo(false),
132+
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
133+
ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
134+
XRayOmitFunctionIndex(false),
131135
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
132136

133137
/// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -257,6 +261,9 @@ namespace llvm {
257261
/// Enables the MachineOutliner pass.
258262
unsigned EnableMachineOutliner : 1;
259263

264+
/// Enables the MachineFunctionSplitter pass.
265+
unsigned EnableMachineFunctionSplitter : 1;
266+
260267
/// Set if the target supports default outlining behaviour.
261268
unsigned SupportsDefaultOutlining : 1;
262269

Diff for: llvm/lib/CodeGen/BasicBlockSections.cpp

+47-40
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#include "llvm/ADT/SmallVector.h"
7070
#include "llvm/ADT/StringMap.h"
7171
#include "llvm/ADT/StringRef.h"
72+
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
7273
#include "llvm/CodeGen/MachineFunction.h"
7374
#include "llvm/CodeGen/MachineFunctionPass.h"
7475
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -226,9 +227,9 @@ static bool getBBClusterInfoForFunction(
226227
// and "Cold" succeeding all other clusters.
227228
// FuncBBClusterInfo represent the cluster information for basic blocks. If this
228229
// is empty, it means unique sections for all basic blocks in the function.
229-
static bool assignSectionsAndSortBasicBlocks(
230-
MachineFunction &MF,
231-
const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
230+
static void
231+
assignSections(MachineFunction &MF,
232+
const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
232233
assert(MF.hasBBSections() && "BB Sections is not set for function.");
233234
// This variable stores the section ID of the cluster containing eh_pads (if
234235
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -271,12 +272,51 @@ static bool assignSectionsAndSortBasicBlocks(
271272
for (auto &MBB : MF)
272273
if (MBB.isEHPad())
273274
MBB.setSectionID(EHPadsSectionID.getValue());
275+
}
274276

277+
void llvm::sortBasicBlocksAndUpdateBranches(
278+
MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
275279
SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
276280
MF.getNumBlockIDs());
277281
for (auto &MBB : MF)
278282
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
279283

284+
MF.sort(MBBCmp);
285+
286+
// Set IsBeginSection and IsEndSection according to the assigned section IDs.
287+
MF.assignBeginEndSections();
288+
289+
// After reordering basic blocks, we must update basic block branches to
290+
// insert explicit fallthrough branches when required and optimize branches
291+
// when possible.
292+
updateBranches(MF, PreLayoutFallThroughs);
293+
}
294+
295+
bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
296+
auto BBSectionsType = MF.getTarget().getBBSectionsType();
297+
assert(BBSectionsType != BasicBlockSection::None &&
298+
"BB Sections not enabled!");
299+
// Renumber blocks before sorting them for basic block sections. This is
300+
// useful during sorting, basic blocks in the same section will retain the
301+
// default order. This renumbering should also be done for basic block
302+
// labels to match the profiles with the correct blocks.
303+
MF.RenumberBlocks();
304+
305+
if (BBSectionsType == BasicBlockSection::Labels) {
306+
MF.setBBSectionsType(BBSectionsType);
307+
MF.createBBLabels();
308+
return true;
309+
}
310+
311+
std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
312+
if (BBSectionsType == BasicBlockSection::List &&
313+
!getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
314+
FuncBBClusterInfo))
315+
return true;
316+
MF.setBBSectionsType(BBSectionsType);
317+
MF.createBBLabels();
318+
assignSections(MF, FuncBBClusterInfo);
319+
280320
// We make sure that the cluster including the entry basic block precedes all
281321
// other clusters.
282322
auto EntryBBSectionID = MF.front().getSectionID();
@@ -300,7 +340,8 @@ static bool assignSectionsAndSortBasicBlocks(
300340
// contiguous and ordered accordingly. Furthermore, clusters are ordered in
301341
// increasing order of their section IDs, with the exception and the
302342
// cold section placed at the end of the function.
303-
MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
343+
auto Comparator = [&](const MachineBasicBlock &X,
344+
const MachineBasicBlock &Y) {
304345
auto XSectionID = X.getSectionID();
305346
auto YSectionID = Y.getSectionID();
306347
if (XSectionID != YSectionID)
@@ -311,43 +352,9 @@ static bool assignSectionsAndSortBasicBlocks(
311352
return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
312353
FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
313354
return X.getNumber() < Y.getNumber();
314-
});
315-
316-
// Set IsBeginSection and IsEndSection according to the assigned section IDs.
317-
MF.assignBeginEndSections();
318-
319-
// After reordering basic blocks, we must update basic block branches to
320-
// insert explicit fallthrough branches when required and optimize branches
321-
// when possible.
322-
updateBranches(MF, PreLayoutFallThroughs);
323-
324-
return true;
325-
}
326-
327-
bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
328-
auto BBSectionsType = MF.getTarget().getBBSectionsType();
329-
assert(BBSectionsType != BasicBlockSection::None &&
330-
"BB Sections not enabled!");
331-
// Renumber blocks before sorting them for basic block sections. This is
332-
// useful during sorting, basic blocks in the same section will retain the
333-
// default order. This renumbering should also be done for basic block
334-
// labels to match the profiles with the correct blocks.
335-
MF.RenumberBlocks();
336-
337-
if (BBSectionsType == BasicBlockSection::Labels) {
338-
MF.setBBSectionsType(BBSectionsType);
339-
MF.createBBLabels();
340-
return true;
341-
}
355+
};
342356

343-
std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
344-
if (BBSectionsType == BasicBlockSection::List &&
345-
!getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
346-
FuncBBClusterInfo))
347-
return true;
348-
MF.setBBSectionsType(BBSectionsType);
349-
MF.createBBLabels();
350-
assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
357+
sortBasicBlocksAndUpdateBranches(MF, Comparator);
351358
return true;
352359
}
353360

Diff for: llvm/lib/CodeGen/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ add_llvm_component_library(LLVMCodeGen
8181
MachineFunction.cpp
8282
MachineFunctionPass.cpp
8383
MachineFunctionPrinterPass.cpp
84+
MachineFunctionSplitter.cpp
8485
MachineInstrBundle.cpp
8586
MachineInstr.cpp
8687
MachineLICM.cpp

Diff for: llvm/lib/CodeGen/CommandFlags.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ CGOPT(DebuggerKind, DebuggerTuningOpt)
8484
CGOPT(bool, EnableStackSizeSection)
8585
CGOPT(bool, EnableAddrsig)
8686
CGOPT(bool, EmitCallSiteInfo)
87+
CGOPT(bool, EnableMachineFunctionSplitter)
8788
CGOPT(bool, EnableDebugEntryValues)
8889
CGOPT(bool, ValueTrackingVariableLocations)
8990
CGOPT(bool, ForceDwarfFrameSection)
@@ -407,6 +408,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
407408
cl::init(false));
408409
CGBINDOPT(ValueTrackingVariableLocations);
409410

411+
static cl::opt<bool> EnableMachineFunctionSplitter(
412+
"split-machine-functions",
413+
cl::desc("Split out cold basic blocks from machine functions based on "
414+
"profile information"),
415+
cl::init(false));
416+
CGBINDOPT(EnableMachineFunctionSplitter);
417+
410418
static cl::opt<bool> ForceDwarfFrameSection(
411419
"force-dwarf-frame-section",
412420
cl::desc("Always emit a debug frame section."), cl::init(false));
@@ -479,6 +487,7 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
479487
Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
480488
Options.ExceptionModel = getExceptionModel();
481489
Options.EmitStackSizeSection = getEnableStackSizeSection();
490+
Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
482491
Options.EmitAddrsig = getEnableAddrsig();
483492
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
484493
Options.EnableDebugEntryValues = getEnableDebugEntryValues();

0 commit comments

Comments
 (0)