-
Notifications
You must be signed in to change notification settings - Fork 138
/
Copy pathGeneralLoopUnroller.hpp
361 lines (310 loc) · 14.8 KB
/
GeneralLoopUnroller.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
/*******************************************************************************
* Copyright IBM Corp. and others 2000
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
* or the Apache License, Version 2.0 which accompanies this distribution
* and is available at https://www.apache.org/licenses/LICENSE-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the
* Eclipse Public License, v. 2.0 are satisfied: GNU General Public License,
* version 2 with the GNU Classpath Exception [1] and GNU General Public
* License, version 2 with the OpenJDK Assembly Exception [2].
*
* [1] https://www.gnu.org/software/classpath/license.html
* [2] https://openjdk.org/legal/assembly-exception.html
*
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
*******************************************************************************/
#ifndef GENERALLOOPUNROLLER_INCL
#define GENERALLOOPUNROLLER_INCL
#include <stddef.h>
#include <stdint.h>
#include "codegen/LinkageConventionsEnum.hpp"
#include "compile/Compilation.hpp"
#include "env/TRMemory.hpp"
#include "env/jittypes.h"
#include "il/Block.hpp"
#include "il/DataTypes.hpp"
#include "il/ILOpCodes.hpp"
#include "il/ILOps.hpp"
#include "il/Node.hpp"
#include "il/Node_inlines.hpp"
#include "il/Symbol.hpp"
#include "il/SymbolReference.hpp"
#include "il/TreeTop.hpp"
#include "il/TreeTop_inlines.hpp"
#include "infra/Array.hpp"
#include "infra/Assert.hpp"
#include "infra/List.hpp"
#include "optimizer/OptimizationManager.hpp"
#include "optimizer/Optimizations.hpp"
#include "optimizer/InductionVariable.hpp"
#include "optimizer/LoopCanonicalizer.hpp"
class TR_BitVector;
class TR_BlockStructure;
class TR_Debug;
class TR_HashTab;
class TR_RegionStructure;
class TR_Structure;
class TR_StructureSubGraphNode;
class TR_UseDefInfo;
namespace TR { class AutomaticSymbol; }
namespace TR { class CFG; }
namespace TR { class CFGEdge; }
namespace TR { class Optimization; }
namespace TR { class Optimizer; }
namespace TR { class ParameterSymbol; }
class TR_LoopUnroller
{
public:
enum UnrollKind
{
NoUnroll, //
CompleteUnroll, // No loop
ExactUnroll, // Without residue
PeelOnceUnrollOnce, //
UnrollWithResidue, //
GeneralUnroll, // Retains backedges
SPMDKernel, // Vector
};
// unroll a counted loop - returns true on success
//
static bool unroll(TR::Compilation *comp, TR_RegionStructure *loop,
TR_PrimaryInductionVariable *piv, UnrollKind kind,
int32_t unrollCount, int32_t peelCount, TR::Optimizer *optimizer=NULL);
// unroll a non-counted loop - returns true on success
//
static bool unroll(TR::Compilation *comp, TR_RegionStructure *loop,
int32_t unrollCount, int32_t peelCount, TR::Optimizer *optimizer=NULL);
static TR_StructureSubGraphNode *findNodeInHierarchy(TR_RegionStructure*, int32_t);
protected:
TR_LoopUnroller(TR::Compilation *comp, TR::Optimizer *optimizer, TR_RegionStructure *loop,
TR_StructureSubGraphNode *branchNode, int32_t unrollCount,
int32_t peelCount, TR::Block *invariantBlock, UnrollKind unrollKind, int32_t vectorSize = 1);
TR_LoopUnroller(TR::Compilation *comp, TR::Optimizer *optimizer, TR_RegionStructure *loop,
TR_PrimaryInductionVariable *piv, UnrollKind unrollKind,
int32_t unrollCount, int32_t peelCount, TR::Block *invariantBlock, int32_t vectorSize = 1);
TR::Compilation *comp() { return _comp; }
TR::Optimizer *optimizer() { return _optimizer; }
bool trace() { return comp()->trace(OMR::generalLoopUnroller) || comp()->trace(OMR::SPMDKernelParallelization); }
TR_Debug * getDebug() { return comp()->getDebug(); }
TR_Memory * trMemory() { return _trMemory; }
TR_StackMemory trStackMemory() { return _trMemory; }
TR_HeapMemory trHeapMemory() { return _trMemory; }
TR_PersistentMemory * trPersistentMemory() { return _trMemory->trPersistentMemory(); }
bool isCountedLoop() { return (_piv != 0); }
bool isIncreasingLoop() { TR_ASSERT(isCountedLoop(), "assertion failure"); return (_piv->getDeltaOnBackEdge() > 0); }
int32_t getLoopStride()
{ TR_ASSERT(isCountedLoop(), "assertion failure"); return _piv->getDeltaOnBackEdge(); }
TR::Node *getTestNode() { TR_ASSERT(_piv, "assertion failure"); return _piv->getBranchBlock()->getLastRealTreeTop()->getNode(); }
TR::DataType getIndexType() { TR_ASSERT(_piv, "assertion failure"); return _piv->getSymRef()->getSymbol()->getType(); }
TR::ILOpCode getTestOpCode2() { return getTestNode()->getOpCode(); }
TR::DataType getTestChildType() { return getTestNode()->getFirstChild()->getType(); }
bool getTestIsUnsigned() { return getTestOpCode2().isUnsignedCompare(); }
static bool isWellFormedLoop(TR_RegionStructure *loop, TR::Compilation *, TR::Block *&loopInvariantBlock);
static bool isTransactionStartLoop(TR_RegionStructure *loop, TR::Compilation *);
enum EdgeContext {InvalidContext = 0, BackEdgeFromPrevGeneration, BackEdgeToEntry,
ExitEdgeFromBranchNode, BackEdgeFromLastGenerationCompleteUnroll};
void prepareForArrayShadowRenaming(TR_RegionStructure *loop);
void getLoopPreheaders(TR_RegionStructure *loop, TR_ScratchList<TR::Block> *preheaders);
void collectInternalPointers();
void collectArrayAccesses();
void examineNode(TR::Node *node, intptr_t visitCount);
struct IntrnPtr;
IntrnPtr *findIntrnPtr(int32_t symRefNum);
bool haveIdenticalOffsets(IntrnPtr *intrnPtr1, IntrnPtr *intrnPtr2);
bool isSymRefSameTypeArrayShadow(TR::Node *node);
void examineArrayAccesses();
void refineArrayAliasing();
int32_t unroll(TR_RegionStructure *loop, TR_StructureSubGraphNode *branchNode);
void unrollLoopOnce(TR_RegionStructure *loop, TR_StructureSubGraphNode *branchNode, bool finalUnroll);
bool shouldConnectToNextIteration(TR_StructureSubGraphNode *subNode, TR_RegionStructure *loop);
void generateSpillLoop(TR_RegionStructure*, TR_StructureSubGraphNode*);
void modifyOriginalLoop(TR_RegionStructure*, TR_StructureSubGraphNode*);
void modifyBranchTree(TR_RegionStructure *loop,
TR_StructureSubGraphNode *loopNode,
TR_StructureSubGraphNode *branchNode);
int32_t numExitEdgesTo(TR_RegionStructure *from, int32_t to);
bool edgeAlreadyExists(TR_StructureSubGraphNode *from,
TR_StructureSubGraphNode *to);
bool edgeAlreadyExists(TR_StructureSubGraphNode *from,
int32_t toNum);
bool cfgEdgeAlreadyExists(TR::Block *from, TR::Block *to, EdgeContext edgeContext = InvalidContext);
TR_StructureSubGraphNode *getEntryBlockNode(TR_StructureSubGraphNode *node);
void cloneBlocksInRegion(TR_RegionStructure *, bool isSpillLoop = false);
inline TR_Structure *cloneStructure(TR_Structure *);
TR_Structure *cloneBlockStructure(TR_BlockStructure *);
TR_Structure *cloneRegionStructure(TR_RegionStructure *);
void removeExternalEdge(TR_RegionStructure *parent,
TR_StructureSubGraphNode *from, int32_t to);
void redirectBackEdgeToExitDestination(TR_RegionStructure *loop,
TR_StructureSubGraphNode *branchNode,
TR_StructureSubGraphNode *fromNode,
bool notLoopBranchNode);
void addEdgeForSpillLoop(TR_RegionStructure *region,
TR::CFGEdge *originalEdge,
TR_StructureSubGraphNode *newFromNode,
TR_StructureSubGraphNode *newToNode,
bool removeOriginal = false,
EdgeContext context = InvalidContext,
bool notLoopBranchNode = false);
void addExitEdgeAndFixEverything(TR_RegionStructure *region,
TR::CFGEdge *originalEdge,
TR_StructureSubGraphNode *newFromNode,
TR_StructureSubGraphNode *exitNode = NULL,
TR::Block *newExitBlock = NULL,
EdgeContext context = InvalidContext);
void addEdgeAndFixEverything(TR_RegionStructure *region, TR::CFGEdge *originalEdge,
TR_StructureSubGraphNode *fromNode = NULL,
TR_StructureSubGraphNode *newToNode = NULL,
bool redirectOriginal = false,
bool removeOriginalEdges = false,
bool edgeToEntry = false,
EdgeContext context = InvalidContext);
//void fixExitEdges(TR_Structure *s, TR_Structure *clone);
void fixExitEdges(TR_Structure *s, TR_Structure *clone,
TR_StructureSubGraphNode *branchNode = 0);
void addEdgesInHierarchy(TR_RegionStructure *region,
TR_StructureSubGraphNode *from, TR_StructureSubGraphNode *to);
TR::Symbol *findSymbolInTree(TR::Node *node);
static bool nodeRefersToSymbol(TR::Node*, TR::Symbol*);
bool isSuccessor(TR::Block *A, TR::Block *B);
TR::CFGEdge *findEdge(TR_StructureSubGraphNode *from, TR_StructureSubGraphNode *to);
TR::Node *createIfNodeForSpillLoop(TR::Node *ifNode);
void swingBlocks(TR::Block *from, TR::Block *to); //Adds an edge to the queue of to-be-swung blocks
void processSwingBlocks(TR::Block *from, TR::Block *to);
void processSwingQueue();
bool isInternalPointerLimitExceeded();
void prepareLoopStructure(TR_RegionStructure *);
struct SwingPair
{
TR::Block *from;
TR::Block *to;
SwingPair(TR::Block *f, TR::Block *t) : from(f), to(t) {}
};
TR::Compilation *_comp;
TR_Memory * _trMemory;
TR::Optimizer *_optimizer;
TR_RegionStructure *_loop;
TR_StructureSubGraphNode *_branchNode;
int32_t _unrollCount;
int32_t _peelCount;
UnrollKind _unrollKind;
int32_t _vectorSize;
TR_RegionStructure *_rootStructure;
TR::CFG *_cfg;
int32_t _iteration;
TR::Block **_blockMapper[2];
TR_StructureSubGraphNode **_nodeMapper[2];
List<SwingPair> _swingQueue;
int32_t _numNodes; // number of blocks at the start of unrolling
TR_StructureSubGraphNode *_firstEntryNode;
// Counted Loops only
TR_PrimaryInductionVariable *_piv;
TR_StructureSubGraphNode *_spillNode;
TR::Block *_spillBranchBlock;
bool _spillLoopRequired;
TR::Block *_overflowTestBlock; // FIXME: init
TR::Block *_loopIterTestBlock; // FIXME: init
TR::Block *_loopInvariantBlock;
bool _loopInvariantBlockAtEnd;
bool _branchToExit;
bool _wasEQorNELoop;
TR::ILOpCodes _origLoopCondition;
TR::TreeTop *_startPosOfUnrolledBodies;
TR::TreeTop *_endPosOfUnrolledBodies;
TR_ScratchList<TR::SymbolReference> _newSymRefs;
struct IntrnPtr
{
int32_t symRefNum;
TR_BasicInductionVariable *biv;
int32_t bivNum;
TR::Node *offsetNode;
bool isOffsetConst;
int64_t offsetValue;
};
TR_ScratchList<IntrnPtr> _listOfInternalPointers;
struct ArrayAccess
{
TR::Node *aaNode;
TR::Node *intrnPtrNode;
};
struct ListsOfArrayAccesses
{
int32_t symRefNum;
TR_ScratchList<ArrayAccess> *list;
};
TR_ScratchList<ListsOfArrayAccesses> _listOfListsOfArrayAccesses;
friend class TR_SPMDKernelParallelizer;
};
/**
* Class TR_GeneralLoopUnroller
* ============================
*
* The general loop unroller optimization can unroll or peel a majority of
* loops with or without the loop driving tests done after each iteration
* (loop) body. Usually a peel is inserted before the unrolled loop, and
* the residual iterations to be done (at most u - 1 residual iterations
* where u is the unroll factor) are also done using the peeled code.
* Peeling aids partial redundancy elimination (PRE) as code does not have
* to be moved; instead dominated expressions can be commoned (which is far
* easier than code motion due to problems introduced by exception
* checks in Java).
*
* Async checks (yield points) are eliminated from u - 1 unrolled bodies
* and only one remains.
*
* Unroll factors and code growth thresholds are arrived at based on
* profiling information when available. This analysis uses induction
* variable information found by value propagation.
*/
class LoopWeightProbe;
class TR_GeneralLoopUnroller : public TR_LoopTransformer
{
public:
TR_GeneralLoopUnroller(TR::OptimizationManager *manager);
static TR::Optimization *create(TR::OptimizationManager *manager)
{
return new (manager->allocator()) TR_GeneralLoopUnroller(manager);
}
virtual int32_t perform();
virtual const char * optDetailString() const throw();
private:
void collectNonColdInnerLoops(TR_RegionStructure *region, List<TR_RegionStructure> &innerLoops);
// candidates for being made static
void gatherStatistics(TR_Structure *str, int32_t &numNodes, int32_t &numBlocks,
int32_t &numBranches, int32_t &numSubscripts, LoopWeightProbe &lwp);
int32_t weighNaturalLoop(TR_RegionStructure *loop,
TR_LoopUnroller::UnrollKind &, int32_t &unrollCount,
int32_t &peelCount, int32_t &cost);
bool canUnrollUnCountedLoop(TR_RegionStructure *loop,
int32_t numBlocks, int32_t numNodes,
int32_t entryBlockFrequency);
bool branchContainsInductionVariable(TR_RegionStructure *loop, TR::Node *branchNode, bool checkOnlyPiv = true);
bool branchContainsInductionVariable(TR::Node *branchNode, TR::SymbolReference *ivSymRef);
void countNodesAndSubscripts(TR::Node *node, int32_t &numNodes, int32_t &numSubscripts,
LoopWeightProbe &lwp);
void profileNonCountedLoops(List<TR_RegionStructure> &loops);
bool _haveProfilingInfo;
struct UnrollInfo
{
TR_ALLOC(TR_Memory::InductionVariableAnalysis);
UnrollInfo(TR_RegionStructure *loop,
int32_t w, int32_t c,
TR_LoopUnroller::UnrollKind unrollKind,
int32_t unrollCount, int32_t peelCount)
: _loop(loop), _weight(w), _cost(c), _unrollKind(unrollKind),
_unrollCount(unrollCount), _peelCount(peelCount) {}
TR_RegionStructure *_loop;
TR_LoopUnroller::UnrollKind _unrollKind;
int32_t _weight;
int32_t _cost;
int32_t _unrollCount;
int32_t _peelCount;
};
int32_t _basicSizeThreshold;
};
#endif