-
Notifications
You must be signed in to change notification settings - Fork 10.5k
/
Copy pathStringOptimization.cpp
519 lines (438 loc) · 18.1 KB
/
StringOptimization.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
//===--- StringOptimization.cpp - Optimize string operations --------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This pass performs several optimizations on String operations.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "string-optimization"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SIL/SILFunction.h"
#include "swift/SIL/SILBasicBlock.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/AST/SemanticAttrs.h"
#include "swift/AST/ParameterList.h"
#include "swift/AST/ASTMangler.h"
#include "swift/Demangling/Demangle.h"
#include "llvm/Support/Debug.h"
using namespace swift;
namespace {
/// Optimizes String operations with constant operands.
/// Specifically:
/// * Replaces x.append(y) with x = y if x is empty.
/// * Removes x.append("")
/// * Replaces x.append(y) with x = x + y if x and y are constant strings.
/// * Replaces _typeName(T.self) with a constant string if T is statically
/// known.
///
/// This pass must run on high-level SIL, where semantic calls are still in
/// place.
///
/// The optimization is implemented in a simple way. Therefore it cannot handle
/// complicated patterns, e.g. the dataflow analysis for the String.append self
/// argument is only done within a single block.
/// But this is totally sufficient to be able to constant propagate strings in
/// string interpolations.
///
/// If we want to make this optimization more powerful it's best done by using
/// the ConstExprStepEvaluator (which is currently lacking a few features to be
/// used for this optimization).
class StringOptimization {
/// What is statically known about a String value; see getStringInfo().
struct StringInfo {
/// The string contents. Only set for string literal initializers; empty
/// otherwise.
StringRef str;
/// Negative means: not constant
int numCodeUnits = -1;
/// Not 0 for the empty-string initializer which reserves a capacity.
int reservedCapacity = 0;
/// True if the value is a known constant string.
bool isConstant() const { return numCodeUnits >= 0; }
/// True if the value is a known constant string without any contents.
bool isEmpty() const { return isConstant() && str.empty(); }
};
/// The stdlib's String type.
SILType stringType;
/// The String initializer which takes an UTF8 string literal as argument.
/// Looked up lazily by createStringInit().
SILFunction *makeUTF8Func = nullptr;
/// Caches the analysis result for an alloc_stack or an inout function
/// argument, whether it is an "identifiable" object.
/// See mayWriteToIdentifyableObject().
llvm::DenseMap<SILValue, bool> identifyableObjectsCache;
public:
/// Runs the optimization on all blocks of \p F. Returns true if anything
/// changed.
bool run(SILFunction *F);
private:
/// Runs the optimization on a single basic block. Returns true if anything
/// changed.
bool optimizeBlock(SILBasicBlock &block);
/// Tries to simplify a String.append call, using the strings known to be
/// stored in identifiable objects (\p storedStrings).
bool optimizeStringAppend(ApplyInst *appendCall,
llvm::DenseMap<SILValue, SILValue> &storedStrings);
/// Tries to replace a _typeName() call with a constant string.
bool optimizeTypeName(ApplyInst *typeNameCall);
/// Returns \p inst as apply if it calls a function with the semantic
/// attribute \p attr and has exactly \p numArgs arguments, null otherwise.
static ApplyInst *isSemanticCall(SILInstruction *inst, StringRef attr,
unsigned numArgs);
/// Returns \p inst as store if it stores a String to an identifiable object,
/// null otherwise.
StoreInst *isStringStoreToIdentifyableObject(SILInstruction *inst);
/// Removes all objects from \p storedStrings which \p inst may modify.
static void invalidateModifiedObjects(SILInstruction *inst,
llvm::DenseMap<SILValue, SILValue> &storedStrings);
/// Returns what is known about \p value if it is a constant string.
static StringInfo getStringInfo(SILValue value);
/// Returns the constant of an Int or Bool struct which wraps an
/// integer_literal, None otherwise.
static Optional<int> getIntConstant(SILValue value);
/// Replaces \p appendCall with a store of \p newValue to the call's
/// destination; \p copyNewValue requests a copy/retain of \p newValue first.
static void replaceAppendWith(ApplyInst *appendCall, SILValue newValue,
bool copyNewValue);
/// Creates a call to the UTF8 string literal initializer for \p str before
/// \p beforeInst. Returns null if the initializer cannot be found.
ApplyInst *createStringInit(StringRef str, SILInstruction *beforeInst);
};
/// Entry point of the optimization: processes every basic block of \p F.
///
/// Returns true if at least one block was changed. Does nothing and returns
/// false if the String declaration is not available in the AST context.
bool StringOptimization::run(SILFunction *F) {
  ASTContext &astContext = F->getModule().getASTContext();
  NominalTypeDecl *stringNominal = astContext.getStringDecl();
  if (stringNominal == nullptr)
    return false;

  // Remember the String type; it is needed to recognize String stores and to
  // build new string initializer calls.
  CanType canonicalString =
      stringNominal->getDeclaredInterfaceType()->getCanonicalType();
  stringType = SILType::getPrimitiveObjectType(canonicalString);

  bool anyBlockChanged = false;
  for (SILBasicBlock &bb : *F) {
    if (optimizeBlock(bb))
      anyBlockChanged = true;
  }
  return anyBlockChanged;
}
/// Optimizes the String operations within a single basic block.
///
/// Walks the instructions in order and keeps track of which String value is
/// currently stored in which identifiable object. Returns true if any
/// instruction was replaced or removed.
bool StringOptimization::optimizeBlock(SILBasicBlock &block) {
  // Maps identifiable objects (alloc_stack, inout parameters) to the string
  // values which are currently stored in those objects.
  llvm::DenseMap<SILValue, SILValue> storedStrings;
  bool changed = false;

  auto cursor = block.begin();
  while (cursor != block.end()) {
    // Advance before processing: a successful optimization erases the
    // current instruction.
    SILInstruction *current = &*cursor;
    ++cursor;

    if (StoreInst *stringStore = isStringStoreToIdentifyableObject(current)) {
      storedStrings[stringStore->getDest()] = stringStore->getSrc();
      continue;
    }

    ApplyInst *appendCall =
        isSemanticCall(current, semantics::STRING_APPEND, 2);
    if (appendCall && optimizeStringAppend(appendCall, storedStrings)) {
      changed = true;
      continue;
    }

    ApplyInst *typeNameCall = isSemanticCall(current, semantics::TYPENAME, 2);
    if (typeNameCall && optimizeTypeName(typeNameCall)) {
      changed = true;
      continue;
    }

    // The instruction was not optimized away: forget every stored String
    // which it overwrites (or potentially overwrites).
    invalidateModifiedObjects(current, storedStrings);
  }
  return changed;
}
/// Optimize String.append in case anything is known about the parameters.
///
/// \param appendCall the call of String.append; argument 0 is the appended
///        string (rhs), argument 1 is the address of the string which is
///        appended to (lhs).
/// \param storedStrings the strings currently known to be stored in
///        identifiable objects. Updated if the value at the lhs address is
///        replaced.
/// \returns true if \p appendCall was removed or replaced. In this case
///          \p appendCall is erased and must not be used anymore.
bool StringOptimization::optimizeStringAppend(ApplyInst *appendCall,
                            llvm::DenseMap<SILValue, SILValue> &storedStrings) {
  SILValue rhs = appendCall->getArgument(0);
  StringInfo rhsString = getStringInfo(rhs);

  // Remove lhs.append(rhs) if rhs is empty.
  if (rhsString.isEmpty()) {
    appendCall->eraseFromParent();
    return true;
  }

  SILValue lhsAddr = appendCall->getArgument(1);
  // Use lookup() instead of operator[]: it yields a null value for an untracked
  // address without inserting a bogus entry into the map. getStringInfo()
  // treats a null value as "not constant".
  StringInfo lhsString = getStringInfo(storedStrings.lookup(lhsAddr));

  // The following two optimizations are a trade-off: Performance-wise it may be
  // beneficial to initialize an empty string with reserved capacity and then
  // append multiple other string components.
  // Removing the empty string (with the reserved capacity) might result in more
  // allocations.
  // So we just do this optimization up to a certain capacity limit (found by
  // experiment).
  if (lhsString.reservedCapacity > 50)
    return false;

  // Replace lhs.append(rhs) with 'lhs = rhs' if lhs is empty.
  if (lhsString.isEmpty()) {
    replaceAppendWith(appendCall, rhs, /*copyNewValue*/ true);
    storedStrings[lhsAddr] = rhs;
    return true;
  }

  // Replace lhs.append(rhs) with "lhs = lhs + rhs" if both lhs and rhs are
  // constant.
  if (lhsString.isConstant() && rhsString.isConstant()) {
    // Convert explicitly: the implicit StringRef -> std::string conversion is
    // not available in all LLVM versions.
    std::string concat = lhsString.str.str();
    concat.append(rhsString.str.data(), rhsString.str.size());
    if (ApplyInst *stringInit = createStringInit(concat, appendCall)) {
      replaceAppendWith(appendCall, stringInit, /*copyNewValue*/ false);
      storedStrings[lhsAddr] = stringInit;
      return true;
    }
  }

  return false;
}
/// Checks if the demangling tree contains any node which prevents constant
/// folding of the type name.
///
/// \param node the root of the (sub-)tree to check; must not be null.
/// \param qualified true if the type name is printed with its context.
/// \returns true if \p node or any of its descendants is problematic.
static bool containsProblematicNode(Demangle::Node *node, bool qualified) {
  switch (node->getKind()) {
  case Demangle::Node::Kind::LocalDeclName:
    // The printing of contexts for local types is completely different
    // in the runtime. Don't constant fold if we need to print the context.
    if (qualified)
      return true;
    break;
  case Demangle::Node::Kind::Class: {
    // ObjC class names are not derived from the mangling but from the
    // ObjC runtime. We cannot constant fold this.
    // The context of a class is not necessarily a module (e.g. for a class
    // which is nested in another type), and only a module node carries the
    // text we want to compare. So check the kind before reading the text.
    if (node->getNumChildren() > 0) {
      Demangle::Node *context = node->getChild(0);
      if (context->getKind() == Demangle::Node::Kind::Module &&
          context->getText() == "__C")
        return true;
    }
    break;
  }
  default:
    break;
  }
  for (Demangle::Node *child : *node) {
    if (containsProblematicNode(child, qualified))
      return true;
  }
  return false;
}
/// Try to replace a _typeName() call with a constant string if the type is
/// statically known.
///
/// \param typeNameCall the call of _typeName; argument 0 is the existential
///        metatype, argument 1 is the "qualified" flag.
/// \returns true if the call was replaced by a string literal initializer. In
///          this case \p typeNameCall is erased and must not be used anymore.
bool StringOptimization::optimizeTypeName(ApplyInst *typeNameCall) {
  // Check, if the type is statically known.
  auto *anyType =
      dyn_cast<InitExistentialMetatypeInst>(typeNameCall->getArgument(0));
  if (!anyType)
    return false;
  auto *metatypeInst = dyn_cast<MetatypeInst>(anyType->getOperand());
  if (!metatypeInst)
    return false;

  auto metatype = metatypeInst->getType().getAs<MetatypeType>();
  Type ty = metatype->getInstanceType();
  // An archetype is not a concrete type. The same is true for a dynamic Self
  // type: the actual class is only known at runtime, so folding it would
  // produce the name of the static class.
  if (ty->hasArchetype() || ty->hasDynamicSelfType())
    return false;

  // Usually the "qualified" parameter of _typeName() is a constant boolean.
  Optional<int> isQualifiedOpt = getIntConstant(typeNameCall->getArgument(1));
  if (!isQualifiedOpt)
    return false;
  bool isQualified = isQualifiedOpt.getValue() != 0;

  // Create the constant type string by mangling + demangling.
  Mangle::ASTMangler mangler;
  std::string mangledTypeName = mangler.mangleTypeForTypeName(ty);

  Demangle::DemangleOptions options;
  options.PrintForTypeName = true;
  options.DisplayLocalNameContexts = false;
  options.QualifyEntities = isQualified;

  Demangle::Context ctx;
  Demangle::NodePointer root = ctx.demangleTypeAsNode(mangledTypeName);
  if (!root || containsProblematicNode(root, isQualified))
    return false;

  std::string typeStr = nodeToString(root, options);
  if (typeStr.empty())
    return false;

  // createStringInit() fails if the string initializer cannot be found.
  ApplyInst *stringInit = createStringInit(typeStr, typeNameCall);
  if (!stringInit)
    return false;

  typeNameCall->replaceAllUsesWith(stringInit);
  typeNameCall->eraseFromParent();
  return true;
}
/// Matches a call of a semantic function.
///
/// Returns \p inst as ApplyInst if it is an apply with exactly \p numArgs
/// arguments and its referenced function carries the semantic attribute
/// \p attr. Returns null in all other cases.
ApplyInst *StringOptimization::isSemanticCall(SILInstruction *inst,
                                              StringRef attr, unsigned numArgs) {
  if (auto *call = dyn_cast<ApplyInst>(inst)) {
    if (call->getNumArguments() == numArgs) {
      SILFunction *target = call->getReferencedFunctionOrNull();
      if (target != nullptr && target->hasSemanticsAttr(attr))
        return call;
    }
  }
  return nullptr;
}
/// Returns true for the instructions which we can safely analyze as a
/// potential write to an identifiable object.
///
/// Any other kind of object user, which may write to the object or let the
/// object address escape in some unexpected way (like address projections),
/// makes us ignore that object: it is not treated as "identifiable".
static bool mayWriteToIdentifyableObject(SILInstruction *inst) {
  // Keep it simple and only handle stores and applies. This covers most
  // cases, especially string interpolation.
  if (isa<StoreInst>(inst))
    return true;
  return isa<ApplyInst>(inst);
}
/// Returns the store instruction if \p inst is a store of a String to an
/// identifiable object, and null otherwise.
///
/// The per-object analysis result is cached in identifyableObjectsCache.
StoreInst *StringOptimization::
isStringStoreToIdentifyableObject(SILInstruction *inst) {
  auto *store = dyn_cast<StoreInst>(inst);
  if (store == nullptr || store->getSrc()->getType() != stringType)
    return nullptr;

  SILValue destAddr = store->getDest();

  // We only handle alloc_stack and indirect function arguments. For those we
  // can be sure that they are not aliased, just by checking all users.
  const bool isCandidate =
      isa<AllocStackInst>(destAddr) || isExclusiveArgument(destAddr);
  if (!isCandidate)
    return nullptr;

  // Re-use an earlier analysis result for this object, if there is one.
  auto cached = identifyableObjectsCache.find(destAddr);
  if (cached != identifyableObjectsCache.end())
    return cached->second ? store : nullptr;

  // The object is "identifiable" if all its users can be tracked in a simple
  // way: stores and applies, plus a few harmless instructions.
  for (Operand *use : destAddr->getUses()) {
    SILInstruction *user = use->getUser();
    const SILInstructionKind kind = user->getKind();

    // These instructions neither write to destAddr nor let destAddr escape.
    if (kind == SILInstructionKind::DebugValueAddrInst ||
        kind == SILInstructionKind::DeallocStackInst ||
        kind == SILInstructionKind::LoadInst)
      continue;

    if (mayWriteToIdentifyableObject(user))
      continue;

    // An unhandled user: it may write to destAddr or let destAddr "escape"
    // (like an address projection).
    identifyableObjectsCache[destAddr] = false;
    return nullptr;
  }

  identifyableObjectsCache[destAddr] = true;
  return store;
}
/// Drops every object from \p storedStrings which \p inst (potentially)
/// modifies.
void StringOptimization::invalidateModifiedObjects(SILInstruction *inst,
                            llvm::DenseMap<SILValue, SILValue> &storedStrings) {
  // Only instructions which may write are of interest; others, like "load" or
  // "dealloc_stack", are ignored.
  // Note that identifiable objects (= keys in storedStrings) can only have
  // certain kinds of instructions as users: those which are accepted by
  // isStringStoreToIdentifyableObject().
  if (mayWriteToIdentifyableObject(inst)) {
    // Conservatively treat every operand as written.
    for (Operand &operand : inst->getAllOperands()) {
      SILValue touched = operand.get();
      storedStrings.erase(touched);
    }
  }
}
/// Returns information about value if it's a constant string.
///
/// Recognized are calls of the empty-string initializer, the empty-string
/// initializer with a reserved capacity and the UTF8 string literal
/// initializer.
///
/// \param value the value to inspect; may be null.
/// \returns the string info, which is "not constant" (the default StringInfo)
///          if \p value is not recognized as constant string.
StringOptimization::StringInfo StringOptimization::getStringInfo(SILValue value) {
  // Start with a non-constant result.
  StringInfo result;

  auto *apply = dyn_cast_or_null<ApplyInst>(value);
  if (!apply)
    return result;

  SILFunction *callee = apply->getReferencedFunctionOrNull();
  if (!callee)
    return result;

  if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY)) {
    // An empty string initializer.
    result.numCodeUnits = 0;
    return result;
  }

  if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY_WITH_CAPACITY)) {
    // An empty string initializer with initial capacity.
    result.numCodeUnits = 0;
    // Be conservative and assume the largest capacity if it's not a constant.
    result.reservedCapacity = std::numeric_limits<int>::max();
    if (apply->getNumArguments() > 0) {
      if (Optional<int> capacity = getIntConstant(apply->getArgument(0)))
        result.reservedCapacity = capacity.getValue();
    }
    return result;
  }

  if (callee->hasSemanticsAttr(semantics::STRING_MAKE_UTF8) &&
      apply->getNumArguments() >= 2) {
    // A string literal initializer.
    auto *stringLiteral = dyn_cast<StringLiteralInst>(apply->getArgument(0));
    auto *intLiteral = dyn_cast<IntegerLiteralInst>(apply->getArgument(1));
    if (intLiteral && stringLiteral &&
        // For simplicity, we only support UTF8 string literals.
        stringLiteral->getEncoding() == StringLiteralInst::Encoding::UTF8) {
      StringRef text = stringLiteral->getValue();
      // The passed number of code units should always match the size of the
      // string in the string literal. Just to be on the safe side, check it.
      // Also make sure that the size can be represented in numCodeUnits.
      if (text.size() <= size_t(std::numeric_limits<int>::max()) &&
          intLiteral->getValue() == text.size()) {
        result.str = text;
        result.numCodeUnits = static_cast<int>(text.size());
        return result;
      }
    }
  }
  return result;
}
/// Returns the constant integer value if \a value is an Int or Bool struct with
/// an integer_literal as operand.
///
/// The literal is sign-extended, i.e. a 1-bit literal with its bit set yields a
/// non-zero (negative) value.
///
/// \returns None if \p value is not such a struct or if the constant cannot be
///          represented in an int.
Optional<int> StringOptimization::getIntConstant(SILValue value) {
  auto *boolOrIntStruct = dyn_cast<StructInst>(value);
  if (!boolOrIntStruct || boolOrIntStruct->getNumOperands() != 1)
    return None;

  auto *literal = dyn_cast<IntegerLiteralInst>(boolOrIntStruct->getOperand(0));
  // getSExtValue() requires that the value fits into 64 bits as signed number.
  if (!literal || !literal->getValue().isSignedIntN(64))
    return None;

  // Don't silently truncate: a constant which does not survive the round trip
  // through int is treated as unknown.
  const auto wideValue = literal->getValue().getSExtValue();
  const int narrowValue = static_cast<int>(wideValue);
  if (narrowValue != wideValue)
    return None;

  return narrowValue;
}
/// Replaces a String.append() call with a store of \p newValue to the
/// destination of the append, and erases \p appendCall.
///
/// If \p copyNewValue is true, \p newValue is copied (with ownership) or
/// retained (without ownership) before it is stored.
void StringOptimization::replaceAppendWith(ApplyInst *appendCall,
                                  SILValue newValue, bool copyNewValue) {
  SILBuilder builder(appendCall);
  SILLocation loc = appendCall->getLoc();
  SILValue destAddr = appendCall->getArgument(1);
  const bool hasOwnership = appendCall->getFunction()->hasOwnership();

  if (copyNewValue) {
    if (hasOwnership)
      newValue = builder.createCopyValue(loc, newValue);
    else
      builder.createRetainValue(loc, newValue, builder.getDefaultAtomicity());
  }

  // Without ownership there is no assigning store: the old value has to be
  // destroyed explicitly.
  if (!hasOwnership)
    builder.createDestroyAddr(loc, destAddr);

  builder.createStore(loc, newValue, destAddr,
                      hasOwnership ? StoreOwnershipQualifier::Assign
                                   : StoreOwnershipQualifier::Unqualified);

  appendCall->eraseFromParent();
}
/// Creates a call of the UTF8 string literal initializer for \p str, inserted
/// before \p beforeInst.
///
/// Returns null, without creating any instruction, if the initializer
/// declaration or its SIL function cannot be found.
ApplyInst *StringOptimization::createStringInit(StringRef str,
                                                SILInstruction *beforeInst) {
  SILModule &module = beforeInst->getFunction()->getModule();
  ASTContext &ctxt = module.getASTContext();

  if (makeUTF8Func == nullptr) {
    // Find the String initializer which takes a string_literal as argument.
    // A successful lookup is remembered in makeUTF8Func.
    ConstructorDecl *initDecl = ctxt.getMakeUTF8StringDecl();
    if (initDecl == nullptr)
      return nullptr;

    SILDeclRef initRef(initDecl, SILDeclRef::Kind::Allocator);
    auto mangledName = initRef.mangle();
    makeUTF8Func = module.findFunction(mangledName, SILLinkage::PublicExternal);
    if (makeUTF8Func == nullptr)
      return nullptr;
  }

  SILBuilder builder(beforeInst);
  SILLocation loc = beforeInst->getLoc();

  // The arguments of the initializer: the literal, its number of code units,
  // the isASCII flag and the String metatype.
  auto *literal =
      builder.createStringLiteral(loc, str, StringLiteralInst::Encoding::UTF8);

  SILType wordType = SILType::getBuiltinWordType(ctxt);
  auto *length =
      builder.createIntegerLiteral(loc, wordType, literal->getCodeUnitCount());

  SILType int1Type = SILType::getBuiltinIntegerType(1, ctxt);
  auto *isAscii = builder.createIntegerLiteral(
      loc, int1Type, intmax_t(ctxt.isASCIIString(str)));

  CanType thinStringMetatype(MetatypeType::get(stringType.getASTType(),
                                               MetatypeRepresentation::Thin));
  SILType stringMetaType = SILType::getPrimitiveObjectType(thinStringMetatype);
  auto *metaTypeInst = builder.createMetatype(loc, stringMetaType);

  auto *functionRef = builder.createFunctionRefFor(loc, makeUTF8Func);
  return builder.createApply(loc, functionRef, SubstitutionMap(),
                             { literal, length, isAscii, metaTypeInst });
}
/// The StringOptimization function pass.
class StringOptimizationPass : public SILFunctionTransform {
public:
void run() override {
SILFunction *F = getFunction();
if (!F->shouldOptimize())
return;
LLVM_DEBUG(llvm::dbgs() << "*** StringOptimization on function: "
<< F->getName() << " ***\n");
StringOptimization stringOptimization;
bool changed = stringOptimization.run(F);
if (changed) {
invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);
}
}
};
} // end anonymous namespace
/// Creates a new instance of the StringOptimization function pass.
SILTransform *swift::createStringOptimization() {
  SILTransform *pass = new StringOptimizationPass();
  return pass;
}