-
Notifications
You must be signed in to change notification settings - Fork 10.4k
/
Copy pathStringOptimization.cpp
774 lines (661 loc) · 27.1 KB
/
StringOptimization.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
//===--- StringOptimization.cpp - Optimize string operations --------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// This pass performs several optimizations on String operations.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "string-optimization"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SIL/SILFunction.h"
#include "swift/SIL/SILBasicBlock.h"
#include "swift/SIL/SILGlobalVariable.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/AST/SemanticAttrs.h"
#include "swift/AST/ParameterList.h"
#include "swift/AST/ASTMangler.h"
#include "swift/Demangling/Demangle.h"
#include "llvm/Support/Debug.h"
#include <cstdint>
#include <limits>
using namespace swift;
namespace {
/// Optimizes String operations with constant operands.
/// Specifically:
/// * Replaces x.append(y) with x = y if x is empty.
/// * Removes x.append("")
/// * Replaces x.append(y) with x = x + y if x and y are constant strings.
/// * Replaces _typeName(T.self) with a constant string if T is statically
/// known.
/// * Replaces String(literal).utf8CString with the string literal itself.
///
/// This pass must run on high-level SIL, where semantic calls are still in
/// place.
///
/// The optimization is implemented in a simple way. Therefore it cannot handle
/// complicated patterns, e.g. the dataflow analysis for the String.append self
/// argument is only done within a single block.
/// But this is totally sufficient to be able to constant propagate strings in
/// string interpolations.
///
/// If we want to make this optimization more powerful it's best done by using
/// the ConstExprStepEvaluator (which is currently lacking a few features to be
/// used for this optimization).
class StringOptimization {

  /// Describes what is statically known about a String value.
  struct StringInfo {
    /// The string contents.
    StringRef str;

    /// The capacity reserved for the string. A negative value marks the
    /// string as not constant.
    int reservedCapacity = 0;

    StringInfo(StringRef str, int reservedCapacity = 0)
        : str(str), reservedCapacity(reservedCapacity) {}

    /// True if the string is a constant.
    bool isConstant() const { return !(reservedCapacity < 0); }

    /// True if the string is a constant and has no contents.
    bool isEmpty() const {
      if (!isConstant())
        return false;
      return str.empty();
    }

    /// The result used when nothing is known about a value.
    static StringInfo unknown() {
      return StringInfo(StringRef(), /*reservedCapacity*/ -1);
    }
  };

  /// The SIL type of the stdlib's String.
  SILType stringType;

  /// The String initializer which takes an UTF8 string literal as argument.
  /// It is looked up lazily by createStringInit().
  SILFunction *makeUTF8Func = nullptr;

  /// Remembers for an alloc_stack or an inout function argument whether it was
  /// found to be an "identifiable" object.
  /// See mayWriteToIdentifyableObject().
  llvm::DenseMap<SILValue, bool> identifiableObjectsCache;

public:
  /// Runs the optimization on \p F and returns true if anything changed.
  bool run(SILFunction *F);

private:
  /// Runs the optimization on a single basic block.
  bool optimizeBlock(SILBasicBlock &block);

  /// Handlers for the individual semantic calls. Each one returns true if it
  /// changed the SIL.
  bool optimizeStringAppend(ApplyInst *appendCall,
                            llvm::DenseMap<SILValue, SILValue> &storedStrings);
  bool optimizeStringConcat(ApplyInst *concatCall);
  bool optimizeTypeName(ApplyInst *typeNameCall);
  bool optimizeGetCString(ApplyInst *getCStringCall);

  /// Pattern matching helpers.
  static ApplyInst *isSemanticCall(SILInstruction *inst, StringRef attr,
                                   unsigned numArgs);
  StoreInst *isStringStoreToIdentifyableObject(SILInstruction *inst);
  static void invalidateModifiedObjects(SILInstruction *inst,
                            llvm::DenseMap<SILValue, SILValue> &storedStrings);

  /// Helpers which extract constants from SIL values.
  static StringInfo getStringInfo(SILValue value);
  static StringInfo getStringFromStaticLet(SILValue value);
  static std::optional<int> getIntConstant(SILValue value);

  /// Helpers which create the replacement instructions.
  static void replaceAppendWith(ApplyInst *appendCall, SILValue newValue);
  static SILValue copyValue(SILValue value, SILInstruction *before);
  ApplyInst *createStringInit(StringRef str, SILInstruction *beforeInst);
};
/// The main entry point of the optimization.
///
/// Visits every basic block of \p F and returns true if at least one block was
/// changed. Returns false without doing anything if the AST context has no
/// String declaration.
bool StringOptimization::run(SILFunction *F) {
  ASTContext &astContext = F->getModule().getASTContext();
  NominalTypeDecl *stringDecl = astContext.getStringDecl();
  if (stringDecl == nullptr)
    return false;

  // Remember the SIL type of String. It is needed to recognize stores of
  // String values and to build String initializer calls.
  stringType = SILType::getPrimitiveObjectType(
      stringDecl->getDeclaredInterfaceType()->getCanonicalType());

  bool anyBlockChanged = false;
  for (SILBasicBlock &bb : *F) {
    if (optimizeBlock(bb))
      anyBlockChanged = true;
  }
  return anyBlockChanged;
}
/// Run the optimization on a basic block.
///
/// Walks the instructions of \p block in order, keeps track of String values
/// which are stored to identifiable objects and tries to optimize the semantic
/// String calls it finds. Returns true if anything was changed.
bool StringOptimization::optimizeBlock(SILBasicBlock &block) {
  /// Maps identifiable objects (alloc_stack, inout parameters) to string values
  /// which are stored in those objects.
  llvm::DenseMap<SILValue, SILValue> storedStrings;

  bool madeChange = false;
  auto nextIter = block.begin();
  while (nextIter != block.end()) {
    // Advance the iterator first: the handlers below may erase the current
    // instruction.
    SILInstruction *current = &*nextIter;
    ++nextIter;

    if (StoreInst *stringStore = isStringStoreToIdentifyableObject(current)) {
      storedStrings[stringStore->getDest()] = stringStore->getSrc();
      continue;
    }

    // Try the handlers one after another until one of them succeeds. Once a
    // handler succeeded, `current` must not be inspected anymore.
    bool optimized = false;
    if (ApplyInst *call =
            isSemanticCall(current, semantics::STRING_APPEND, 2)) {
      optimized = optimizeStringAppend(call, storedStrings);
    }
    if (!optimized) {
      if (ApplyInst *call =
              isSemanticCall(current, semantics::STRING_CONCAT, 3)) {
        optimized = optimizeStringConcat(call);
      }
    }
    if (!optimized) {
      if (ApplyInst *call = isSemanticCall(current, semantics::TYPENAME, 2)) {
        optimized = optimizeTypeName(call);
      }
    }
    if (!optimized) {
      if (ApplyInst *call = isSemanticCall(
              current, semantics::STRING_GET_UTF8_CSTRING, 1)) {
        optimized = optimizeGetCString(call);
      }
    }
    if (optimized) {
      madeChange = true;
      continue;
    }

    // Remove items from storedStrings if the instruction overwrites (or
    // potentially overwrites) a stored String in an identifiable object.
    invalidateModifiedObjects(current, storedStrings);
  }
  return madeChange;
}
/// Optimize String.append in case anything is known about the parameters.
///
/// \p appendCall is the call to String.append: argument 0 is the appended
/// string, argument 1 is the address of the string which is appended to.
/// \p storedStrings maps identifiable objects to the String value which is
/// currently stored in them. It is updated if the append is replaced by a
/// store of a new value.
///
/// Returns true if \p appendCall was removed or replaced.
bool StringOptimization::optimizeStringAppend(ApplyInst *appendCall,
                          llvm::DenseMap<SILValue, SILValue> &storedStrings) {
  SILValue rhs = appendCall->getArgument(0);
  StringInfo rhsString = getStringInfo(rhs);

  // Remove lhs.append(rhs) if rhs is empty.
  if (rhsString.isEmpty()) {
    appendCall->eraseFromParent();
    return true;
  }

  SILValue lhsAddr = appendCall->getArgument(1);
  // Use lookup() instead of operator[]: this is a pure query and must not
  // insert a null entry into the map for every destination we know nothing
  // about. A missing entry yields a null SILValue, which getStringInfo()
  // reports as "unknown".
  StringInfo lhsString = getStringInfo(storedStrings.lookup(lhsAddr));

  // The following two optimizations are a trade-off: Performance-wise it may be
  // beneficial to initialize an empty string with reserved capacity and then
  // append multiple other string components.
  // Removing the empty string (with the reserved capacity) might result in more
  // allocations.
  // So we just do this optimization up to a certain capacity limit (found by
  // experiment).
  if (lhsString.reservedCapacity > 50)
    return false;

  // Replace lhs.append(rhs) with 'lhs = rhs' if lhs is empty.
  if (lhsString.isEmpty()) {
    replaceAppendWith(appendCall, copyValue(rhs, appendCall));
    storedStrings[lhsAddr] = rhs;
    return true;
  }

  // Replace lhs.append(rhs) with "lhs = lhs + rhs" if both lhs and rhs are
  // constant.
  if (lhsString.isConstant() && rhsString.isConstant()) {
    std::string concat = lhsString.str.str();
    concat += rhsString.str;
    if (ApplyInst *stringInit = createStringInit(concat, appendCall)) {
      replaceAppendWith(appendCall, stringInit);
      storedStrings[lhsAddr] = stringInit;
      return true;
    }
  }

  return false;
}
/// Optimize String.+ in case anything is known about the parameters.
///
/// Argument 0 of \p concatCall is the left, argument 1 the right operand.
/// Returns true if \p concatCall was replaced and erased.
bool StringOptimization::optimizeStringConcat(ApplyInst *concatCall) {
  // Common tail of all successful cases: forward the uses of the call to the
  // replacement value and delete the call.
  auto replaceConcatWith = [concatCall](SILValue replacement) {
    concatCall->replaceAllUsesWith(replacement);
    concatCall->eraseFromParent();
    return true;
  };

  SILValue leftOperand = concatCall->getArgument(0);
  SILValue rightOperand = concatCall->getArgument(1);

  // lhs + "" is just lhs.
  StringInfo rightInfo = getStringInfo(rightOperand);
  if (rightInfo.isEmpty())
    return replaceConcatWith(copyValue(leftOperand, concatCall));

  // "" + rhs is just rhs.
  StringInfo leftInfo = getStringInfo(leftOperand);
  if (leftInfo.isEmpty())
    return replaceConcatWith(copyValue(rightOperand, concatCall));

  // If neither side is empty, both sides have to be constant to fold the
  // concatenation into a single string literal.
  if (!leftInfo.isConstant() || !rightInfo.isConstant())
    return false;

  std::string folded = leftInfo.str.str();
  folded += rightInfo.str;
  ApplyInst *foldedInit = createStringInit(folded, concatCall);
  if (!foldedInit)
    return false;
  return replaceConcatWith(foldedInit);
}
/// Checks if the demangling tree contains any node which prevents constant
/// folding of the type name.
///
/// \p qualified tells if the type name is printed with its context. The tree
/// below \p node is searched recursively.
static bool containsProblematicNode(Demangle::Node *node, bool qualified) {
  using Kind = Demangle::Node::Kind;
  const Kind nodeKind = node->getKind();

  // The printing of contexts for local types is completely different
  // in the runtime. Don't constant fold if we need to print the context.
  if (nodeKind == Kind::LocalDeclName && qualified)
    return true;

  // ObjC class names are not derived from the mangling but from the
  // ObjC runtime. We cannot constant fold this.
  if (nodeKind == Kind::Class) {
    Demangle::Node *parentContext = node->getChild(0);
    if (parentContext->getKind() == Kind::Module &&
        parentContext->getText() == "__C") {
      return true;
    }
  }

  // This node is fine, but one of its descendants might not be.
  for (Demangle::Node *childNode : *node) {
    if (containsProblematicNode(childNode, qualified))
      return true;
  }
  return false;
}
/// Try to replace a _typeName() call with a constant string if the type is
/// statically known.
///
/// Argument 0 of \p typeNameCall is the metatype, argument 1 the "qualified"
/// flag. Returns true if the call was replaced by a String initializer call.
bool StringOptimization::optimizeTypeName(ApplyInst *typeNameCall) {
  // The type is statically known if the argument is a concrete metatype which
  // is wrapped into an existential metatype.
  auto *existentialMetatype =
      dyn_cast<InitExistentialMetatypeInst>(typeNameCall->getArgument(0));
  auto *concreteMetatype =
      existentialMetatype
          ? dyn_cast<MetatypeInst>(existentialMetatype->getOperand())
          : nullptr;
  if (!concreteMetatype)
    return false;

  auto metatype = concreteMetatype->getType().getAs<MetatypeType>();
  Type ty = metatype->getInstanceType();
  if (ty->hasArchetype() || ty->hasDynamicSelfType())
    return false;

  // Usually the "qualified" parameter of _typeName() is a constant boolean.
  std::optional<int> qualifiedFlag =
      getIntConstant(typeNameCall->getArgument(1));
  if (!qualifiedFlag.has_value())
    return false;
  const bool isQualified = (*qualifiedFlag != 0);

  // Create the constant type string by mangling + demangling.
  Mangle::ASTMangler mangler(ty->getASTContext());
  std::string mangledTypeName = mangler.mangleTypeForTypeName(ty);

  Demangle::DemangleOptions options;
  options.PrintForTypeName = true;
  options.DisplayLocalNameContexts = false;
  options.QualifyEntities = isQualified;

  Demangle::Context ctx;
  Demangle::NodePointer root = ctx.demangleTypeAsNode(mangledTypeName);
  if (!root)
    return false;
  if (containsProblematicNode(root, isQualified))
    return false;

  std::string typeStr = nodeToString(root, options);
  if (typeStr.empty())
    return false;

  if (ApplyInst *stringInit = createStringInit(typeStr, typeNameCall)) {
    typeNameCall->replaceAllUsesWith(stringInit);
    typeNameCall->eraseFromParent();
    return true;
  }
  return false;
}
/// Replaces a String initializer followed by String.utf8CString with a
/// (UTF8 encoded) string literal.
///
/// Note that string literals are always generated with a trailing 0-byte.
///
/// Returns true if at least one ref_tail_addr was replaced. The call itself is
/// not erased here.
bool StringOptimization::optimizeGetCString(ApplyInst *getCStringCall) {
  // Is this a String.utf8CString of a literal String?
  StringInfo stringInfo = getStringInfo(getCStringCall->getArgument(0));
  if (!stringInfo.isConstant())
    return false;

  SmallVector<SILInstruction *, 16> pending;
  auto enqueueUsers = [&pending](SingleValueInstruction *svi) {
    for (Operand *use : svi->getUses()) {
      pending.push_back(use->getUser());
    }
  };

  /// String.utf8CString returns an array of Int8. Search for ref_tail_addr of
  /// the array buffer, starting at the users of the call.
  enqueueUsers(getCStringCall);

  // The literal is created lazily and shared by all replaced ref_tail_addrs.
  StringLiteralInst *literal = nullptr;
  bool replacedAny = false;

  while (!pending.empty()) {
    SILInstruction *inst = pending.pop_back_val();

    // Look through struct_extract which extract the buffer from the array.
    if (auto *extract = dyn_cast<StructExtractInst>(inst)) {
      enqueueUsers(extract);
      continue;
    }

    auto *rta = dyn_cast<RefTailAddrInst>(inst);
    if (!rta)
      continue;

    if (!literal) {
      // Build the literal if we don't have one, yet.
      SILBuilder literalBuilder(getCStringCall);
      literal = literalBuilder.createStringLiteral(
          getCStringCall->getLoc(), stringInfo.str,
          StringLiteralInst::Encoding::UTF8);
    }

    // Replace the ref_tail_addr with a pointer_to_address of the string
    // literal.
    SILBuilder addrBuilder(rta);
    auto *strAddr = addrBuilder.createPointerToAddress(
        rta->getLoc(), literal, rta->getType(), /*isStrict*/ false);
    rta->replaceAllUsesWith(strAddr);
    replacedAny = true;
  }
  return replacedAny;
}
/// Returns the apply instruction if \p inst is a call of a function which has
/// a semantic attribute \p attr and exactly \p numArgs arguments.
///
/// Returns null in all other cases, including when the callee of the apply is
/// not a known function.
ApplyInst *StringOptimization::isSemanticCall(SILInstruction *inst,
                                              StringRef attr,
                                              unsigned numArgs) {
  if (auto *call = dyn_cast<ApplyInst>(inst)) {
    if (call->getNumArguments() == numArgs) {
      if (SILFunction *target = call->getReferencedFunctionOrNull())
        return target->hasSemanticsAttr(attr) ? call : nullptr;
    }
  }
  return nullptr;
}
/// Returns true for all instructions which we can safely analyze as a potential
/// write to an identifiable object.
///
/// If we see any other kind of object user, which may write to an object, or
/// let the object address escape in some unexpected way (like address
/// projections), we'll just ignore that object and will not treat it as
/// "identifiable" object.
static bool mayWriteToIdentifyableObject(SILInstruction *inst) {
  // For simplicity, only handle store and apply. This is sufficient for most
  // cases, especially for string interpolation.
  if (isa<StoreInst>(inst))
    return true;
  return isa<ApplyInst>(inst);
}
/// Returns the store instruction if \p inst is a store of a String to an
/// identifiable object.
///
/// The result of the "identifiable" check is cached per destination address in
/// identifiableObjectsCache.
StoreInst *StringOptimization::
isStringStoreToIdentifyableObject(SILInstruction *inst) {
  auto *store = dyn_cast<StoreInst>(inst);
  if (!store || store->getSrc()->getType() != stringType)
    return nullptr;

  SILValue destAddr = store->getDest();

  // We only handle alloc_stack and indirect function arguments. For those we
  // can be sure that they are not aliased, just by checking all users.
  const bool isCandidate =
      isa<AllocStackInst>(destAddr) || isExclusiveArgument(destAddr);
  if (!isCandidate)
    return nullptr;

  auto cached = identifiableObjectsCache.find(destAddr);
  if (cached != identifiableObjectsCache.end())
    return cached->second ? store : nullptr;

  // Decides for a single user of destAddr if we are able to track it in a
  // simple way.
  auto isTrackableUser = [](SILInstruction *user) -> bool {
    switch (user->getKind()) {
    // Those instructions neither write to destAddr nor let destAddr escape.
    case SILInstructionKind::DeallocStackInst:
    case SILInstructionKind::LoadInst:
    case SILInstructionKind::LoadBorrowInst:
      return true;
    case SILInstructionKind::DebugValueInst:
      if (DebugValueInst::hasAddrVal(user))
        return true;
      LLVM_FALLTHROUGH;
    default:
      // Stores and applies are handled by invalidating the stored string.
      // Anything else is some instruction which may write to destAddr or let
      // destAddr "escape" (like an address projection).
      return mayWriteToIdentifyableObject(user);
    }
  };

  // It's an "identifiable" object if all of its users are trackable.
  bool identifiable = true;
  for (Operand *use : destAddr->getUses()) {
    if (!isTrackableUser(use->getUser())) {
      identifiable = false;
      break;
    }
  }

  identifiableObjectsCache[destAddr] = identifiable;
  return identifiable ? store : nullptr;
}
/// Removes all objects from \p storedStrings which \p inst (potentially)
/// modifies.
void StringOptimization::invalidateModifiedObjects(SILInstruction *inst,
                          llvm::DenseMap<SILValue, SILValue> &storedStrings) {
  // Only instructions which may write are of interest. Non-writing
  // instructions, like "load" or "dealloc_stack", are ignored.
  // Note that identifiable objects (= keys in storedStrings) can only have
  // certain kind of instructions as users: all instructions which we handle in
  // isStringStoreToIdentifyableObject().
  if (mayWriteToIdentifyableObject(inst)) {
    // Conservatively drop every operand of the writing instruction.
    for (Operand &operand : inst->getAllOperands())
      storedStrings.erase(operand.get());
  }
}
/// If \p value is a struct_extract, return its operand and field.
///
/// This also looks through a call whose callee does the struct_extract on one
/// of its arguments. If nothing matches, \p value itself is returned together
/// with a null field.
static std::pair<SILValue, VarDecl *> skipStructExtract(SILValue value) {
  using Result = std::pair<SILValue, VarDecl *>;

  if (auto *extract = dyn_cast<StructExtractInst>(value))
    return Result(extract->getOperand(), extract->getField());

  const Result unchanged(value, nullptr);

  // Look through function calls, which do the struct_extract in the callee.
  // This specifically targets
  //   String(stringInterpolation: DefaultStringInterpolation)
  // which is not inlined in the high level pipeline (due to the specified
  // effects).
  auto *call = dyn_cast<ApplyInst>(value);
  if (!call)
    return unchanged;

  SILFunction *callee = call->getReferencedFunctionOrNull();
  if (!callee || !callee->isDefinition())
    return unchanged;

  // `String(stringInterpolation: DefaultStringInterpolation)` has only a single
  // basic block. Avoid the effort of searching all blocks for a `return`.
  auto *ret = dyn_cast<ReturnInst>(callee->getEntryBlock()->getTerminator());

  // The callee must return a field of one of its own arguments.
  auto *extractInCallee =
      ret ? dyn_cast<StructExtractInst>(ret->getOperand()) : nullptr;
  auto *param =
      extractInCallee
          ? dyn_cast<SILFunctionArgument>(extractInCallee->getOperand())
          : nullptr;
  if (!param)
    return unchanged;

  // Map the callee's parameter back to the argument which is passed at the
  // call site.
  return Result(call->getArgument(param->getIndex()),
                extractInCallee->getField());
}
/// Returns information about value if it's a constant string.
///
/// Recognized are calls to the semantic String initializers (empty, empty with
/// capacity, UTF8 literal) and loads from static lets. For everything else,
/// including a null \p value, StringInfo::unknown() is returned.
StringOptimization::StringInfo StringOptimization::getStringInfo(SILValue value) {
  if (!value)
    return StringInfo::unknown();

  // Look through struct_extract(struct(value)).
  // This specifically targets calls to
  //   String(stringInterpolation: DefaultStringInterpolation)
  // which are not inlined in the high level pipeline.
  std::pair<SILValue, VarDecl *> skipped = skipStructExtract(value);
  SILValue source = skipped.first;
  if (VarDecl *field = skipped.second) {
    auto *structInst = dyn_cast<StructInst>(source);
    if (!structInst)
      return StringInfo::unknown();
    source = structInst->getFieldValue(field);
  }

  auto *initCall = dyn_cast<ApplyInst>(source);
  if (!initCall)
    return getStringFromStaticLet(source);

  SILFunction *callee = initCall->getReferencedFunctionOrNull();
  if (!callee)
    return StringInfo::unknown();

  // An empty string initializer.
  if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY))
    return StringInfo("");

  // An empty string initializer with initial capacity.
  if (callee->hasSemanticsAttr(semantics::STRING_INIT_EMPTY_WITH_CAPACITY)) {
    // If the capacity is not a known constant, assume the largest possible
    // one.
    int reservedCapacity = std::numeric_limits<int>::max();
    if (initCall->getNumArguments() > 0) {
      std::optional<int> capacity = getIntConstant(initCall->getArgument(0));
      if (capacity.has_value())
        reservedCapacity = *capacity;
    }
    return StringInfo("", reservedCapacity);
  }

  // Everything which is left must be a string literal initializer.
  if (!callee->hasSemanticsAttr(semantics::STRING_MAKE_UTF8))
    return StringInfo::unknown();

  auto *stringLiteral = dyn_cast<StringLiteralInst>(initCall->getArgument(0));
  auto *lengthLiteral = dyn_cast<IntegerLiteralInst>(initCall->getArgument(1));
  if (!lengthLiteral || !stringLiteral)
    return StringInfo::unknown();

  // For simplicity, we only support UTF8 string literals.
  if (stringLiteral->getEncoding() != StringLiteralInst::Encoding::UTF8)
    return StringInfo::unknown();

  // This passed number of code units should always match the size of the
  // string in the string literal. Just to be on the safe side, check it.
  if (lengthLiteral->getValue() == stringLiteral->getValue().size())
    return StringInfo(stringLiteral->getValue());

  return StringInfo::unknown();
}
/// Return the string if \p value is a load from a global static let, which is
/// initialized with a String constant.
///
/// Returns StringInfo::unknown() if \p value is not such a load or if the
/// initializer of the global does not match the expected pattern.
StringOptimization::StringInfo
StringOptimization::getStringFromStaticLet(SILValue value) {
  // Match the pattern
  //   %ptr_to_global = apply %addressor()
  //   %global_addr = pointer_to_address %ptr_to_global
  //   %value = load %global_addr
  if (!isa<LoadInst>(value) && !isa<LoadBorrowInst>(value)) {
    return StringInfo::unknown();
  }
  auto *load = value->getDefiningInstruction();

  SILFunction *initializer = nullptr;
  auto *globalAddr = dyn_cast<GlobalAddrInst>(load->getOperand(0));
  if (globalAddr) {
    // The global accessor is inlined.

    // Usually the global_addr is immediately preceded by a call to
    // `builtin "once"` which initializes the global.
    SILInstruction *prev = globalAddr->getPreviousInstruction();
    if (!prev)
      return StringInfo::unknown();
    auto *bi = dyn_cast<BuiltinInst>(prev);
    if (!bi || bi->getBuiltinInfo().ID != BuiltinValueKind::Once)
      return StringInfo::unknown();
    initializer = getCalleeOfOnceCall(bi);
  } else {
    // The global accessor is not inlined, yet.

    auto *pta = dyn_cast<PointerToAddressInst>(load->getOperand(0));
    if (!pta)
      return StringInfo::unknown();

    auto *addressorCall = dyn_cast<ApplyInst>(pta->getOperand());
    if (!addressorCall)
      return StringInfo::unknown();

    SILFunction *addressorFunc = addressorCall->getReferencedFunctionOrNull();
    if (!addressorFunc)
      return StringInfo::unknown();

    // The addressor function has a builtin.once call to the initializer.
    BuiltinInst *onceCall = nullptr;
    initializer = findInitializer(addressorFunc, onceCall);
  }
  if (!initializer || !initializer->isGlobalInitOnceFunction())
    return StringInfo::unknown();

  // Only initializers which consist of a single basic block are analyzed.
  if (initializer->size() != 1)
    return StringInfo::unknown();

  // Match the pattern
  //   %addr = global_addr @staticStringLet
  //   ...
  //   %str = apply %stringInitializer(...)
  //   store %str to %addr
  GlobalAddrInst *gAddr = nullptr;
  for (SILInstruction &inst : initializer->front()) {
    if (auto *ga = dyn_cast<GlobalAddrInst>(&inst)) {
      // Bail if the initializer refers to more than one global_addr.
      if (gAddr)
        return StringInfo::unknown();
      gAddr = ga;
    }
  }
  if (!gAddr || !gAddr->getReferencedGlobal()->isLet())
    return StringInfo::unknown();

  // In the inlined case, make sure that the initializer really initializes the
  // global which is loaded.
  if (globalAddr &&
      globalAddr->getReferencedGlobal() != gAddr->getReferencedGlobal())
    return StringInfo::unknown();

  // The global_addr must have exactly one use: the initializing store.
  // getSingleUse() returns null if there is no use or more than one use, so
  // it must be checked before it is dereferenced.
  Operand *gUse = gAddr->getSingleUse();
  if (!gUse)
    return StringInfo::unknown();

  auto *store = dyn_cast<StoreInst>(gUse->getUser());
  if (!store || store->getDest() != gAddr)
    return StringInfo::unknown();

  SILValue initVal = store->getSrc();

  // This check is probably not needed, but let's be on the safe side:
  // it prevents an infinite recursion if the initializer of the global is
  // itself a load of another global, and so on.
  if (isa<LoadInst>(initVal) || isa<LoadBorrowInst>(initVal))
    return StringInfo::unknown();

  return getStringInfo(initVal);
}
/// Returns the constant integer value if \a value is an Int or Bool struct with
/// an integer_literal as operand.
///
/// Returns std::nullopt if \p value does not match this pattern or if the
/// value of the literal cannot be represented as an `int`.
std::optional<int> StringOptimization::getIntConstant(SILValue value) {
  auto *boolOrIntStruct = dyn_cast<StructInst>(value);
  if (!boolOrIntStruct || boolOrIntStruct->getNumOperands() != 1)
    return std::nullopt;

  auto *literal = dyn_cast<IntegerLiteralInst>(boolOrIntStruct->getOperand(0));
  if (!literal || literal->getValue().getActiveBits() > 64)
    return std::nullopt;

  // The literal can hold values which do not fit into an `int`. Report such
  // values as "not a known constant" instead of silently truncating them:
  // a truncated value could, for example, turn a huge reserved capacity into
  // a small one.
  const int64_t wideValue = literal->getValue().getSExtValue();
  if (wideValue < std::numeric_limits<int>::min() ||
      wideValue > std::numeric_limits<int>::max())
    return std::nullopt;

  return static_cast<int>(wideValue);
}
/// Replace a String.append() with a store of \p newValue to the destination.
///
/// The destination is argument 1 of \p appendCall. The call is erased.
void StringOptimization::replaceAppendWith(ApplyInst *appendCall,
                                           SILValue newValue) {
  SILBuilder builder(appendCall);
  SILLocation loc = appendCall->getLoc();
  SILValue destAddr = appendCall->getArgument(1);

  // With ownership, a store [assign] is used to overwrite the old value.
  // Without ownership, the old value is destroyed explicitly before the new
  // value is stored.
  StoreOwnershipQualifier qualifier = StoreOwnershipQualifier::Assign;
  if (!appendCall->getFunction()->hasOwnership()) {
    builder.createDestroyAddr(loc, destAddr);
    qualifier = StoreOwnershipQualifier::Unqualified;
  }
  builder.createStore(loc, newValue, destAddr, qualifier);

  appendCall->eraseFromParent();
}
/// Returns a copy of \p value. Depending if the function is in OSSA, insert
/// either a copy_value or retain_value.
///
/// The new instruction is inserted before \p before. In the retain_value case
/// \p value itself is returned.
SILValue StringOptimization::copyValue(SILValue value, SILInstruction *before) {
  SILBuilder builder(before);
  SILLocation loc = before->getLoc();

  if (!before->getFunction()->hasOwnership()) {
    builder.createRetainValue(loc, value, builder.getDefaultAtomicity());
    return value;
  }
  return builder.createCopyValue(loc, value);
}
/// Creates a call to a string initializer.
///
/// The call initializes a String with the UTF8 literal \p str and is inserted
/// before \p beforeInst. Returns null if the initializer function cannot be
/// found or loaded.
ApplyInst *StringOptimization::createStringInit(StringRef str,
                                                SILInstruction *beforeInst) {
  SILBuilderWithScope builder(beforeInst);
  SILLocation loc = beforeInst->getLoc();
  SILModule &module = beforeInst->getFunction()->getModule();
  ASTContext &ctxt = module.getASTContext();

  // Find the String initializer which takes a string_literal as argument. The
  // result is kept in makeUTF8Func so that the lookup is done only once.
  if (makeUTF8Func == nullptr) {
    ConstructorDecl *makeUTF8Decl = ctxt.getMakeUTF8StringDecl();
    if (makeUTF8Decl == nullptr)
      return nullptr;

    SILDeclRef initRef(makeUTF8Decl, SILDeclRef::Kind::Allocator);
    auto mangledName = initRef.mangle();
    makeUTF8Func =
        module.loadFunction(mangledName, SILModule::LinkingMode::LinkAll);
    if (makeUTF8Func == nullptr)
      return nullptr;
  }

  // The arguments of the initializer: the literal, its number of code units,
  // the isASCII flag and the (thin) metatype of String.
  auto *literal = builder.createStringLiteral(
      loc, str, StringLiteralInst::Encoding::UTF8);

  SILType wordType = SILType::getBuiltinWordType(ctxt);
  auto *length =
      builder.createIntegerLiteral(loc, wordType, literal->getCodeUnitCount());

  SILType int1Type = SILType::getBuiltinIntegerType(1, ctxt);
  auto *isAscii = builder.createIntegerLiteral(
      loc, int1Type, intmax_t(ctxt.isASCIIString(str)));

  SILType stringMetaType = SILType::getPrimitiveObjectType(CanType(
      MetatypeType::get(stringType.getASTType(), MetatypeRepresentation::Thin)));
  auto *metaTypeInst = builder.createMetatype(loc, stringMetaType);

  auto *functionRef = builder.createFunctionRefFor(loc, makeUTF8Func);
  return builder.createApply(loc, functionRef, SubstitutionMap(),
                             { literal, length, isAscii, metaTypeInst });
}
/// The StringOptimization function pass.
class StringOptimizationPass : public SILFunctionTransform {
public:
void run() override {
SILFunction *F = getFunction();
if (!F->shouldOptimize())
return;
LLVM_DEBUG(llvm::dbgs() << "*** StringOptimization on function: "
<< F->getName() << " ***\n");
StringOptimization stringOptimization;
bool changed = stringOptimization.run(F);
if (changed) {
invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);
}
}
};
} // end anonymous namespace
/// Creates a new instance of the StringOptimization function pass. The
/// instance is allocated with `new` and returned as a raw pointer.
SILTransform *swift::createStringOptimization() {
  SILTransform *pass = new StringOptimizationPass();
  return pass;
}