Skip to content

Commit d1ad27a

Browse files
committed
Merge remote-tracking branch 'origin/swift-5.0-branch' into stable
2 parents 4c0d911 + fee6bb7 commit d1ad27a

File tree

6 files changed

+184
-55
lines changed

6 files changed

+184
-55
lines changed

include/llvm/Analysis/ValueTracking.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,8 @@ class Value;
209209
/// return the i8 value that it is represented with. This is true for all i8
210210
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
211211
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
212-
/// i16 0x1234), return null.
212+
/// i16 0x1234), return null. If the value is entirely undef and padding,
213+
/// return undef.
213214
Value *isBytewiseValue(Value *V);
214215

215216
/// Given an aggregate and a sequence of indices, see if the scalar value

lib/Analysis/ValueTracking.cpp

Lines changed: 61 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2917,62 +2917,91 @@ bool llvm::isKnownNeverNaN(const Value *V) {
29172917
return true;
29182918
}
29192919

2920-
/// If the specified value can be set by repeating the same byte in memory,
2921-
/// return the i8 value that it is represented with. This is
2922-
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
2923-
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
2924-
/// byte store (e.g. i16 0x1234), return null.
29252920
Value *llvm::isBytewiseValue(Value *V) {
29262921
// All byte-wide stores are splatable, even of arbitrary variables.
2927-
if (V->getType()->isIntegerTy(8)) return V;
2922+
if (V->getType()->isIntegerTy(8))
2923+
return V;
2924+
2925+
LLVMContext &Ctx = V->getContext();
2926+
2927+
// Undef is a don't-care value.
2928+
auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
2929+
if (isa<UndefValue>(V))
2930+
return UndefInt8;
2931+
2932+
Constant *C = dyn_cast<Constant>(V);
2933+
if (!C) {
2934+
// Conceptually, we could handle things like:
2935+
// %a = zext i8 %X to i16
2936+
// %b = shl i16 %a, 8
2937+
// %c = or i16 %a, %b
2938+
// but until there is an example that actually needs this, it doesn't seem
2939+
// worth worrying about.
2940+
return nullptr;
2941+
}
29282942

29292943
// Handle 'null', ConstantAggregateZero, etc.
2930-
if (Constant *C = dyn_cast<Constant>(V))
2931-
if (C->isNullValue())
2932-
return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
2944+
if (C->isNullValue())
2945+
return Constant::getNullValue(Type::getInt8Ty(Ctx));
29332946

2934-
// Constant float and double values can be handled as integer values if the
2947+
// Constant floating-point values can be handled as integer values if the
29352948
// corresponding integer value is "byteable". An important case is 0.0.
2936-
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2937-
if (CFP->getType()->isFloatTy())
2938-
V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
2939-
if (CFP->getType()->isDoubleTy())
2940-
V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
2949+
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
2950+
Type *Ty = nullptr;
2951+
if (CFP->getType()->isHalfTy())
2952+
Ty = Type::getInt16Ty(Ctx);
2953+
else if (CFP->getType()->isFloatTy())
2954+
Ty = Type::getInt32Ty(Ctx);
2955+
else if (CFP->getType()->isDoubleTy())
2956+
Ty = Type::getInt64Ty(Ctx);
29412957
// Don't handle long double formats, which have strange constraints.
2958+
return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
29422959
}
29432960

29442961
// We can handle constant integers whose bit width is a multiple of 8.
2945-
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
2962+
if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
29462963
if (CI->getBitWidth() % 8 == 0) {
29472964
assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
2948-
29492965
if (!CI->getValue().isSplat(8))
29502966
return nullptr;
2951-
return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
2967+
return ConstantInt::get(Ctx, CI->getValue().trunc(8));
29522968
}
29532969
}
29542970

2955-
// A ConstantDataArray/Vector is splatable if all its members are equal and
2956-
// also splatable.
2957-
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
2958-
Value *Elt = CA->getElementAsConstant(0);
2959-
Value *Val = isBytewiseValue(Elt);
2960-
if (!Val)
2971+
auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
2972+
if (LHS == RHS)
2973+
return LHS;
2974+
if (!LHS || !RHS)
29612975
return nullptr;
2976+
if (LHS == UndefInt8)
2977+
return RHS;
2978+
if (RHS == UndefInt8)
2979+
return LHS;
2980+
return nullptr;
2981+
};
29622982

2963-
for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
2964-
if (CA->getElementAsConstant(I) != Elt)
2983+
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
2984+
Value *Val = UndefInt8;
2985+
for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
2986+
if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
29652987
return nullptr;
2988+
return Val;
2989+
}
29662990

2991+
if (isa<ConstantVector>(C)) {
2992+
Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
2993+
return Splat ? isBytewiseValue(Splat) : nullptr;
2994+
}
2995+
2996+
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
2997+
Value *Val = UndefInt8;
2998+
for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
2999+
if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
3000+
return nullptr;
29673001
return Val;
29683002
}
29693003

2970-
// Conceptually, we could handle things like:
2971-
// %a = zext i8 %X to i16
2972-
// %b = shl i16 %a, 8
2973-
// %c = or i16 %a, %b
2974-
// but until there is an example that actually needs this, it doesn't seem
2975-
// worth worrying about.
3004+
// Don't try to handle the handful of other constants.
29763005
return nullptr;
29773006
}
29783007

lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
347347
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
348348
/// just replicate their input array and then pass on to memset_pattern16.
349349
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
350+
// FIXME: This could check for UndefValue because it can be merged into any
351+
// other valid pattern.
352+
350353
// If the value isn't a constant, we can't promote it to being in a constant
351354
// array. We could theoretically do a store to an alloca or something, but
352355
// that doesn't seem worthwhile.
@@ -645,9 +648,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
645648

646649
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
647650
if (ForMemset) {
651+
if (isa<UndefValue>(FirstSplatValue))
652+
FirstSplatValue = SecondSplatValue;
648653
if (FirstSplatValue != SecondSplatValue)
649654
continue;
650655
} else {
656+
if (isa<UndefValue>(FirstPatternValue))
657+
FirstPatternValue = SecondPatternValue;
651658
if (FirstPatternValue != SecondPatternValue)
652659
continue;
653660
}

lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
413413
if (!NextStore->isSimple()) break;
414414

415415
// Check to see if this stored value is of the same byte-splattable value.
416-
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
416+
Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
417+
if (isa<UndefValue>(ByteVal) && StoredByte)
418+
ByteVal = StoredByte;
419+
if (ByteVal != StoredByte)
417420
break;
418421

419422
// Check to see if this store is to a constant offset from the start ptr.

test/Transforms/MemCpyOpt/fca2memcpy.ll

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,16 @@ define void @copyalias(%S* %src, %S* %dst) {
7373
ret void
7474
}
7575

76-
; If the store address is computed ina complex manner, make
76+
; If the store address is computed in a complex manner, make
7777
; sure we lift the computation as well if needed and possible.
7878
define void @addrproducer(%S* %src, %S* %dst) {
79-
; CHECK-LABEL: addrproducer
80-
; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
81-
; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
82-
; CHECK-NEXT: store %S undef, %S* %dst
79+
; CHECK-LABEL: addrproducer(
80+
; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
81+
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1
82+
; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
83+
; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
84+
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false)
85+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
8386
; CHECK-NEXT: ret void
8487
%1 = load %S, %S* %src
8588
store %S undef, %S* %dst
@@ -89,7 +92,14 @@ define void @addrproducer(%S* %src, %S* %dst) {
8992
}
9093

9194
define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
92-
; CHECK-LABEL: aliasaddrproducer
95+
; CHECK-LABEL: aliasaddrproducer(
96+
; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src
97+
; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
98+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
99+
; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr
100+
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
101+
; CHECK-NEXT: store %S %[[SRC]], %S* %dst2
102+
; CHECK-NEXT: ret void
93103
%1 = load %S, %S* %src
94104
store %S undef, %S* %dst
95105
%dstindex = load i32, i32* %dstidptr
@@ -99,7 +109,16 @@ define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
99109
}
100110

101111
define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
102-
; CHECK-LABEL: noaliasaddrproducer
112+
; CHECK-LABEL: noaliasaddrproducer(
113+
; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
114+
; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr
115+
; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1
116+
; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
117+
; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
118+
; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8*
119+
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false)
120+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false)
121+
; CHECK-NEXT: ret void
103122
%1 = load %S, %S* %src
104123
store %S undef, %S* %src
105124
%2 = load i32, i32* %dstidptr
Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,89 @@
11
; RUN: opt -memcpyopt -S < %s | FileCheck %s
22

3-
@cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
4-
53
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
6-
declare void @foo(i32*) nounwind
7-
8-
define void @test1() nounwind {
9-
%arr = alloca [3 x i32], align 4
10-
%arr_i8 = bitcast [3 x i32]* %arr to i8*
11-
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %arr_i8, i8* align 4 bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
12-
%arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
13-
call void @foo(i32* %arraydecay) nounwind
4+
5+
@undef = internal constant i32 undef, align 4
6+
define void @test_undef() nounwind {
7+
%a = alloca i32, align 4
8+
%i8 = bitcast i32* %a to i8*
9+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false)
10+
ret void
11+
; CHECK-LABEL: @test_undef(
12+
; CHECK: call void @llvm.memset
13+
; CHECK-NOT: call void @llvm.memcpy
14+
; CHECK: ret void
15+
}
16+
17+
@i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
18+
define void @test_i32x3() nounwind {
19+
%a = alloca [3 x i32], align 4
20+
%i8 = bitcast [3 x i32]* %a to i8*
21+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false)
22+
ret void
23+
; CHECK-LABEL: @test_i32x3(
24+
; CHECK: call void @llvm.memset
25+
; CHECK-NOT: call void @llvm.memcpy
26+
; CHECK: ret void
27+
}
28+
29+
@i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4
30+
define void @test_i32x3_undef() nounwind {
31+
%a = alloca [3 x i32], align 4
32+
%i8 = bitcast [3 x i32]* %a to i8*
33+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false)
34+
ret void
35+
; CHECK-LABEL: @test_i32x3_undef(
36+
; CHECK: call void @llvm.memset
37+
; CHECK-NOT: call void @llvm.memcpy
38+
; CHECK: ret void
39+
}
40+
41+
%struct.bitfield = type { i8, [3 x i8] }
42+
@bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4
43+
define void @test_bitfield() nounwind {
44+
%a = alloca %struct.bitfield, align 4
45+
%i8 = bitcast %struct.bitfield* %a to i8*
46+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false)
47+
ret void
48+
; CHECK-LABEL: @test_bitfield(
49+
; CHECK: call void @llvm.memset
50+
; CHECK-NOT: call void @llvm.memcpy
51+
; CHECK: ret void
52+
}
53+
54+
@i1x16_zero = internal constant <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, align 4
55+
define void @test_i1x16_zero() nounwind {
56+
%a = alloca <16 x i1>, align 4
57+
%i8 = bitcast <16 x i1>* %a to i8*
58+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false)
59+
ret void
60+
; CHECK-LABEL: @test_i1x16_zero(
61+
; CHECK: call void @llvm.memset
62+
; CHECK-NOT: call void @llvm.memcpy
63+
; CHECK: ret void
64+
}
65+
66+
; i1 isn't currently handled. Should it be?
67+
@i1x16_one = internal constant <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, align 4
68+
define void @test_i1x16_one() nounwind {
69+
%a = alloca <16 x i1>, align 4
70+
%i8 = bitcast <16 x i1>* %a to i8*
71+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false)
72+
ret void
73+
; CHECK-LABEL: @test_i1x16_one(
74+
; CHECK-NOT: call void @llvm.memset
75+
; CHECK: call void @llvm.memcpy
76+
; CHECK: ret void
77+
}
78+
79+
@half = internal constant half 0xH0000, align 4
80+
define void @test_half() nounwind {
81+
%a = alloca half, align 4
82+
%i8 = bitcast half* %a to i8*
83+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false)
1484
ret void
15-
; CHECK-LABEL: @test1(
16-
; CHECK: call void @llvm.memset
17-
; CHECK-NOT: call void @llvm.memcpy
18-
; CHECK: ret void
85+
; CHECK-LABEL: @test_half(
86+
; CHECK: call void @llvm.memset
87+
; CHECK-NOT: call void @llvm.memcpy
88+
; CHECK: ret void
1989
}

0 commit comments

Comments
 (0)