Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit 2965315

Browse files
committed
CodeGen: Fix invalid bitcasts for atomic builtins
Currently clang assumes the temporary variables emitted during codegen of atomic builtins have address space 0, which is not true for target triple amdgcn---amdgiz and causes invalid bitcasts. This patch fixes that.

Differential Revision: https://reviews.llvm.org/D38966

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@316000 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 942e003 commit 2965315

File tree

2 files changed

+43
-41
lines changed

2 files changed

+43
-41
lines changed

lib/CodeGen/CGAtomic.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -1226,7 +1226,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
12261226
return RValue::get(nullptr);
12271227

12281228
return convertTempToRValue(
1229-
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
1229+
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
1230+
Dest.getAddressSpace())),
12301231
RValTy, E->getExprLoc());
12311232
}
12321233

@@ -1298,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
12981299

12991300
assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
13001301
return convertTempToRValue(
1301-
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
1302+
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
1303+
Dest.getAddressSpace())),
13021304
RValTy, E->getExprLoc());
13031305
}
13041306

test/CodeGenOpenCL/atomic-ops.cl

+39-39
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s
1+
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
22

33
// Also test serialization of atomic operations here, to avoid duplicating the test.
4-
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
5-
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
4+
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
5+
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
66

77
#ifndef ALREADY_INCLUDED
88
#define ALREADY_INCLUDED
@@ -32,22 +32,22 @@ atomic_int j;
3232

3333
void fi1(atomic_int *i) {
3434
// CHECK-LABEL: @fi1
35-
// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
35+
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
3636
int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
3737

38-
// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
38+
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
3939
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
4040

41-
// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
41+
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
4242
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
4343

44-
// CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
44+
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
4545
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
4646
}
4747

4848
void fi2(atomic_int *i) {
4949
// CHECK-LABEL: @fi2
50-
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
50+
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
5151
__opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
5252
}
5353

@@ -56,7 +56,7 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *
5656
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
5757
__opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
5858

59-
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
59+
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
6060
__opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);
6161

6262
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
@@ -65,25 +65,25 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *
6565

6666
void fi3(atomic_int *i, atomic_uint *ui) {
6767
// CHECK-LABEL: @fi3
68-
// CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
68+
// CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
6969
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
7070

71-
// CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
71+
// CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
7272
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
7373

74-
// CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
74+
// CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
7575
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
7676

77-
// CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
77+
// CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
7878
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
7979

80-
// CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
80+
// CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
8181
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
8282
}
8383

8484
bool fi4(atomic_int *i) {
8585
// CHECK-LABEL: @fi4(
86-
// CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
86+
// CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
8787
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
8888
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
8989
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -100,16 +100,16 @@ void fi5(atomic_int *i, int scope) {
100100
// CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
101101
// CHECK-NEXT: ]
102102
// CHECK: [[opencl_workgroup]]:
103-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
103+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
104104
// CHECK: br label %[[continue:.*]]
105105
// CHECK: [[opencl_device]]:
106-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
106+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
107107
// CHECK: br label %[[continue]]
108108
// CHECK: [[opencl_allsvmdevices]]:
109-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
109+
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
110110
// CHECK: br label %[[continue]]
111111
// CHECK: [[opencl_subgroup]]:
112-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
112+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
113113
// CHECK: br label %[[continue]]
114114
// CHECK: [[continue]]:
115115
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
@@ -141,29 +141,29 @@ void fi6(atomic_int *i, int order, int scope) {
141141
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
142142
// CHECK-NEXT: ]
143143
// CHECK: [[MON_WG]]:
144-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic
144+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
145145
// CHECK: [[MON_DEV]]:
146-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic
146+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
147147
// CHECK: [[MON_ALL]]:
148-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic
148+
// CHECK: load atomic i32, i32* %{{.*}} monotonic
149149
// CHECK: [[MON_SUB]]:
150-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic
150+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic
151151
// CHECK: [[ACQ_WG]]:
152-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire
152+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
153153
// CHECK: [[ACQ_DEV]]:
154-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire
154+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
155155
// CHECK: [[ACQ_ALL]]:
156-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire
156+
// CHECK: load atomic i32, i32* %{{.*}} acquire
157157
// CHECK: [[ACQ_SUB]]:
158-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire
158+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire
159159
// CHECK: [[SEQ_WG]]:
160-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
160+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
161161
// CHECK: [[SEQ_DEV]]:
162-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
162+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
163163
// CHECK: [[SEQ_ALL]]:
164-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
164+
// CHECK: load atomic i32, i32* %{{.*}} seq_cst
165165
// CHECK: [[SEQ_SUB]]:
166-
// CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
166+
// CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst
167167
int x = __opencl_atomic_load(i, order, scope);
168168
}
169169

@@ -181,7 +181,7 @@ void ff2(atomic_float *d) {
181181

182182
float ff3(atomic_float *d) {
183183
// CHECK-LABEL: @ff3
184-
// CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
184+
// CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst
185185
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
186186
}
187187

@@ -198,10 +198,10 @@ void atomic_init_foo()
198198

199199
// CHECK-LABEL: @failureOrder
200200
void failureOrder(atomic_int *ptr, int *ptr2) {
201-
// CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
201+
// CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
202202
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
203203

204-
// CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
204+
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
205205
__opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
206206
}
207207

@@ -279,11 +279,11 @@ int test_volatile(volatile atomic_int *i) {
279279
// CHECK-LABEL: @test_volatile
280280
// CHECK: %[[i_addr:.*]] = alloca i32
281281
// CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
282-
// CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
283-
// CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
284-
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
285-
// CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
286-
// CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
282+
// CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
283+
// CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
284+
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst
285+
// CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
286+
// CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
287287
// CHECK-NEXT: ret i32 %[[retval]]
288288
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
289289
}

0 commit comments

Comments
 (0)