Skip to content

Commit feef36d

Browse files
committed Nov 13, 2023
Made CPU/GPU buffer initialization significantly faster with std::fill and enqueueFillBuffer
1 parent f7a001c commit feef36d

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed
 

‎src/opencl.hpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -243,11 +243,10 @@ template<typename T> class Memory {
243243
allocate_device_buffer(device, allocate_device);
244244
if(allocate_host) {
245245
host_buffer = new T[N*(ulong)d];
246-
for(ulong i=0ull; i<N*(ulong)d; i++) host_buffer[i] = value;
247246
initialize_auxiliary_pointers();
248247
host_buffer_exists = true;
249248
}
250-
write_to_device();
249+
reset(value);
251250
}
252251
inline Memory(Device& device, const ulong N, const uint dimensions, T* const host_buffer, const bool allocate_device=true) {
253252
if(!device.is_initialized()) print_error("No Device selected. Call Device constructor.");
@@ -328,8 +327,9 @@ template<typename T> class Memory {
328327
delete_host_buffer();
329328
}
330329
inline void reset(const T value=(T)0) {
331-
if(host_buffer_exists) for(ulong i=0ull; i<N*(ulong)d; i++) host_buffer[i] = value;
332-
write_to_device();
330+
if(host_buffer_exists) std::fill(host_buffer, host_buffer+range(), value); // faster than "for(ulong i=0ull; i<range(); i++) host_buffer[i] = value;"
331+
cl_queue.enqueueFillBuffer(device_buffer, value, 0ull, capacity()); // faster than "write_to_device();"
332+
cl_queue.finish();
333333
}
334334
inline const ulong length() const { return N; }
335335
inline const uint dimensions() const { return d; }

0 commit comments

Comments
 (0)