Skip to content

Commit cb1f251

Browse files
committed
cleanup and add debug
1 parent edd14e0 commit cb1f251

File tree

1 file changed

+52
-79
lines changed

1 file changed

+52
-79
lines changed

gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_calculation_cuda.cu

Lines changed: 52 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@
3838
#include <cublas_v2.h>
3939
#include <chrono>
4040

41+
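// DEBUG ONLY (dev version): global timing log appended to debug_timings.txt. NOTE(review): the stream is opened at global initialization, even when debugFlag is off.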
42+
#include <fstream>
43+
44+
std::ofstream debugLog("debug_timings.txt", std::ios::app);
45+
4146
#define BLOCK_SIZE 32
4247

4348
// Existing SAFE_CALL (for cudaError_t)
@@ -144,51 +149,6 @@ __device__ __host__ float unnormalized_sinc(float t)
144149
}
145150
}
146151

147-
__global__ void gpu_matrix_mult(const __half * a, const __half * b, __half * c, int m, int n, int k)
148-
{
149-
int row = blockIdx.y * blockDim.y + threadIdx.y;
150-
int col = blockIdx.x * blockDim.x + threadIdx.x;
151-
152-
if (col < k && row < m)
153-
{
154-
__half sum = __float2half(0.0f);
155-
for (int i = 0; i < n; ++i)
156-
{
157-
__half valA = a[row * n + i];
158-
__half valB = b[i * k + col];
159-
sum = __hadd(sum, __hmul(valA, valB));
160-
}
161-
c[row * k + col] = sum;
162-
}
163-
}
164-
165-
__global__ void gpu_matrix_mult_float(float * a, float * b, float * c, int m, int n, int k)
166-
{
167-
int row = blockIdx.y * blockDim.y + threadIdx.y;
168-
int col = blockIdx.x * blockDim.x + threadIdx.x;
169-
float sum = 0;
170-
if (col < k && row < m)
171-
{
172-
for (int i = 0; i < n; i++)
173-
{
174-
sum += a[row * n + i] * b[i * k + col];
175-
}
176-
c[row * k + col] = sum;
177-
}
178-
}
179-
180-
__global__ void gpu_diag_matrix_mult(float * Val, int * RowPtr, float * diagVals, int total_rows)
181-
{
182-
const int row = threadIdx.x + blockIdx.x * blockDim.x;
183-
if (row < total_rows)
184-
{
185-
for (int i = RowPtr[row]; i < RowPtr[row + 1]; i++)
186-
{
187-
Val[i] = diagVals[row] * Val[i];
188-
}
189-
}
190-
}
191-
192152
__global__ void reduce_beams_kernel(
193153
const thrust::complex<float> * __restrict__ d_P_Beams, float * d_P_Beams_F_real,
194154
float * d_P_Beams_F_imag, int nBeams, int nFreq, int nRaysSkipped)
@@ -460,7 +420,6 @@ CArray2D sonar_calculation_wrapper(
460420

461421
if (!memory_initialized)
462422
{
463-
std::cout << "Initializing..." << std::endl;
464423
SAFE_CALL(
465424
cudaMalloc((void **)&d_depth_image, depth_image.step * depth_image.rows), "depth malloc");
466425
SAFE_CALL(
@@ -482,8 +441,6 @@ CArray2D sonar_calculation_wrapper(
482441
"P_Beams malloc device");
483442
SAFE_CALL(cudaMalloc(&d_P_Beams_F_real, sizeof(float) * nBeams * nFreq), "beam real malloc");
484443
SAFE_CALL(cudaMalloc(&d_P_Beams_F_imag, sizeof(float) * nBeams * nFreq), "beam imag malloc");
485-
std::cout << "Middle..." << std::endl;
486-
487444
SAFE_CALL(
488445
cudaMallocHost((void **)&P_Ray_real, P_Ray_Bytes), "CUDA MallocHost Failed for P_Ray_real");
489446
SAFE_CALL(
@@ -507,8 +464,6 @@ CArray2D sonar_calculation_wrapper(
507464
SAFE_CALL(
508465
cudaMallocHost((void **)&beamCorrector_lin_h, beamCorrector_lin_Bytes),
509466
"CUDA MallocHost Failed for beamCorrector_lin_h");
510-
std::cout << "DONE..." << std::endl;
511-
512467
SAFE_CALL(
513468
cudaMalloc((void **)&d_P_Beams_Cor_real, P_Beams_Cor_Bytes),
514469
"CUDA Malloc Failed for d_P_Beams_Cor_real");
@@ -524,8 +479,6 @@ CArray2D sonar_calculation_wrapper(
524479
SAFE_CALL(
525480
cudaMalloc((void **)&d_beamCorrector_lin, beamCorrector_lin_Bytes),
526481
"CUDA Malloc Failed for d_beamCorrector_lin");
527-
std::cout << "realdone..." << std::endl;
528-
529482
memory_initialized = true;
530483
}
531484

@@ -594,14 +547,20 @@ CArray2D sonar_calculation_wrapper(
594547
SAFE_CALL(
595548
cudaMemcpy(P_Beams, d_P_Beams, P_Beams_Bytes, cudaMemcpyDeviceToHost), "CUDA Memcpy Failed");
596549

597-
// For calc time measure
598550
if (debugFlag)
599551
{
600552
stop = std::chrono::high_resolution_clock::now();
601553
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
602-
printf(
603-
"GPU Sonar Computation Time %lld/100 [s]\n",
604-
static_cast<long long int>(duration.count() / 10000));
554+
555+
long long dcount = duration.count();
556+
float ms = static_cast<float>(dcount) / 1000.0f;
557+
558+
printf("GPU Sonar Computation Time %lld/100 [s]\n", dcount / 10000);
559+
printf("GPU Sonar Summation Time: %.3f ms\n", ms);
560+
561+
debugLog << "GPU Sonar Computation Time " << dcount / 10000 << "/100 [s]\n";
562+
debugLog << "GPU Sonar Summation Time: " << ms << " ms\n";
563+
605564
start = std::chrono::high_resolution_clock::now();
606565
}
607566

@@ -641,9 +600,15 @@ CArray2D sonar_calculation_wrapper(
641600
{
642601
stop = std::chrono::high_resolution_clock::now();
643602
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
644-
printf(
645-
"Sonar Ray Summation %lld/100 [s]\n", static_cast<long long int>(duration.count() / 10000));
646-
printf("Sonar Ray Summation Time: %.3f ms\n", static_cast<float>(duration.count()) / 1000.0f);
603+
604+
long long dcount = duration.count();
605+
606+
printf("Sonar Ray Summation %lld/100 [s]\n", dcount / 10000);
607+
printf("Sonar Ray Summation Time: %.3f ms\n", static_cast<float>(dcount) / 1000.0f);
608+
609+
debugLog << "Sonar Ray Summation " << dcount / 10000 << "/100 [s]\n";
610+
debugLog << "Sonar Ray Summation Time: " << static_cast<float>(dcount) / 1000.0f << " ms\n";
611+
647612
start = std::chrono::high_resolution_clock::now();
648613
}
649614

@@ -747,15 +712,21 @@ CArray2D sonar_calculation_wrapper(
747712
P_Beams_Cor_imag_h[f * nBeams + beam] / beamCorrectorSum);
748713
}
749714
}
750-
// For calc time measure
715+
751716
if (debugFlag)
752717
{
753718
stop = std::chrono::high_resolution_clock::now();
754719
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
755-
printf(
756-
"GPU Window & Correction %lld/100 [s]\n",
757-
static_cast<long long int>(duration.count() / 10000));
758-
printf("GPU Window & Correction: %.3f ms\n", static_cast<float>(duration.count()) / 1000.0f);
720+
721+
long long dcount = duration.count();
722+
723+
printf("GPU Window & Correction %lld/100 [s]\n", dcount / 10000);
724+
printf("GPU Window & Correction: %.3f ms\n", static_cast<float>(dcount) / 1000.0f);
725+
726+
// Mirror the same two timing lines into the debugLog stream (debug_timings.txt)
727+
debugLog << "GPU Window & Correction " << dcount / 10000 << "/100 [s]\n";
728+
debugLog << "GPU Window & Correction: " << static_cast<float>(dcount) / 1000.0f << " ms\n";
729+
759730
start = std::chrono::high_resolution_clock::now();
760731
}
761732

@@ -797,14 +768,6 @@ CArray2D sonar_calculation_wrapper(
797768
}
798769
}
799770

800-
if (debugFlag)
801-
{
802-
stop = std::chrono::high_resolution_clock::now();
803-
std::chrono::duration<double> duration = stop - start;
804-
printf("GPU LOOP Computation Time: %.6f seconds\n", duration.count());
805-
start = std::chrono::high_resolution_clock::now();
806-
}
807-
808771
// --- Device side input data allocation and initialization
809772
cufftComplex * deviceInputData;
810773
SAFE_CALL(
@@ -862,14 +825,22 @@ CArray2D sonar_calculation_wrapper(
862825
}
863826
}
864827

865-
// For calc time measure
866828
if (debugFlag)
867829
{
868830
stop = std::chrono::high_resolution_clock::now();
869831
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
870-
printf("GPU FFT Calc Time Time: %.3f ms\n", static_cast<float>(duration.count()) / 1000.0f);
871-
printf(
872-
"GPU FFT Calc Time %lld/100 [s]\n", static_cast<long long int>(duration.count() / 10000));
832+
833+
long long dcount = duration.count();
834+
float ms = static_cast<float>(dcount) / 1000.0f;
835+
836+
printf("GPU FFT Calc Time: %.3f ms\n", ms);
837+
printf("GPU FFT Calc Time %lld/100 [s]\n", dcount / 10000);
838+
839+
// Mirror the same two timing lines into the debugLog stream (debug_timings.txt)
840+
debugLog << "GPU FFT Calc Time: " << ms << " ms\n";
841+
debugLog << "GPU FFT Calc Time " << dcount / 10000 << "/100 [s]\n";
842+
843+
start = std::chrono::high_resolution_clock::now();
873844
}
874845

875846
auto total_stop_time = std::chrono::high_resolution_clock::now();
@@ -878,9 +849,11 @@ CArray2D sonar_calculation_wrapper(
878849

879850
if (debugFlag)
880851
{
881-
printf(
882-
"Total Sonar Calculation Wrapper Time: %.3f ms\n",
883-
static_cast<float>(total_duration.count()) / 1000.0f);
852+
float ms = static_cast<float>(total_duration.count()) / 1000.0f;
853+
854+
printf("Total Sonar Calculation Wrapper Time: %.3f ms\n", ms);
855+
856+
debugLog << "Total Sonar Calculation Wrapper Time: " << ms << " ms\n";
884857
}
885858

886859
return P_Beams_F;

0 commit comments

Comments
 (0)