I have a simple CUDA kernel, which checks the distance squares D2 in set of (x_inp,y_inp,z_inp) points given as an input. I want to know, HOW MANY pairs could be formed with the j-th point in case of a limitation, that their distance must be less than 1000000. I want to store number of such cases in N_OK. But the N_OK is always 1. Is there another way how to get this information?
__global__void GPUCode( float* x_inp, float* y_inp, float* z_inp, int N)
{
const uint bid = blockIdx.y * gridDim.x + blockIdx.x;
const uint tid = threadIdx.x;
const uint idx = bid * blockDim.x + tid;
if (idx >= N) return;
float x = x.inp[idx];
float y = y.inp[idx];
float z = z.inp[idx];
for(int j = 0; j < N; j++)
{
int N_OK = 0;
if (j != idx)
{
float dx = (float)(x.inp[j] - x);
float dy = (float)(y.inp[j] - y);
float dz = (float)(z.inp[j] - z);
float D2 = dx*dx + dy*dy + dz*dz;
if (D2 < 1000000)
{
N_OK = N_OK + 1;
printf("%f \n", D2);
}
}
printf("%d \n", N_OK);
}
}
Aucun commentaire:
Enregistrer un commentaire