我做了一個簡單的CUDA內核,由於某些原因無法啓動,我不明白。 下面你看到我的全球變量。內核啓動失敗內核參數的原因
unsigned int volume[256*256*256];//contains volume data of source
unsigned int target[256*256*256];//contains volume data of target
unsigned int* d_volume=NULL;//source data on device
unsigned int* d_target=NULL;//target data on device
下一個函數是內核啓動器。
void launch_kernel(){
cudaMalloc(&d_volume,256*256*256*sizeof(unsigned int));
cudaMemcpy(d_volume, volume, 256*256*256*sizeof(unsigned int),cudaMemcpyHostToDevice);
cudaMalloc(&d_target,256*256*256*sizeof(unsigned int));
cudaMemcpy(d_target, target, 256*256*256*sizeof(unsigned int),cudaMemcpyHostToDevice);
dim3 threads(256,1,1);
dim3 blocks(256,256,1);
simple_kernel<<<blocks,threads>>>(d_volume,d_target);
cudaError_t cudaResult;
cudaResult = cudaGetLastError();
if (cudaResult != cudaSuccess)
{
cout<<"kernel failed"<<endl;
}
cudaMemcpy(volume, d_volume, 256*256*256*sizeof(int),cudaMemcpyDeviceToHost);
cudaFree(d_volume);
cudaMemcpy(target, d_target 256*256*256*sizeof(int),cudaMemcpyDeviceToHost);
cudaFree(d_target);
}
問題似乎是d_target
原因,如果我推出這樣的內核:
simple_kernel<<<blocks,threads>>>(d_volume,d_volume);
它可以正常使用(傳遞到設備必須傳遞的值),並沒有出現任何信息。任何想法爲什麼會發生? 內核聲明如下。
__global__ void simple_kernel(unsigned int* src,unsigned int* tgt){
//i dont think it matters what it is for.
int x = threadIdx.x;
int y = blockIdx.x;
int z = blockIdx.y;
if(x!=0 || x!=255 || y!=0 || y!=255 || z!=0 || z!=255 ){//in bound of memory allocated
if(src[x*256*256+y*256+z]==tgt[x*256*256+y*256+z])
if(tgt[(x+1)*256*256+y*256+z]==1 || tgt[(x-1)*256*256+y*256+z]==1 || tgt[(x-1)*256*256+(y+1)*256+z] ||tgt[(x-1)*256*256+(y-1)*256+z])
src[x*256*256+y*256+z]=1;
else
src[x*256*256+y*256+z]=0;
}
}
'cudaGetLastError()'返回的錯誤代碼是什麼? – stuhlo 2013-04-06 00:24:34
cout << cudaGetErrorString(cudaGetLastError())<< endl;返回:「沒有錯誤」 – 2013-04-06 00:52:06
「'啓動失敗'是什麼意思? – stuhlo 2013-04-06 00:56:15