0
我在cuda中爲Visual Studio 2010中的圖像處理製作算法。在我的編碼中,我遇到了處理線程和cuda塊的問題。所以我的C和CUDA示例代碼如下,C代碼工作正常,但CUDA代碼不能正常工作。 我的C代碼Cuda的線程和塊不能正常工作
void checkGpuBlockValue(unsigned int *a,unsigned int *b,int length)
{
for(int i=0;i<length;i++){
b[i]=a[i]+i;
}
}
int main()
{
const int range=1000;
unsigned int *a=new unsigned int[range];
unsigned int *b=new unsigned int[range];
for(int i=0;i<range;i++)
{
a[i]=i;
}
checkGpuBlockValue(a,b,range);
for(int j=0;j<range;j++)
{
cout<<"b["<<j<<"] = "<<b[j]<<std::endl;
}
}
輸出=
OutPut :
b[0] = 0
b[1] = 2
b[2] = 4
b[3] = 6
b[4] = 8
.
.
.
.
.
b[996] = 1992
b[997] = 1994
b[998] = 1996
b[999] = 1998
能正常工作。
我的CUDA代碼(工作不正常)是;
__global__
void checkGpuBlockValue(unsigned int *a,unsigned int *b,int length)
{
unsigned int i = (blockIdx.x * blockDim.x) + threadIdx.x;
if(i<length){
b[i]=a[i]+i;
}
}
int main()
{
const int range=1000;
unsigned int *a=new unsigned int[range];
unsigned int *b=new unsigned int[range];
unsigned int *dev_a;
unsigned int *dev_b;
for(int i=0;i<range;i++)
{
a[i]=i;
}
cudaMalloc((void**)&dev_a, range* sizeof(unsigned int));
cudaMalloc((void**)&dev_b, range* sizeof(unsigned int));
cudaMemcpy(dev_a, a, range, cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, a, range, cudaMemcpyHostToDevice);
static const int BLOCK_WIDTH = 8;
//1024 is the maximum number of threads per block for modern GPUs.
int x = static_cast<int>(ceilf(static_cast<float>(range)/BLOCK_WIDTH));
const dim3 grid (x,1);
const dim3 block(BLOCK_WIDTH,1);
checkGpuBlockValue<<<grid,block>>>(dev_a,dev_b,range);
cudaDeviceSynchronize();
cudaMemcpy(b, dev_b, range, cudaMemcpyDeviceToHost);
for(int j=0;j<range;j++)
{
cout<<"b["<<j<<"] = "<<b[j]<<std::endl;
}
cudaFree(dev_a);
cudaFree(dev_b);
}
OUT PUT是:
Out Put =
b[0] = 0
b[1] = 2
b[2] = 4
b[3] = 6
.
.
.
.
.
b[242] = 484
b[243] = 486
b[244] = 488
b[245] = 490
b[246] = 492
b[247] = 494
b[248] = 496
b[249] = 498
b[250] = 3452816845
b[251] = 3452816845
b[252] = 3452816845
b[253] = 3452816845
b[254] = 3452816845
b[255] = 3452816845
b[256] = 3452816845
.
.
.
.
.
.
b[996] = 3452816845
b[997] = 3452816845
b[998] = 3452816845
b[999] = 3452816845
在我的代碼1M在INT * a和比添加puting值0到1000 *一個帶有值從0到1000和結果在int值存儲* b。 所以我的代碼在0到249(高達250)循環中工作良好,但在250之後它給出了錯誤的值。 那麼我在這裏做什麼錯了?請給我建議。
謝謝薩加爾先生。現在工作正常。我接受我的錯誤。 – Jay