0
在這個簡短的例子中,我試圖通過一個帶有指針的struct
init在cuda設備內存中傳遞一個表。複製到主機 - >設備,設備 - >主機似乎可行,但在`_ global _ function nothing works. Values for
dA`爲空,我無法更改它們。傳遞一個指向CUDA設備內存的指針init init
我不知道如何從價值A
複製到dA
。如果我使用這樣的基本表格3210它可以工作,但這裏不是我想要做的。這是代碼:
#include<assert.h>
#include <cuda.h>
#include <stdio.h>
#include <iostream>
#include <iomanip>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#define N 5// side of matrix containing data
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
typedef struct {float re,im;} fcomplex;
__global__ void kernel(fcomplex * da)
{
int x = threadIdx.x;
int y = threadIdx.y;
int i = (N*y) + x;
//da[i].re += 2;
printf("%f \n",da[i].re);
}
int main(int argc, char * argv[])
{
fcomplex *dA,**A,**B;
A= (fcomplex **)malloc(N * sizeof(fcomplex*));
B=(fcomplex **)malloc(N * sizeof(fcomplex* ));
for (int i = 0; i < N; i++){
A[i] = (fcomplex *)malloc(N * sizeof(fcomplex));
B[i] = (fcomplex *)malloc(N * sizeof(fcomplex));
}
for (int i = 0; i < N; i++)
{ for (int d= 0; d < N; d++)
{
A[i][d].re = i*d;
A[i][d].im = i*d;
}
}
checkCudaErrors(cudaMalloc((void **)&dA, (size_t)(sizeof(fcomplex)*N*N)));
checkCudaErrors(cudaMemcpy(dA,A,N*N*sizeof(fcomplex),cudaMemcpyHostToDevice));
const dim3 blockSize(N,N);
const dim3 gridSize(1,1);
kernel<<<gridSize,blockSize>>>(dA);
checkCudaErrors(cudaThreadSynchronize());
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaMemcpy(B, dA, sizeof(fcomplex)*N*N, cudaMemcpyDeviceToHost));
for (int i = 0; i < N; i++)
{ for (int d= 0; d < N; d++)
{
printf("%f-%f\n",A[i][d].re,B[i][d].re);
printf("%f-%f\n",A[i][d].im,B[i][d].im);
}
}
//verify(A,B,N);
free(A);
free(B);
cudaFree(dA);
//cudaFree(dB);
}
void verify(fcomplex ** A, fcomplex ** B, int size)
{
for (int i = 0; i < size; i++)
{ for (int d= 0; d < size; d++)
{
assert(A[i][d].re==B[i][d].re);
}
}
printf("Correct!");
}
完美,非常感謝。 :) – volty41 2013-04-30 11:18:27