0
我在嘗試弄清楚如何從GPU檢索3D陣列時遇到問題。 我想爲主機代碼中的3d數組分配內存,調用內核,數組將被填充,然後將主機代碼中的3D數組檢索到mexFunction(主代碼)中的返回變量。CUDA檢索3D陣列
我已經做了幾次嘗試,這是我的最新代碼。結果都是'0',他們應該是'7'。誰能告訴我我哪裏出錯了?這可能與3D參數有關,我不認爲我完全理解這一部分。
simulate3DArrays.cpp
/* Device code */
__global__ void simulate3DArrays(cudaPitchedPtr devPitchedPtr,
int width,
int height,
int depth)
{
int threadId;
threadId = (blockIdx.x * blockDim.x) + threadIdx.x;
size_t pitch = devPitchedPtr.pitch;
for (int widthIndex = 0; widthIndex < width; widthIndex++) {
for (int heightIndex = 0; heightIndex < height; heightIndex++) {
*((double*)(((char*)devPitchedPtr.ptr + threadId * pitch * height) + heightIndex * pitch) + widthIndex) = 7.0;
}
}
}
mexFunction.cu
/* Host code */
#include <stdio.h>
#include "mex.h"
/* Kernel function */
#include "simulate3DArrays.cpp"
/* Define some constants. */
#define width 5
#define height 9
#define depth 6
void displayMemoryAvailability(mxArray **MatlabMemory);
void mexFunction(int nlhs,
mxArray *plhs[],
int nrhs,
mxArray *prhs[])
{
double *output;
mwSize ndim3 = 3;
mwSize dims3[] = {height, width, depth};
plhs[0] = mxCreateNumericArray(ndim3, dims3, mxDOUBLE_CLASS, mxREAL);
output = mxGetPr(plhs[0]);
cudaExtent extent = make_cudaExtent(width * sizeof(double), height, depth);
cudaPitchedPtr devicePointer;
cudaMalloc3D(&devicePointer, extent);
simulate3DArrays<<<1,depth>>>(devicePointer, width, height, depth);
cudaMemcpy3DParms deviceOuput = { 0 };
deviceOuput.srcPtr.ptr = devicePointer.ptr;
deviceOuput.srcPtr.pitch = devicePointer.pitch;
deviceOuput.srcPtr.xsize = width;
deviceOuput.srcPtr.ysize = height;
deviceOuput.dstPtr.ptr = output;
deviceOuput.dstPtr.pitch = devicePointer.pitch;
deviceOuput.dstPtr.xsize = width;
deviceOuput.dstPtr.ysize = height;
deviceOuput.kind = cudaMemcpyDeviceToHost;
/* copy 3d array back to 'ouput' */
cudaMemcpy3D(&deviceOuput);
return;
} /* End Mexfunction */
您使用的每個API調用都返回一個錯誤代碼。您應該檢查所有這些以查看是否發生錯誤。它將幫助您以更高的精度確定確切的問題。 – talonmies 2012-08-09 05:38:05