我們有以下結構定義CUDA:主機存儲器指針不會被複制到設備存儲器
typedef struct PurchaseOrder
{
char* Value1;
double Value2;
double* Value3;
int Value3Length;
__device__ int GetValue3Length() { return Value3Length; }
__device__ double GetValue3(int i) { return Value3[i]; }
__device__ void SetValue3(int i, double value) { Value3[i] = value; }
};
的PurchaseOrder的數據(結構的數組)從C#應用程序被整理成下面的C DLL函數
int RunMonteCarlo(PurchaseOrder *hostPurchaseOrders, int length) {
PurchaseOrder *devPurchaseOrders;
// display the results
for (int i = 0; i < length; i++)
{
//printf("\n\nAddress: %u",hostPurchaseOrders+i);
printf("\n\nIndex: %d", i);
printf("\nValue1: %s",(hostPurchaseOrders+i)->Value1);
printf("\nValue2: %f",(hostPurchaseOrders+i)->Value2);
for(int j = 0; j < (hostPurchaseOrders+i)->Value3Length; j++)
{
printf("\nValue3[%d]: %fl", j, (hostPurchaseOrders+i)->Value3[j]);
}
}
// allocate the memory on the GPU
HANDLE_ERROR(cudaMalloc((void**)&devPurchaseOrders, length * sizeof(PurchaseOrder)));
// copy the array 'PurchaseOrder' to the GPU
HANDLE_ERROR(cudaMemcpy(devPurchaseOrders, hostPurchaseOrders, length * sizeof(PurchaseOrder), cudaMemcpyHostToDevice));
// Run the kernel code
MonteCarloKernel<<<60,32>>>(devPurchaseOrders, length);
// copy the array 'PurchaseOrders' back from the GPU to the CPU
HANDLE_ERROR(cudaMemcpy(hostPurchaseOrders, devPurchaseOrders, length * sizeof(PurchaseOrder), cudaMemcpyDeviceToHost));
// free the memory allocated on the GPU
HANDLE_ERROR(cudaFree(devPurchaseOrders));
return 0;
}
__global__ void MonteCarloKernel(PurchaseOrder *purchaseorders, long length) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
while (i < length)
{
purchaseorders[i].SetAAUS(1.11);
for (int j=0; j < purchaseorders[i].GetValue3Length(); j++)
{
//purchaseorders[i].SetValue3(j,1.0);
}
i += stride;
}
}
數據經過正確的編組,正如printf代碼在開始時驗證的那樣。
然而,值3(雙陣列)似乎不會被複制到內核設備存儲器作爲行個PurchaseOrders [I] .SetValue3(J,1.0)的應用程序崩潰。
我應該怎麼做才能解決這個問題呢?
當應用程序崩潰時,控制檯窗口剛剛關閉。我可以使用什麼調試技術來獲得一些有意義的消息?