2013-06-02 124 views
0

我寫了一個前景提取內核在Matlab中使用,沒有打印任何東西,所以我將它移植到純Cuda C中,並將大部分邏輯取出。這件事沒有做任何事情,甚至沒有在返回之前打印cuPrintf語句,爲什麼?內核不啓動?

#include <cuda.h> 
#include <stdio.h>  /* printf, scanf, NULL */ 
#include <stdlib.h>  /* calloc, exit, free */ 
#include "cuPrintf.cu" 
#include "utils.h" 
#include <time.h>  /* clock_t, clock, CLOCKS_PER_SEC */ 



__global__ void foreground_extract(  unsigned char* inputImageRed, 
             unsigned char* inputImageGreen, 
             unsigned char* inputImageBlue, 

             unsigned char* outputImageRed, 
             unsigned char* outputImageGreen, 
             unsigned char* outputImageBlue,           

             const int xDim, 
             const int yDim) 
{ 


    cuPrintf("print something \n"); 
    //x = col, y = row 
    //xDim = col_dim, yDim = row_dim 
    int x = threadIdx.x + blockIdx.x * blockDim.x; 
    int y = threadIdx.y + blockIdx.y * blockDim.y; 
    int offset = x + y *blockDim.x *gridDim.x; 

    int nnodes = xDim*yDim; 
    if (offset >= nnodes) return; 


    //test equality 

    outputImageRed[offset] = inputImageRed[offset]; 
    outputImageGreen[offset] = inputImageGreen[offset]; 
    outputImageBlue[offset] = inputImageBlue[offset]; 

    cuPrintf("print something here too \n"); 
    cuPrintf("%d \n", outputImageRed[offset]); 

} 

int main() 
{ 

     int xDim = 3; 
     int yDim = 3; 

             unsigned char* h_inputImageRed; 
             unsigned char* h_inputImageGreen; 
             unsigned char* h_inputImageBlue; 

             unsigned char* h_outputImageRed; 
             unsigned char* h_outputImageGreen; 
             unsigned char* h_outputImageBlue; 


        h_inputImageRed = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_inputImageGreen = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_inputImageBlue = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 

        h_outputImageRed = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_outputImageGreen = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 
        h_outputImageBlue = (unsigned char*) calloc ((xDim*yDim), sizeof(unsigned char)); 


     //initiate input only 
     unsigned char init =0; 
     for (int i=0; i<(xDim*yDim);i++){ 

              h_inputImageRed[i] = init; 
              h_inputImageGreen[i] = init; 
              h_inputImageBlue[i] = init; 

              init++; 

              printf("%d\n", h_inputImageRed[i]); 

     } 

             //device arrays 
             unsigned char* d_inputImageRed; 
             unsigned char* d_inputImageGreen; 
             unsigned char* d_inputImageBlue; 

             unsigned char* d_outputImageRed; 
             unsigned char* d_outputImageGreen; 
             unsigned char* d_outputImageBlue; 


    //cudaMallocs 

    checkCudaErrors(cudaMalloc((void**)&d_inputImageRed, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_inputImageGreen, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_inputImageBlue, (sizeof(unsigned char)*xDim*yDim))); 

    checkCudaErrors(cudaMalloc((void**)&d_outputImageRed, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_outputImageGreen, (sizeof(unsigned char)*xDim*yDim))); 
    checkCudaErrors(cudaMalloc((void**)&d_outputImageBlue, (sizeof(unsigned char)*xDim*yDim))); 

    //cudaMemcpys, Host to Device 

    checkCudaErrors(cudaMemcpy(d_inputImageRed, h_inputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_inputImageGreen, h_inputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_inputImageBlue, h_inputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 

    checkCudaErrors(cudaMemcpy(d_outputImageRed, h_outputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_outputImageGreen, h_outputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 
    checkCudaErrors(cudaMemcpy(d_outputImageBlue, h_outputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyHostToDevice)); 

    cudaPrintfInit(); 

    int gridSizeX = ceil(float(xDim/8)); 
    int gridSizeY = ceil(float(yDim/8)); 
    int gridSizeZ = 1; 

    int blockSizeX=8; 
    int blockSizeY=8; 
    int blockSizeZ=1; 

    const dim3 gridSize(gridSizeX,gridSizeY,gridSizeZ); 
    const dim3 blockSize(blockSizeX,blockSizeY,blockSizeZ); 

    foreground_extract <<< gridSize, blockSize >>>(d_inputImageRed, 
                d_inputImageGreen, 
                d_inputImageBlue, 

                d_outputImageRed, 
                d_outputImageGreen, 
                d_outputImageBlue, 

                xDim,yDim); 


     cudaPrintfDisplay(stdout,true); 
     cudaPrintfEnd(); 

     checkCudaErrors(cudaMemcpy(h_outputImageRed, d_outputImageRed, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 
     checkCudaErrors(cudaMemcpy(h_outputImageGreen, d_outputImageGreen, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 
     checkCudaErrors(cudaMemcpy(h_outputImageBlue, d_outputImageBlue, (sizeof(unsigned char)*xDim*yDim), cudaMemcpyDeviceToHost)); 

     //free gpu data 
    checkCudaErrors(cudaFree(d_outputImageRed)); 
    checkCudaErrors(cudaFree(d_outputImageGreen)); 
    checkCudaErrors(cudaFree(d_outputImageBlue)); 
    checkCudaErrors(cudaFree(d_inputImageRed)); 
    checkCudaErrors(cudaFree(d_inputImageGreen)); 
    checkCudaErrors(cudaFree(d_inputImageBlue)); 

    //free host data 
    free(h_outputImageRed); 
    free(h_outputImageGreen); 
    free(h_outputImageBlue); 
    free(h_inputImageRed); 
    free(h_inputImageGreen); 
    free(h_inputImageBlue); 



     while(true){} 
     return 0; 
} 

回答

3

你的內核沒有啓動,這就是爲什麼你從內核的printf沒有輸出。 如果你在內核啓動上做了適當的cuda error checking,你會發現這一點。

內核啓動返回的錯誤是invalid configuration argument

您正在傳遞無效值gridSize.xgridSize.y

如果你想看看它們是什麼,請在調用內核之前將它們打印出來。 (一般調試提示。)

讓我們來看看這條線,因爲它不是做你認爲:

int gridSizeX = ceil(float(xDim/8)); 
          ^^ 
           both values inside the parenthesis are *integers* 

您還沒有投任何這些值(xDim8)的一個float。所以主機編譯器使用整數除法解決圓括號內的數量。 3/8的整數除法爲零。此後沒有任何變化的價值。還是零。

+0

謝謝。不知道關於內核啓動時的錯誤檢查,我剛剛使用了checkCudaErrors,就像你上面看到的那樣。有幫助和徹底的答案,雖然也許你可能試圖聽起來不那麼沉悶?是? :) – andandandand

+0

那裏。稍微不太敏捷。 –