2016-12-03 131 views
-1

我嘗試使用CUDA和Qt來模糊圖像。 我用NPP庫,nppiFilterGauss_8u_C1R的偉大工程CUDA NPP GaussFilter破壞圖像

void cuda_npp_gauss_filter_qt(uchar* pSourceData, uchar* pResultData, const int &ImageLineStep, const int &ImageWidth, const int &ImageHeight) 
{ 
     NppiSize oSizeROI; 

     oSizeROI.width = ImageWidth; 
     oSizeROI.height = ImageHeight; 

     Npp32s SourceStep = ImageLineStep; 
     Npp32s DestinationStep = ImageLineStep; 

     size_t AllocationSizeInBytes = ImageLineStep * ImageHeight; 
     Npp8u *pSource, *pDestination; 

     cudaMalloc<Npp8u>(&pSource,AllocationSizeInBytes); 
     cudaMalloc<Npp8u>(&pDestination,AllocationSizeInBytes); 

     cudaMemcpy(pSource, pSourceData, AllocationSizeInBytes, cudaMemcpyHostToDevice); 

     nppiFilterGauss_8u_C1R(pSource, SourceStep, pDestination, DestinationStep, oSizeROI, NPP_MASK_SIZE_15_X_15); 

     cudaMemcpy(pResultData, pDestination, AllocationSizeInBytes, cudaMemcpyDeviceToHost); 
} 

但nppiFilterGaussAdvanced_8u_C1R破壞圖像

void cuda_npp_gauss_filter_qt_advanced(uchar* pSourceData, uchar* pResultData, const int &ImageLineStep, const int &ImageWidth, const int &ImageHeight, const int &Radius) 
{ 
     NppiSize oSizeROI; 

     oSizeROI.width = ImageWidth; 
     oSizeROI.height = ImageHeight; 

     Npp32s SourceStep = ImageLineStep; 
     Npp32s DestinationStep = ImageLineStep; 

     size_t AllocationSizeInBytes = ImageLineStep * ImageHeight; 
     Npp8u *pSource, *pDestination; 

     cudaMalloc<Npp8u>(&pSource,AllocationSizeInBytes); 
     cudaMalloc<Npp8u>(&pDestination,AllocationSizeInBytes); 

     //------------------------------------------------------- 

     int nFilterTaps = 2*((int)((float)ceil(Radius) + 0.5F)) + 1; 

     //------------------------------------------------------- 

     //------------------------------------------------------- 
     //-------------- Gaussian kernel ------------------------ 

     double sigma = 0.3*((nFilterTaps-1)*0.5 - 1) + 0.8; 

     cv::Mat GaussianKernel = cv::getGaussianKernel(nFilterTaps, sigma, CV_32F); 

     //------------------------------------------------------- 
     //------------------------------------------------------- 

     cudaMemcpy(pSource, pSourceData, AllocationSizeInBytes, cudaMemcpyHostToDevice); 

     nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, (Npp32f*)GaussianKernel.data); 

     cudaMemcpy(pResultData, pDestination, AllocationSizeInBytes, cudaMemcpyDeviceToHost); 
    } 

我使用OpenCV的創建高斯內核。

Xubuntu 16.04.1,Qt 5.7-1,CUDA 8.044,OpenCV 2.4.9。 謝謝。

+2

1.在SO上,類型問題「爲什麼這個代碼不工作?」預計將包括[mcve]。你所顯示的不是一個完整的代碼,也不是一個MCVE。 2.你應該總是做適當的錯誤檢查。檢查所有npp調用npp錯誤,檢查所有cuda調用CUDA錯誤,並在*請求其他人尋求幫助之前使用'cuda-memcheck'運行你的代碼。即使您不瞭解錯誤輸出,對於其他嘗試幫助您的人也會有所幫助。 3.我認爲高斯內核數據應該是一個設備指針,而不是主機指針。您應該將該數據傳輸到設備內存 –

+2

您還需要將工作偏移到圖像的中心。您無法獲得等於源圖像大小和目標圖像大小的ROI大小,因爲應用高斯內核時,它需要每邊的元素半徑。用'cuda-memcheck'運行你的代碼肯定會指出問題。您需要按照我所指示的方向進行調整,直到'cuda-memcheck'報告沒有錯誤。那麼至少你會知道你並沒有要求NPP訪問數據越界。 –

回答

3

NPP功能需要在設備上分配內存。 OpenCV Mat(在這種情況下爲GaussianKernel)默認分配給主機。

所以下面這行代碼變得無效。

nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, (Npp32f*)GaussianKernel.data); 

參數(Npp32f*)GaussianKernel.data在傳遞給NPP函數之前應該被複制到設備。它可能是這樣實現的:

float* GaussianKernel_d; 
size_t GaussianKernelBytes = GaussianKernel.step() * GaussianKernel.rows; 

cudaMalloc<float>(&GaussianKernel_d, GaussianKernelBytes); 
cudaMemcpy(GaussianKernel_d, GaussianKernel.data, GaussianKernelBytes, cudaMemcpyHostToDevice); 

nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, GaussianKernel_d); 
0

感謝您的幫助。它現在有效。

//------------------------------------------------------- 
//-------------- Gaussian kernel ------------------------ 

    double sigma = 0.3*((nFilterTaps-1)*0.5 - 1) + 0.8; 

    cv::Mat GaussianKernel = cv::getGaussianKernel(nFilterTaps, sigma, CV_32F); 

    Npp32f* pGaussianKernel; 

    size_t GaussianKernelBytes = GaussianKernel.step * GaussianKernel.rows; 

    cudaMalloc<Npp32f>(&pGaussianKernel, GaussianKernelBytes); 

    cudaMemcpy(pGaussianKernel, GaussianKernel.data, GaussianKernelBytes, cudaMemcpyHostToDevice); 

//------------------------------------------------------- 
//------------------------------------------------------- 

cudaMemcpy(pSource, pSourceData, AllocationSizeInBytes, cudaMemcpyHostToDevice); 

nppiFilterGaussAdvanced_8u_C1R (pSource, SourceStep, pDestination, DestinationStep, oSizeROI, nFilterTaps, pGaussianKernel); 

cudaMemcpy(pResultData, pDestination, AllocationSizeInBytes, cudaMemcpyDeviceToHost); 
+0

很高興知道:)。你可以考慮接受答案。 – sgarizvi