2013-01-16 115 views
3

此問題以前已被問過,但提問者沒有提供足夠的信息並且沒有回答,我對該程序感到好奇。在灰度jpg圖像上應用Sobel邊緣檢測與CUDA和OpenCV

Original Question Link

我試圖同時使用OpenCV和CUDA庫來實現Sobel邊緣檢測,X方向的Sobel內核爲:

-1 0 1 
-2 0 2 
-1 0 1 

我的項目中有3個文件:

main.cpp 
CudaKernel.cu 
CudaKernel.h 

main.cpp

#include <stdlib.h> 
#include <iostream> 
#include <string.h> 
#include <Windows.h> 
#include <opencv2\core\core.hpp> 
#include <opencv2\highgui\highgui.hpp> 
#include <opencv2\gpu\gpu.hpp> 
#include <cuda_runtime.h> 
#include <cuda_gl_interop.h> 
#include "CudaKernel.h" 

using namespace cv; 
using namespace std; 


// Entry point of the question's original (non-working) program: loads a
// grayscale image, copies it into a float image with a hand-written loop,
// runs the CUDA Sobel wrapper and shows the input and output images.
int main(int argc, char** argv) 
{ 
    // NOTE(review): declared without an initializer; if the try block throws
    // before the assignment below, the later `if(!image)` reads an
    // indeterminate pointer.
    IplImage* image; 

    try 
    { 
     image = cvLoadImage("4555472_460s.jpg", CV_LOAD_IMAGE_GRAYSCALE); 
     // Print some information about the current GPU.
     gpu::DeviceInfo info = gpu::getDevice(); 
     cout << info.name() << endl; 
     cout << "Stream Processor : "<< info.multiProcessorCount() << endl; 
     cout << "Total Graphic Memory :" << info.totalMemory()/1048576 << " MB" << endl; 
    } 
    // NOTE(review): this handler catches a pointer to cv::Exception; confirm
    // how OpenCV throws (a by-value throw would never match this handler).
    catch (const cv::Exception* ex) 
    { 
     cout << "Error: " << ex->what() << endl; 
    } 
    if(!image) 
     { 
      cout << "Could not open or find the image" << std::endl ; 
      return -1; 
     } 


    // image2 receives the filter output, image3 holds the float copy of the input.
    IplImage* image2=cvCreateImage(cvGetSize(image),IPL_DEPTH_32F,image->nChannels); 
    IplImage* image3=cvCreateImage(cvGetSize(image),IPL_DEPTH_32F,image->nChannels); 

    unsigned char * pseudo_input=(unsigned char *)image->imageData; 
    float *output=(float*)image2->imageData; 
    float *input=(float*)image3->imageData; 
    // NOTE(review): `s` is derived from the widthStep of `image` (loaded as
    // grayscale above, not the float images) and is then used as the row
    // stride for BOTH the unsigned char source and the float destination.
    int s=image->widthStep/sizeof(float); 
     // NOTE(review): `<=` makes this loop visit height+1 rows.
     for(int w=0;w<=(image->height);w++) 
      for(int h=0;h<(image->width*image->nChannels);h++) 
      { 
       input[w*s+h]= pseudo_input[w*s+h]; 
      } 


    // NOTE(review): `Pixel` is not declared anywhere in the visible snippet
    // and this variable is not used afterwards.
    Pixel *fagget = (unsigned char*) image->imageData; 
    // NOTE(review): the buffers passed are float images, but the widthStep
    // passed is the one of the original `image`.
    kernelcall(input, output, image->width,image->height, image->widthStep); 

// cv::namedWindow("Display window", CV_WINDOW_AUTOSIZE);// Create a window for display. 
    cvShowImage("Original Image", image); // Show our image inside it. 
    cvShowImage("Sobeled Image", image2); 
    waitKey(0); // Wait for a keystroke in the window 
    return 0; 

} 

CudaKernel.cu

#include<cuda.h> 
#include<iostream> 
#include "CudaKernel.h" 
using namespace std; 
#define CudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) 
#define CudaCheckError() __cudaCheckError(__FILE__, __LINE__) 
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) 


texture <float,2,cudaReadModeElementType> tex1; 
texture<unsigned char, 2> tex; 
static cudaArray *array = NULL; 
static cudaArray *cuArray = NULL; 


//Kernel for x direction sobel 
// Each thread computes one output element at (x, y) as a weighted sum of nine
// fetches from the texture reference tex1 and stores it at
// output[y*widthStep+x].
//   garbage   - never read or written in this kernel
//   output    - destination buffer, indexed with widthStep as the row stride
//   width     - not used in this kernel
//   height    - not used in this kernel
//   widthStep - row stride of `output`, in elements
// NOTE(review): there is no check of x/y against width/height, so every
// launched thread writes to `output`.
__global__ void implement_x_sobel(float* garbage,float* output,int width,int height,int widthStep) 
{ 
    int x=blockIdx.x*blockDim.x+threadIdx.x; 
    int y=blockIdx.y*blockDim.y+threadIdx.y; 

    // Weights: +1/+2/+1 on column x+1, -1/-2/-1 on column x-1, 0 on column x.
    float output_value=((0*tex2D(tex1,x,y))+(2*tex2D(tex1,x+1,y))+(-2*tex2D(tex1,x- 1,y))+(0*tex2D(tex1,x,y+1))+(1*tex2D(tex1,x+1,y+1))+(-1*tex2D(tex1,x-1,y+1))+ (1*tex2D(tex1,x+1,y-1))+(0*tex2D(tex1,x,y-1))+(-1*tex2D(tex1,x-1,y-1))); 
    output[y*widthStep+x]=output_value; 
} 


// Reports a failed CUDA runtime call on stderr (file, line, numeric code and
// error string) and terminates the process with exit code -1.
// Returns normally when `err` is cudaSuccess.
inline void __checkCudaErrors(cudaError err, const char *file, const int line) 
{ 
    // Successful call: nothing to report.
    if (err == cudaSuccess) 
        return; 

    const int code = (int)err; 
    fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", 
            file, line, code, cudaGetErrorString(err)); 
    exit(-1); 
} 

//Host Code 
// Checks the result of a CUDA runtime call. When CUDA_ERROR_CHECK is defined,
// a failure is printed (file, line, error string) and the process exits with
// code -1; otherwise the function does nothing.
inline void __cudaSafeCall(cudaError err, const char *file, const int line) 
{ 
#ifdef CUDA_ERROR_CHECK 
    // Successful call: nothing to report.
    if (err == cudaSuccess) 
        return; 

    printf("cudaSafeCall() failed at %s:%i : %s\n", 
           file, line, cudaGetErrorString(err)); 
    exit(-1); 
#endif 
} 
// Queries the last CUDA runtime error. When CUDA_ERROR_CHECK is defined, a
// pending error is printed (file, line, error string) and the process exits
// with code -1; otherwise the function does nothing.
inline void __cudaCheckError(const char *file, const int line) 
{ 
#ifdef CUDA_ERROR_CHECK 
    const cudaError status = cudaGetLastError(); 
    // No pending error: nothing to report.
    if (status == cudaSuccess) 
        return; 

    printf("cudaCheckError() failed at %s:%i : %s\n", 
           file, line, cudaGetErrorString(status)); 
    exit(-1); 
#endif 
} 

// Host wrapper of the question's original (non-working) version: copies
// `input` into a CUDA array, binds it to the texture reference tex1, launches
// implement_x_sobel and copies the device result back into `output`.
//   input     - host source buffer
//   output    - host destination buffer, receives height*widthStep bytes
//   width     - image width, used for the array size and the grid size
//   height    - image height, used for the array size and the grid size
//   widthStep - row stride in bytes (divided by sizeof(float) for the kernel)
void kernelcall(float* input,float* output,int width,int height,int widthStep){ 
    //cudaChannelFormatDesc channelDesc=cudaCreateChannelDesc(32,32,0,0,cudaChannelFormatKindFloat); 
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); 
    //cudaArray *cuArray; 
    CudaSafeCall(cudaMallocArray(&cuArray,&channelDesc,width,height)); 
    // NOTE(review): a flat copy of widthStep*height bytes into an array that
    // was allocated as width x height elements; the return code is not checked.
    cudaMemcpyToArray(cuArray,0,0,input,widthStep*height,cudaMemcpyHostToDevice); 

    // Clamp addressing in both dimensions, linear filtering.
    tex1.addressMode[0]=cudaAddressModeClamp; 
    tex1.addressMode[1]=cudaAddressModeClamp; 
    tex1.filterMode=cudaFilterModeLinear; 
    cudaBindTextureToArray(tex1,cuArray,channelDesc); 
    // NOTE(review): assigned after the texture has already been bound —
    // confirm whether this still takes effect.
    tex1.normalized=false; 
    float * D_output_x; 
    // Stays NULL; only passed to the kernel and to cudaFree below.
    float * garbage=NULL; 
    CudaSafeCall(cudaMalloc(&D_output_x,widthStep*height)); 
    // 16x16 threads per block; grid size rounded up to cover width x height.
    dim3 blocksize(16,16); 
    dim3 gridsize; 
    gridsize.x=(width+blocksize.x-1)/blocksize.x; 
    gridsize.y=(height+blocksize.y-1)/blocksize.y; 

    implement_x_sobel<<<gridsize,blocksize>>>(garbage,D_output_x,width,height,widthStep/sizeof(float)); 
    cudaThreadSynchronize(); 
    CudaCheckError(); 
    CudaSafeCall(cudaMemcpy(output,D_output_x,height*widthStep,cudaMemcpyDeviceToHost)); 
    // NOTE(review): tex1 is not unbound before the array is freed.
    cudaFree(D_output_x); 
    cudaFree(garbage); 
    cudaFreeArray(cuArray); 
} 

結果完全錯了,看起來一點都不像原始圖像。

結果:

Incorrect Result

我改變了一些代碼行到

// Modified fragment from the question (replaces the conversion loop and the
// display code of main); it is not a complete function.
// NOTE(review): `image` is presumably still the image loaded as grayscale in
// main — if so, this cast only reinterprets its bytes as float, it does not
// convert them. Confirm against the full program.
float *pseudo_input=(float *)image->imageData; 
float *output=(float*)image2->imageData; 
float *input=(float*)image3->imageData; 
// NOTE(review): `image4` is not declared anywhere in the visible snippet.
float *inputnormalized=(float *)image4->imageData; 

int s=image->widthStep/sizeof(float); 
// NOTE(review): `<=` makes this loop visit height+1 rows.
for(int w=0;w<=(image->height);w++) 
    for(int h=0;h<(image->width*image->nChannels);h++) 
    { 
     input[w*s+h]= pseudo_input[w*s+h]; 
    } 


kernelcall(input, output, image->width,image->height, image->widthStep); 

// NOTE(review): cvNormalize is handed the raw float* buffers rather than the
// image objects — confirm the expected argument types; this may be related to
// the unhandled exception reported below.
cvNormalize(input,inputnormalized,0,255,NORM_MINMAX, CV_8UC1); 

cvShowImage("Original Image", image); // Show our image inside it. 
cvShowImage("Sobeled Image", image2); 

但現在我得到一個未處理的異常錯誤。

+1

一般來說,建議您在開始賞金一個問題,而不是問一個可能的重複。 –

+0

如果你只是想找一個GPU版的Sobel過濾器,OpenCV的gpu模塊提供了幾個過濾器函數,其中似乎就有[sobel過濾器](http://docs.opencv.org/modules/gpu/doc/image_filtering.html#gpu-createderivfilter-gpu)。 – hubs

回答

4

OpenCV規則第1條:

不要直接通過底層數據指針訪問圖像數據,除非 絕對必要的,e.g複製數據到GPU。參考(ME:P)

錯誤/建議:

  1. 不要通過圖像數據指針循環來轉換圖像,而應使用cvConvert改變圖像數據類型。循環非常容易出錯。

  2. 調用kernelcall函數時,您傳遞的是float圖像的數據指針,卻傳入了原始8位圖像的widthStep。這是導致錯誤結果的主要原因,因爲它會導致內核中的索引不正確。

  3. 在兩個具有不同行距(widthStep)的pitched指針之間複製內存時,務必使用CUDA運行時提供的二維內存複製函數,例如cudaMemcpy2D、cudaMemcpy2DToArray等。在您的情況下,cuArray內部的widthStep未知,而輸入IplImage和cuArray具有不同的widthStep。

  4. 避免不必要的頭文件、賦值和標識符聲明。

  5. 在CUDA內核中添加邊界檢查,使得只有位於圖像範圍內的線程才執行內存讀/寫操作。這可能會導致少許分支發散,但總比無效的內存讀/寫要好。

修改過的代碼(測試):

Main.cpp的

#include <iostream> 
#include <opencv2/opencv.hpp> 
#include "CudaKernel.h" 

using namespace cv; 
using namespace std; 

int main(int argc, char** argv) 
{ 
    IplImage* image; 

    image = cvLoadImage("4555472_460s.jpg", CV_LOAD_IMAGE_GRAYSCALE); 

    if(!image) 
    { 
     cout << "Could not open or find the image" << std::endl; 
     return -1; 
    } 


    IplImage* image2 = cvCreateImage(cvGetSize(image),IPL_DEPTH_32F,image->nChannels); 
    IplImage* image3 = cvCreateImage(cvGetSize(image),IPL_DEPTH_32F,image->nChannels); 

    //Convert the input image to float 
    cvConvert(image,image3); 

    float *output = (float*)image2->imageData; 
    float *input = (float*)image3->imageData; 

    kernelcall(input, output, image->width,image->height, image3->widthStep); 

    //Normalize the output values from 0.0 to 1.0 
    cvScale(image2,image2,1.0/255.0); 

    cvShowImage("Original Image", image); 
    cvShowImage("Sobeled Image", image2); 
    cvWaitKey(0); 
    return 0; 
} 

CudaKernel.cu

#include<cuda.h> 
#include<iostream> 
#include "CudaKernel.h" 

using namespace std; 

#define CudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__) 
#define CudaCheckError() __cudaCheckError(__FILE__, __LINE__) 
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) 


texture <float,2,cudaReadModeElementType> tex1; 

static cudaArray *cuArray = NULL; 

//Kernel for x direction sobel 
// Each thread computes one output element at (x, y) from the texture
// reference tex1 using the 3x3 kernel
//     -1 0 1
//     -2 0 2
//     -1 0 1
// and stores it at output[y*widthStep+x].
//   output    - destination buffer, row stride `widthStep` elements
//   width     - image width in pixels
//   height    - image height in pixels
//   widthStep - row stride of `output`, in elements (not bytes)
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads; surplus threads exit without touching memory.
// Fetches at x-1/x+1/y-1/y+1 on the image border are resolved by whatever
// address mode tex1 is bound with.
__global__ void implement_x_sobel(float* output,int width,int height,int widthStep) 
{ 
    const int x = blockIdx.x * blockDim.x + threadIdx.x; 
    const int y = blockIdx.y * blockDim.y + threadIdx.y; 

    //Make sure that thread is inside image bounds 
    if(x >= width || y >= height) 
        return; 

    // The centre column has weight 0, so its three texture fetches are
    // skipped; the remaining six terms keep their original summation order.
    const float output_value = 
          - tex2D(tex1,x-1,y-1)        + tex2D(tex1,x+1,y-1) 
          - 2.0f*tex2D(tex1,x-1,y)     + 2.0f*tex2D(tex1,x+1,y) 
          - tex2D(tex1,x-1,y+1)        + tex2D(tex1,x+1,y+1); 

    // Index computed in size_t so y*widthStep cannot overflow int.
    output[(size_t)y*widthStep+x]=output_value; 
} 


// Reports a failed CUDA runtime call on stderr (file, line, numeric code and
// error string) and terminates the process with exit code -1.
// Returns normally when `err` is cudaSuccess.
inline void __checkCudaErrors(cudaError err, const char *file, const int line) 
{ 
    // Successful call: nothing to report.
    if (err == cudaSuccess) 
        return; 

    const int code = (int)err; 
    fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", 
            file, line, code, cudaGetErrorString(err)); 
    exit(-1); 
} 

//Host Code 
// Checks the result of a CUDA runtime call. When CUDA_ERROR_CHECK is defined,
// a failure is printed (file, line, error string) and the process exits with
// code -1; otherwise the function does nothing.
inline void __cudaSafeCall(cudaError err, const char *file, const int line) 
{ 
#ifdef CUDA_ERROR_CHECK 
    // Successful call: nothing to report.
    if (err == cudaSuccess) 
        return; 

    printf("cudaSafeCall() failed at %s:%i : %s\n", 
           file, line, cudaGetErrorString(err)); 
    exit(-1); 
#endif 
} 
// Queries the last CUDA runtime error. When CUDA_ERROR_CHECK is defined, a
// pending error is printed (file, line, error string) and the process exits
// with code -1; otherwise the function does nothing.
inline void __cudaCheckError(const char *file, const int line) 
{ 
#ifdef CUDA_ERROR_CHECK 
    const cudaError status = cudaGetLastError(); 
    // No pending error: nothing to report.
    if (status == cudaSuccess) 
        return; 

    printf("cudaCheckError() failed at %s:%i : %s\n", 
           file, line, cudaGetErrorString(status)); 
    exit(-1); 
#endif 
} 

// Host wrapper: uploads a float image into a CUDA array, binds it to the
// texture reference tex1, runs implement_x_sobel and copies the result back.
//   input     - host float image, rows `widthStep` bytes apart
//   output    - host buffer receiving height*widthStep bytes, same row stride
//   width     - image width in pixels
//   height    - image height in pixels
//   widthStep - row stride of both host buffers, in bytes
// Every runtime call goes through CudaSafeCall / CudaCheckError, so failures
// are reported when CUDA_ERROR_CHECK is defined. Invalid arguments are
// reported on stderr and the function returns without touching the GPU.
void kernelcall(float* input,float* output,int width,int height,int widthStep) 
{ 
    // Byte counts are computed in size_t to avoid int overflow on large images.
    const size_t rowBytes = (size_t)(width > 0 ? width : 0) * sizeof(float); 

    // Reject empty images, NULL buffers and strides shorter than one row.
    if(input == NULL || output == NULL || width <= 0 || height <= 0 
       || widthStep <= 0 || (size_t)widthStep < rowBytes) 
    { 
     fprintf(stderr, "kernelcall: invalid arguments (width=%d, height=%d, widthStep=%d)\n", 
      width, height, widthStep); 
     return; 
    } 

    const size_t imageBytes = (size_t)widthStep * (size_t)height; 

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); 

    CudaSafeCall(cudaMallocArray(&cuArray,&channelDesc,width,height)); 

    //Never use 1D memory copy if host and device pointers have different widthStep. 
    // You don't know the width step of CUDA array, so its better to use cudaMemcpy2D... 
    CudaSafeCall(cudaMemcpy2DToArray(cuArray,0,0,input,widthStep,rowBytes,height,cudaMemcpyHostToDevice)); 

    CudaSafeCall(cudaBindTextureToArray(tex1,cuArray,channelDesc)); 

    float * D_output_x = NULL; 
    CudaSafeCall(cudaMalloc(&D_output_x,imageBytes)); 

    // 16x16 threads per block; grid size rounded up to cover the whole image.
    dim3 blocksize(16,16); 
    dim3 gridsize; 
    gridsize.x=(width+blocksize.x-1)/blocksize.x; 
    gridsize.y=(height+blocksize.y-1)/blocksize.y; 

    // The kernel indexes `output` in elements, hence the byte stride / sizeof(float).
    implement_x_sobel<<<gridsize,blocksize>>>(D_output_x,width,height,(int)(widthStep/sizeof(float))); 

    // Launch-configuration errors show up in cudaGetLastError (CudaCheckError);
    // execution errors show up at the synchronization point.
    CudaCheckError(); 
    // cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize.
    CudaSafeCall(cudaDeviceSynchronize()); 

    //Don't forget to unbind the texture 
    CudaSafeCall(cudaUnbindTexture(tex1)); 

    CudaSafeCall(cudaMemcpy(output,D_output_x,imageBytes,cudaMemcpyDeviceToHost)); 

    CudaSafeCall(cudaFree(D_output_x)); 
    CudaSafeCall(cudaFreeArray(cuArray)); 
    // Do not leave the file-level pointer dangling after the array is freed.
    cuArray = NULL; 
} 
+0

你的代碼工作完美,更簡單,謝謝指出我的錯誤!我真的很感激它 – user1979092

+0

你的代碼是非常有幫助的。但我有一個問題:爲什麼使用float數組來存儲輸入和輸出?我可以使用unsigned char數組嗎? – hakunami

+0

因爲OP打算使用它,所以我使用了float。是的,你可以使用'unsigned char'。在這種情況下,您必須創建深度爲「IPL_DEPTH_8U」的主機映像「image2」和「image3」。 – sgarizvi

0
Here:- 

// Conversion loop quoted from the question; it is a fragment, not a complete
// function.
unsigned char * pseudo_input=(unsigned char *)image->imageData; 
float *output=(float*)image2->imageData; 
float *input=(float*)image3->imageData; 
// NOTE(review): one stride `s`, derived from image->widthStep, is used below
// for both the unsigned char source and the float destination.
int s=image->widthStep/sizeof(float); 
    // NOTE(review): `<=` makes this loop visit height+1 rows.
    for(int w=0;w<=(image->height);w++) 
     for(int h=0;h<(image->width*image->nChannels);h++) 
     { 
      input[w*s+h]= pseudo_input[w*s+h]; 
     } 

input是float*,而pseudo_input是unsigned char*。先將所有內容轉換爲float再處理,最後使用cvNormalize和NORM_MINMAX在0到255之間進行歸一化,以獲得正確的結果。

+0

好的我改變了代碼,但現在我得到一個未處理的異常錯誤 – user1979092

+0

處理之後再進行歸一化。使用cvNormalize(input, inputnormalized, 0, 255, NORM_MINMAX, CV_8UC1) –

+0

所以在Cuda內核調用後規範化它? – user1979092