2016-06-14 31 views
1

我有一些用來建立HSV遮罩的程式碼,大致如下(取自this Japanese-language page)——問題:如何以高效的方式產生三通道LUT遮罩?

// Produces a single-channel mask from `src` by thresholding each of its three
// channels through a lookup table and AND-ing the three results together.
//
//   src - input image on the GPU; it is pushed through a 3-channel 8-bit LUT,
//         so it is presumably a CV_8UC3 HSV image (confirm with the caller).
//   dst - receives the mask; must point to a valid GpuMat. A pixel ends up
//         255 only when inHRange, inSRange and inVRange all accept its
//         respective channel value, otherwise 0.
void colorExtraction(const cv::gpu::GpuMat &src,
                     cv::gpu::GpuMat *dst)
{
    // One table entry per possible 8-bit value; channel k of the entry holds
    // 255 when that value passes the k-th range predicate, else 0.
    cv::Mat table(256, 1, CV_8UC3);
    for (int level = 0; level < 256; ++level)
    {
        cv::Vec3b &entry = table.at<cv::Vec3b>(level);
        entry[0] = inHRange(level) ? 255 : 0;
        entry[1] = inSRange(level) ? 255 : 0;
        entry[2] = inVRange(level) ? 255 : 0;
    }

    // Map every pixel through the table: each channel becomes a 0/255 flag.
    cv::gpu::GpuMat flagged(src.size(), CV_8UC3);
    cv::gpu::LUT(src, table, flagged);

    // Separate the three flag planes.
    std::vector<cv::gpu::GpuMat> planes;
    cv::gpu::split(flagged, planes);

    // mask = plane0 & plane1 & plane2
    cv::gpu::GpuMat &mask = *dst;
    cv::gpu::bitwise_and(planes[0], planes[1], mask);
    cv::gpu::bitwise_and(mask, planes[2], mask);
}

這段程式碼可以運作,但儘管運算主要在GPU上進行,速度仍比我期望的慢,可能是因為產生了一些中間結果。我猜應該有好方法能把這一切合併成一兩次呼叫,但我不知道該怎麼做。自己編寫CUDA核心當然是一種選擇,但我想先確認一下,看看是否不必重新發明輪子。

回答

1

自問自答:我最後自己編寫了一個核心,在單一呼叫中完成LUT()、split()以及兩次bitwise_and():

// Fused LUT + split + bitwise_and: writes one mask byte per input pixel.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel
// (x -> column, y -> row). The grid may overhang the image; threads that
// fall outside src return without touching memory.
//
//   src - 3-channel 8-bit input image (device memory).
//   dst - 1-channel 8-bit output mask (device memory); must be at least
//         src.rows x src.cols.
//   lut - 3-channel 8-bit table indexed by ROW (lut(value, 0)), so it must
//         have at least 256 rows and 1 column. Channel k of row `value`
//         is the flag for channel k of the source pixel.
//
// No shared memory is used.
__global__ void colorExtractionKernel(cv::gpu::PtrStepSz<uchar3> const src,
                                      cv::gpu::PtrStepSz<unsigned char> dst,
                                      cv::gpu::PtrStepSz<uchar3> const lut)
{
    // Signed indices so they compare cleanly against PtrStepSz::rows/cols (int).
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // The launcher rounds the grid up, so the last blocks in each dimension
    // contain threads past the image edge. Without this guard they would
    // read src and write dst out of bounds.
    if (row >= src.rows || col >= src.cols)
        return;

    // Extract post-LUT hsv flags
    const uchar3 srcHSV = src(row, col);
    const unsigned char h = lut(srcHSV.x, 0).x;
    const unsigned char s = lut(srcHSV.y, 0).y;
    const unsigned char v = lut(srcHSV.z, 0).z;

    // Result pixel is the AND of the three per-channel flags
    dst(row, col) = (h & s & v);
}

// Host wrapper: uploads the lookup table and launches colorExtractionKernel
// to turn a 3-channel image into a single-channel mask in one pass.
//
//   src - input image on the GPU; must be CV_8UC3 (the kernel reads uchar3).
//   dst - output mask; (re)allocated here as CV_8UC1 with the size of src.
//         If it already has that size and type it is reused as-is.
//   lut - host-side table, CV_8UC3 with at least 256 rows and 1 column;
//         the kernel indexes it by row.
//
// Blocks until the kernel has finished. Errors are reported via gpuErrChk.
void colorExtraction_cuda(const cv::gpu::GpuMat &src, // input HSV image
                          cv::gpu::GpuMat       &dst, // specified color extracted binarized image
                          cv::Mat const         &lut) // Look-up thresholds
{
    // An empty input would yield a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to compute anyway.
    if (src.empty())
    {
        dst.release();
        return;
    }

    // The kernel reinterprets both buffers as uchar3 and reads lut rows
    // 0..255; reject anything that would make it read out of bounds.
    CV_Assert(src.type() == CV_8UC3);
    CV_Assert(lut.type() == CV_8UC3 && lut.rows >= 256 && lut.cols >= 1);

    // Make sure the kernel has a correctly sized buffer to write to.
    dst.create(src.size(), CV_8UC1);

    // Blocking upload of the table to device memory.
    cv::gpu::GpuMat gpuLut(lut);

    // NOTE(review): a stream is created and destroyed on every call, and
    // would leak if gpuErrChk returns/throws instead of aborting - confirm.
    cudaStream_t thisStream;
    gpuErrChk(cudaStreamCreate(&thisStream));

    // One thread per pixel; ceil-div so partial tiles at the edges are covered.
    dim3 Threads(32, 16);
    dim3 Blocks((src.cols + Threads.x - 1)/Threads.x, (src.rows + Threads.y - 1)/Threads.y);

    colorExtractionKernel<<<Blocks, Threads, 0, thisStream>>>(src, dst, gpuLut);

    // Launch-configuration errors are only visible through cudaGetLastError,
    // so check right after the launch rather than after the sync.
    gpuErrChk(cudaGetLastError());

    // Execution errors surface here; also keeps gpuLut alive until the
    // kernel is done with it.
    gpuErrChk(cudaStreamSynchronize(thisStream));
    gpuErrChk(cudaStreamDestroy(thisStream));
}
相關問題