2014-05-23 55 views
0

與CPU版本相比,OpenCV GPU對象檢測速度更慢,而且檢測到的目標更少

下面是來自OpenCV的對象檢測代碼的CPU實現和GPU實現。

1)與CPU版本相比,GPU實現速度緩慢

2)對於相同的分類器,GPU實現檢測到的目標比CPU版本的代碼更少

有人知道爲什麼會這樣嗎?CPU版本的代碼:

#include <windows.h> 
#include <mmsystem.h> 
#pragma comment(lib, "winmm.lib") 

#include <opencv2/objdetect/objdetect.hpp> 
#include <opencv2/highgui/highgui.hpp> 
#include <opencv2/imgproc/imgproc.hpp> 

#include <iostream> 
#include <stdio.h> 

using namespace std; 
using namespace cv; 

int main(int argc, const char** argv) 
{ 
    //create the cascade classifier object used for the face detection 
    CascadeClassifier face_cascade; 
    //use the haarcascade_frontalface_alt.xml library 
    face_cascade.load("C:/cascades/haarcascade_frontalface_alt_tree.xml"); 

    //setup video capture device and link it to the first capture device 
    VideoCapture captureDevice; 
    captureDevice.open(3); 

    //setup image files used in the capture process 
    Mat captureFrame; 
    Mat grayscaleFrame; 

    //create a window to present the results 
    namedWindow("outputCapture", 1); 

    //create a loop to capture and find faces 
    while(true) 
    { 
     //capture a new image frame 
     captureDevice>>captureFrame; 

     //convert captured image to gray scale and equalize 
     cvtColor(captureFrame, grayscaleFrame, CV_BGR2GRAY); 
     equalizeHist(grayscaleFrame, grayscaleFrame); 

    //create a vector array to store the face found 
    std::vector<Rect> faces; 

    //find faces and store them in the vector array 
    face_cascade.detectMultiScale(grayscaleFrame, faces); 

    //draw a rectangle for all found faces in the vector array on the original image 
    for(int i = 0; i < (int)faces.size(); i++) 
    { 
     Scalar color(0, 0, 255); 

     Point pt1(faces[i].x + faces[i].width, faces[i].y + faces[i].height); 
     Point pt2(faces[i].x, faces[i].y); 

     rectangle(captureFrame, pt1, pt2, color, 1, 8, 0); 

     string text = "Adam yuzi"; 
     int fontFace = FONT_HERSHEY_TRIPLEX; 
     double fontScale = 1; 
     int thickness = 2; 

     putText(captureFrame, text, pt2, fontFace, fontScale, color, thickness); 
     //PlaySound(TEXT("C:/cascades/adam.wav"), NULL, SND_FILENAME | SND_SYNC); 
     // the correct code 
     //Sleep(1000); 
     //break; 
     //cout<<char(7); 
     } 
     //print the output 
     imshow("outputCapture", captureFrame); 

     //pause for 33ms 
     waitKey(33); 
    } 
    return 0; 
} 

GPU版本的代碼(CPU版本和GPU版本的實現均由此示例鏈接提供):

// WARNING: this sample is under construction! Use it on your own risk. 
#if defined _MSC_VER && _MSC_VER >= 1400 
#pragma warning(disable : 4100) 
#endif 


#include <iostream> 
#include <iomanip> 
#include "opencv2/contrib/contrib.hpp" 
#include "opencv2/objdetect/objdetect.hpp" 
#include "opencv2/highgui/highgui.hpp" 
#include "opencv2/imgproc/imgproc.hpp" 
#include "opencv2/cuda.hpp" 
#include "opencv2/cudaimgproc.hpp" 
#include "opencv2/cudawarping.hpp" 

using namespace std; 
using namespace cv; 
using namespace cv::cuda; 

/// Prints the command-line usage summary followed by the OpenCV version string to stdout.
static void help()
{
    // The option is spelled "--video" (no space) to match what the argument parser in main() accepts.
    cout << "Usage: ./cascadeclassifier_gpu \n\t--cascade <cascade_file>\n\t(<image>|--video <video>|--camera <camera_id>)\n"
      "Using OpenCV version " << CV_VERSION << endl << endl;
}


static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale) 
{ 
    if (src.channels() == 3) 
    { 
     cv::cvtColor(src, gray, COLOR_BGR2GRAY); 
    } 
    else 
    { 
     gray = src; 
    } 

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale)); 

    if (scale != 1) 
    { 
     cv::resize(gray, resized, sz); 
    } 
    else 
    { 
     resized = gray; 
    } 
} 

static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double  scale) 
{ 
    if (src.channels() == 3) 
    { 
     cv::cuda::cvtColor(src, gray, COLOR_BGR2GRAY); 
    } 
    else 
    { 
     gray = src; 
    } 

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale)); 

    if (scale != 1) 
    { 
     cv::cuda::resize(gray, resized, sz); 
    } 
    else 
    { 
     resized = gray; 
    } 
} 
/**
 * Draws one line of overlay text onto an image.
 *
 * The text is rendered twice at the same position: first in black with a
 * thicker stroke, then in `fontColor` with the normal stroke on top of it.
 *
 * @param img       image to draw on (modified in place).
 * @param lineOffsY zero-based text row; the vertical position grows with it in
 *                  steps derived from the height of the sample string "T[]".
 * @param fontColor colour of the top text layer.
 * @param ss        text to draw.
 */
static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
{
    const int face = FONT_HERSHEY_DUPLEX;
    const double scale = 0.8;
    const int stroke = 2;

    // Row height is measured from a fixed sample string so every row uses the same spacing.
    const Size sample = cv::getTextSize("T[]", face, scale, stroke, 0);
    const Point origin(1, 3 * sample.height * (lineOffsY + 1)/2);

    // Black, thicker layer underneath.
    putText(img, ss, origin, face, scale, Scalar(0,0,0), 5*stroke/2, 16);
    // Coloured layer on top.
    putText(img, ss, origin, face, scale, fontColor, stroke, 16);
}


/**
 * Overlays the current status and hotkey hints on the display image.
 *
 * Row 0 shows the frame rate with one decimal place, row 1 shows the canvas
 * size and the active modes, and the following rows show either the full
 * hotkey list or only the hint for toggling it.
 *
 * @param canvas       image to draw on (modified in place via matPrint).
 * @param bHelp        true to list every hotkey, false to show only the help hint.
 * @param bGpu         selects the "GPU, " or "CPU, " label.
 * @param bLargestFace selects the "OneFace, " or "MultiFace, " label.
 * @param bFilter      selects the "Filter:ON" or "Filter:OFF" label.
 * @param fps          frame rate to print.
 */
static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool  bFilter, double fps)
{
    const Scalar statusColor(255,0,0);
    const Scalar hintColor(118,185,0);

    // Row 0: frame rate.
    ostringstream fpsLine;
    fpsLine << "FPS = " << setprecision(1) << fixed << fps;
    matPrint(canvas, 0, statusColor, fpsLine.str());

    // Row 1: canvas size and active modes.
    ostringstream modeLine;
    modeLine << "[" << canvas.cols << "x" << canvas.rows << "], ";
    modeLine << (bGpu ? "GPU, " : "CPU, ");
    modeLine << (bLargestFace ? "OneFace, " : "MultiFace, ");
    modeLine << (bFilter ? "Filter:ON" : "Filter:OFF");
    matPrint(canvas, 1, statusColor, modeLine.str());

    // Plain string literals are passed to matPrint rather than stream objects.
    if (!bHelp)
    {
        matPrint(canvas, 2, hintColor, "H - toggle hotkeys help");
        return;
    }

    static const char* const hotkeyRows[] = {
        "Space - switch GPU/CPU",
        "M - switch OneFace/MultiFace",
        "F - toggle rectangles Filter",
        "H - toggle hotkeys help",
        "1/Q - increase/decrease scale"
    };
    const int rowCount = static_cast<int>(sizeof(hotkeyRows) / sizeof(hotkeyRows[0]));
    for (int row = 0; row < rowCount; ++row)
    {
        matPrint(canvas, row + 2, hintColor, hotkeyRows[row]);
    }
}


int main(int argc, const char *argv[]) 
{ 
    if (argc == 1) 
    { 
     help(); 
     return -1; 
    } 

    if (getCudaEnabledDeviceCount() == 0) 
    { 
     return cerr << "No GPU found or the library is compiled without CUDA support"  << endl, -1; 
    } 

    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice()); 

    string cascadeName; 
    string inputName; 
    bool isInputImage = false; 
    bool isInputVideo = false; 
    bool isInputCamera = false; 

    for (int i = 1; i < argc; ++i) 
    { 
     if (string(argv[i]) == "--cascade") 
      cascadeName = argv[++i]; 
     else if (string(argv[i]) == "--video") 
     { 
      inputName = argv[++i]; 
      isInputVideo = true; 
     } 
     else if (string(argv[i]) == "--camera") 
     { 
      inputName = argv[++i]; 
      isInputCamera = true; 
     } 
     else if (string(argv[i]) == "--help") 
     { 
      help(); 
      return -1; 
     }  
     else if (!isInputImage) 
     { 
      inputName = argv[i]; 
      isInputImage = true; 
     } 
     else 
     { 
      cout << "Unknown key: " << argv[i] << endl; 
      return -1; 
     } 
    } 

    CascadeClassifier_CUDA cascade_gpu; 
    if (!cascade_gpu.load(cascadeName)){ 
     return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<  "\"" << endl, help(), -1; 
    } 

    CascadeClassifier cascade_cpu; 
    if (!cascade_cpu.load(cascadeName)) { 
     return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<  "\"" << endl, help(), -1; 
    } 

    VideoCapture capture; 
    Mat image; 

    if (isInputImage) { 
     image = imread(inputName); 
     CV_Assert(!image.empty()); 
     } 
    else if (isInputVideo) { 
     capture.open(inputName); 
     CV_Assert(capture.isOpened()); 
    } 
else { 
     capture.open(atoi(inputName.c_str())); 
     CV_Assert(capture.isOpened()); 
    } 

    namedWindow("result", 1); 

    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp; 
    vector<Rect> facesBuf_cpu; 

    GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu; 

/* parameters */ 
    bool useGPU = true; 
    double scaleFactor = 1.0; 
    bool findLargestObject = false; 
    bool filterRects = true; 
    bool helpScreen = false; 

    int detections_num; 
    for (;;) { 
     if (isInputCamera || isInputVideo)  { 
      capture >> frame; 
      if (frame.empty())   { 
       break; 
      } 
     } 

     (image.empty() ? frame : image).copyTo(frame_cpu); 
     frame_gpu.upload(image.empty() ? frame : image); 

     convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor); 
     convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor); 

     TickMeter tm; 
     tm.start(); 

    if (useGPU)  { 
      //cascade_gpu.visualizeInPlace = true; 
      cascade_gpu.findLargestObject = findLargestObject; 

      detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu,  1.2, 
                  (filterRects ||  findLargestObject) ? 4 : 0); 
      facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded); 
     } 
     else  { 
      Size minSize = cascade_gpu.getClassifierSize(); 
      cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2, 
             (filterRects || findLargestObject) ? 4 : 0, 
             (findLargestObject ?  CASCADE_FIND_BIGGEST_OBJECT : 0) 
              | CASCADE_SCALE_IMAGE, 
             minSize); 
      detections_num = (int)facesBuf_cpu.size(); 
     } 

     if (!useGPU && detections_num)  { 
      for (int i = 0; i < detections_num; ++i)   { 
       rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255)); 
      } 
     } 

     if (useGPU)  { 
      resized_gpu.download(resized_cpu); 
      for (int i = 0; i < detections_num; ++i)  { 
       rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i],  Scalar(255)); 
      } 
     } 

      tm.stop(); 
     double detectionTime = tm.getTimeMilli(); 
     double fps = 1000/detectionTime; 
     //print detections to console 
     cout << setfill(' ') << setprecision(2); 
     cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det"; 
    if ((filterRects || findLargestObject) && detections_num > 0)  { 
      Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0]; 
      for (int i = 0; i < min(detections_num, 2); ++i)   { 
       cout << ", [" << setw(4) << faceRects[i].x 
        << ", " << setw(4) << faceRects[i].y 
         << ", " << setw(4) << faceRects[i].width 
         << ", " << setw(4) << faceRects[i].height << "]"; 
        } 
      } 
      cout << endl; 

      cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR); 
      displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects,  fps); 
      imshow("result", frameDisp); 

      char key = (char)waitKey(5); 
      if (key == 27)  { 
       break; 
      }  
      switch (key)   { 
      case ' ': 
       useGPU = !useGPU; 
       break; 
      case 'm': 
      case 'M': 
       findLargestObject = !findLargestObject; 
       break; 
      case 'f': 
       case 'F': 
       filterRects = !filterRects; 
       break; 
      case '1': 
       scaleFactor *= 1.05; 
       break; 
       case 'q': 
      case 'Q': 
       scaleFactor /= 1.05; 
       break; 
      case 'h': 
      case 'H': 
       helpScreen = !helpScreen; 
       break; 
      } 
     } 
     return 0; 
    } 

注意:這些代碼不是我寫的,CPU版本和GPU版本均取自他處(原文鏈接未保留)。我也在別處發佈了我的觀察結果(原文鏈接未保留)。

回答

1

試試這段代碼,它在我這裏工作正常:

#define _CRT_SECURE_NO_DEPRECATE 
#include <stdio.h> 
#include <direct.h> 
#include "fstream" 
#include "iostream" 
#include <vector> 
#include "opencv2/core/core.hpp" 
#include "opencv2/core/gpumat.hpp" 
#include "opencv2/core/opengl_interop.hpp" 
#include "opencv2/gpu/gpu.hpp" 
#include "opencv2/ml/ml.hpp" 
#include "opencv2/highgui/highgui.hpp" 
#include "opencv2/contrib/contrib.hpp" 
#include "opencv2/video/tracking.hpp" 
#include "opencv2/imgproc/imgproc.hpp" 

using namespace std; 
using namespace cv; 
using namespace cv::gpu; 

// Cascade used by detect_faces(); main() loads the XML file into it before the first detection.
cv::gpu::CascadeClassifier_GPU cascade_gpu;

//-------------------------------------------------------------------------------------------------------------
/**
 * Runs the global GPU cascade on one image and returns the detected rectangles.
 *
 * The image is converted to grayscale when it has three channels, uploaded to
 * the device and passed to detectMultiScale with scale factor 1.2, a
 * min-neighbours value of 4, largest-object mode enabled, and a minimum object
 * size of one quarter of the image in each dimension.
 *
 * @param image input image (not modified).
 * @return the detected rectangles; empty when the image is empty or nothing is found.
 */
vector<Rect> detect_faces(Mat& image)
{
    vector<Rect> res;

    // Nothing to detect on an empty frame; avoid handing it to the device code.
    if (image.empty())
    {
        return res;
    }

    const bool findLargestObject = true;
    const bool filterRects = true;

    // Grayscale view of the input; already-gray input is used directly, since it is only read from.
    Mat im;
    if (image.channels() == 3)
    {
        cv::cvtColor(image, im, CV_BGR2GRAY);
    }
    else
    {
        im = image;
    }

    // Constructing the GpuMat from a Mat uploads the pixels to the device.
    GpuMat gray_gpu(im);
    GpuMat facesBuf_gpu;

    cascade_gpu.visualizeInPlace = false;
    cascade_gpu.findLargestObject = findLargestObject;
    const int detections_num = cascade_gpu.detectMultiScale(gray_gpu, facesBuf_gpu, 1.2,
                                                            (filterRects || findLargestObject) ? 4 : 0,
                                                            Size(image.cols/4, image.rows/4));
    if (detections_num <= 0)
    {
        return res;
    }

    // Only the first detections_num columns of the result buffer hold rectangles.
    Mat faces_downloaded;
    facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
    const Rect *faceRects = faces_downloaded.ptr<Rect>();
    res.assign(faceRects, faceRects + detections_num);

    // The local Mat/GpuMat objects clean up after themselves when they go out
    // of scope, on every return path, so no explicit release() calls are needed.
    return res;
}
//----------------------------------------------------------------------------------------------------------------- 

//---------------------------------------------------------------------- 
// MAIN 
//---------------------------------------------------------------------- 
int main(int argc, char * argv[]) 
{ 
     cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice()); 
     cascade_gpu.load("haarcascade_frontalface_alt2.xml"); 
     Mat frame,img; 
     namedWindow("frame"); 
     VideoCapture capture(0); 
     capture >> frame; 
     vector<Rect> rects; 
     if (capture.isOpened()) 
     { 
       while(waitKey(20)!=27) // Exit by escape press 
       { 
         capture >> frame; 
         cvtColor(frame,img,CV_BGR2GRAY); 
         rects=detect_faces(img); 
         if(rects.size()>0) 
         { 
           cv::rectangle(frame,rects[0],CV_RGB(255,0,0)); 
         } 
         imshow("frame",frame); 
       } 
     } 

     return 0; 
} 
+0

感謝安德烈。我調整了detectMultiScale(...)函數的參數。 – Tariq

+0

不客氣。這段代碼是從我的一個項目中截取的,該項目有它自己要完成的任務和圖像參數。這就是爲什麼默認值在你的項目中不適用。 –

+0

對我來說,cascade_gpu.load調用永遠不會完成。程序似乎在運行和咀嚼內存,load()永遠不會返回。任何想法爲什麼會發生?我使用相同的XML文件 – lzt