2016-02-24 118 views
0

我有一個用來對含有文字的圖像進行二值化的程序。程序中有一個可選的裁剪功能,它會檢測文字輪廓並據此裁剪圖像。但是,在某些情況下,並非所有的文字輪廓都能被檢測到。

如果使用 -d 參數,程序會把檢測到的文字輪廓以矩形的形式繪製在輸出圖像上,而不進行裁剪。

文字輪廓檢測(以及矩形繪製)的邏輯位於 detect_text_box 函數中。

命令
/var/txtbin /var/in.png -d /var/out.png 

代碼
/* 
* Compile 
* # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs` 
* 
* Get opencv version 
* # pkg-config --modversion opencv 
* 
* Run 
* # ./txtbin input.jpg output.png 
*/ 

#include "string" 
#include "fstream" 
#include "/var/bin/opencv/include/opencv2/opencv.hpp" 
//#include "/usr/include/opencv2/opencv.hpp" 
#include "/usr/include/boost/tuple/tuple.hpp" 

using namespace std; 
using namespace cv; 
using namespace boost; 

/*
* Build a per-pixel reference image from block statistics of Img.
*
* Img is split into blocks; for every block whose standard deviation exceeds
* `contrast` the block mean is stored, all other blocks are stored as 0.
* The stored values are thresholded into a mask, a downsized copy of Img is
* inpainted where the mask is set, and the result is scaled back to the size
* of Img and written to Res as CV_32FC1.
*
* Img:       input image (read only here)
* Res:       output image, same size as Img
* blockSide: set greater for larger fonts in image and vice versa
* contrast:  set smaller for lower contrast image
*/
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    Mat source;
    Img.convertTo(source, CV_32FC1);

    // One cell of Res per block of the input
    Res = Mat::zeros(Img.rows/blockSide, Img.cols/blockSide, CV_32FC1);

    Scalar blockMean, blockStdDev;

    for(int row = 0; row < Img.rows - blockSide; row += blockSide){
        for(int col = 0; col < Img.cols - blockSide; col += blockSide){
            Mat block = source(Range(row, row + blockSide + 1), Range(col, col + blockSide + 1));
            meanStdDev(block, blockMean, blockStdDev);

            // Keep the mean only for blocks with enough variation
            Res.at<float>(row/blockSide, col/blockSide) = (blockStdDev[0] > contrast) ? blockMean[0] : 0;
        }
    }

    // Downsized copy of the input, one pixel per block
    Mat shrunk;
    resize(source, shrunk, Res.size());

    // Cells that received a block mean become the inpainting mask
    Mat fillMask;
    threshold(Res, fillMask, 0.02, 1.0, THRESH_BINARY);

    shrunk.convertTo(shrunk, CV_8UC1, 255);
    fillMask.convertTo(fillMask, CV_8UC1);

    Mat filled;
    inpaint(shrunk, fillMask, filled, 5, INPAINT_TELEA);

    // Back to the input resolution and to the 0..1 float range
    resize(filled, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0/255.0);
}

/*
* Detect the bounding box that encloses all text-like regions of an image.
*
* The image is read from `input`, downsampled twice with pyrDown(), and
* candidate regions are extracted with a morphological gradient, Otsu
* thresholding and a horizontal closing. A region is accepted when more than
* 25% of its bounding rectangle is filled and it is larger than 8x8 pixels
* (in downsampled coordinates).
*
* input:         path of the image to analyse
* res:           image to draw on when draw_contours is set; it is expected to
*                have the same size as the image read from `input`
* draw_contours: draw every accepted region and the overall box on `res`
*
* Returns (x, y, width, height) of the overall box in full-resolution
* coordinates, clamped to the image. When no region is accepted the box of the
* whole image is returned, so the result is always usable as a crop rectangle.
*/
tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
    Mat large = imread(input);

    bool test_output = false;

    // The two pyrDown() passes below shrink the image by this factor
    const int scale = 4;

    // Overall bounding box of the accepted regions (downsampled coordinates)
    int
     top = large.rows,
     bottom = 0,
     left = large.cols,
     right = 0;

    // Set as soon as one region passes the filter
    bool found = false;

    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    pyrDown(rgb, rgb);
    Mat gray;
    cvtColor(rgb, gray, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(gray, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point> > contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

    Scalar color = Scalar(0, 255, 0);
    Scalar color2 = Scalar(0, 0, 255);
    int thickness = 2;

    // filter contours; walk the top-level contours through hierarchy[idx][0].
    // When nothing was found `hierarchy` is empty and must not be indexed.
    for(int idx = contours.empty() ? -1 : 0; idx >= 0; idx = hierarchy[idx][0]){
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI)/(rect.width * rect.height);

        // assume at least 25% of the area is filled if it contains text
        if (r > 0.25 &&
        (rect.height > 8 && rect.width > 8) // constraints on region size
        // these two conditions alone are not very robust. better to use something
        // like the number of significant peaks in a horizontal projection as a third condition
        ){
            found = true;

            if(draw_contours){
                rectangle(res, Rect(rect.x * scale, rect.y * scale, rect.width * scale, rect.height * scale), color, thickness);
            }

            if(test_output){
                rectangle(rgb, rect, color, thickness);
            }

            // grow the overall box so that it includes this region
            if(rect.y < top){
                top = rect.y;
            }
            if(rect.y + rect.height > bottom){
                bottom = rect.y + rect.height;
            }
            if(rect.x < left){
                left = rect.x;
            }
            if(rect.x + rect.width > right){
                right = rect.x + rect.width;
            }
        }
    }

    if(found && draw_contours){
        rectangle(res, Point(left * scale, top * scale), Point(right * scale, bottom * scale), color2, thickness);
    }

    if(test_output){
        if(found){
            rectangle(rgb, Point(left, top), Point(right, bottom), color2, thickness);
        }
        imwrite(string("test_text_contours.jpg"), rgb);
    }

    // Without any accepted region fall back to the whole image
    Rect text_box(0, 0, large.cols, large.rows);
    if(found){
        // pyrDown() rounds sizes up, so the scaled box can overshoot the
        // original image by a few pixels; clamp it to the image
        text_box = Rect(left * scale, top * scale, (right - left) * scale, (bottom - top) * scale) & text_box;
    }

    return make_tuple(text_box.x, text_box.y, text_box.width, text_box.height);
}

int main(int argc, char* argv[]){ 
    string input; 
    string output = "output.png"; 

    int 
     width = 0, 
     height = 0, 
     blockside = 9; 

    bool 
     crop = false, 
     draw = false; 

    float margin = 0; 

    cout << "OpenCV version: " << CV_VERSION << endl; 

    // Return error if arguments are missing 
    if(argc < 3){ 
     cerr << "\nUsage: txtbin input [options] output\n\n" 
      "Options:\n" 
      "\t-w <number>   -- set max width (keeps aspect ratio)\n" 
      "\t-h <number>   -- set max height (keeps aspect ratio)\n" 
      "\t-c     -- crop text content contour\n" 
      "\t-m <number>   -- add margins (number in %)\n" 
      "\t-b <number>   -- set blockside\n" 
      "\t-d     -- draw text content contours (debugging)\n" << endl; 
     return 1; 
    } 

    // Parse arguments 
    for(int i = 1; i < argc; i++){ 
     if(i == 1){ 
      input = string(argv[i]); 

      // Return error if input file is invalid 
      ifstream stream(input.c_str()); 
      if(!stream.good()){ 
       cerr << "Error: Input file is invalid!" << endl; 
       return 1; 
      } 
     } 
     else if(string(argv[i]) == "-w"){ 
      width = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-h"){ 
      height = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-c"){ 
      crop = true; 
     } 
     else if(string(argv[i]) == "-m"){ 
      margin = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-b"){ 
      blockside = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-d"){ 
      draw = true; 
     } 
     else if(i == argc - 1){ 
      output = string(argv[i]); 
     } 
    } 

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE); 
    Mat res; 
    Img.convertTo(Img, CV_32FC1, 1.0/255.0); 
    CalcBlockMeanVariance(Img, res, blockside); 
    res = 1.0 - res; 
    res = Img + res; 
    threshold(res, res, 0.85, 1, THRESH_BINARY); 

    int 
     txt_x, 
     txt_y, 
     txt_width, 
     txt_height; 

    if(crop || draw){ 
     tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw); 
    } 

    if(crop){ 
     //res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone(); 
     res = res(Rect(txt_x, txt_y, txt_width, txt_height)); 
    } 

    if(margin){ 
     int border = res.cols * margin/100; 
     copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255)); 
    } 

    float 
     width_input = res.cols, 
     height_input = res.rows; 

    bool resized = false; 

    // Downscale image 
    if(width > 0 && width_input > width){ 
     float scale = width_input/width; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(height > 0 && height_input > height){ 
     float scale = height_input/height; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(resized){ 
     resize(res, res, Size(round(width_input), round(height_input))); 
    } 

    imwrite(output, res * 255); 

    return 0; 
} 

圖像1輸入

enter image description here

圖像1輸出

enter image description here

圖像2輸入

enter image description here

圖像2輸出

enter image description here

更新

我把你的代碼放進了一個類中,但得到一個錯誤。

該類名爲 XYcut,下面這段代碼會產生編譯錯誤:

// Groups filteredRects with partition(); the predicate returns true when the
// top-left corner of one rectangle is closer than max_distance2 (compared
// against the value returned by ed2) to the top-right corner of the other.
// NOTE(review): the lambda captures only max_distance2. The compiler output
// quoted for this call reports that 'this' was not captured, which suggests
// ed2 is a non-static member of XYcut - confirm against the class definition.
int n_labels = partition(filteredRects, labels, [max_distance2](const cv::Rect& lhs, const cv::Rect& rhs){ 
    if(XYcut::ed2(lhs.tl(), cv::Point(rhs.br().x, rhs.tl().y)) < max_distance2){ 
     return true; 
    } 
    if(XYcut::ed2(rhs.tl(), cv::Point(lhs.br().x, lhs.tl().y)) < max_distance2){ 
     return true; 
    } 
    return false; 
}); 

錯誤

error: ‘this’ was not captured for this lambda function 
    if(XYcut::ed2(lhs.tl(), cv::Point(rhs.br().x, rhs.tl().y)) < max_distance2){ 

如何在這個 lambda 中引用 XYcut 類的 ed2 方法?

類和方法

這是方法

回答

4

我只是想提出一個不同的方法。它基於XY-Cut算法,並且由於您的文本是軸對齊的,所以它工作得很好。


對您的輸入圖像計算 XY-Cut,即可得到邊界框:

enter image description here

可以看到,字符組被正確地識別出來了,但還不是完整的單詞。所以,我們首先去掉很小的矩形,它們只是噪聲:

enter image description here

然後我們把彼此非常接近的矩形歸爲一組。爲此可以使用 cv::partition 並配上合適的謂詞:

enter image description here

現在你就得到了每個單詞的邊界框。最終你也可以得到每一行的邊界框。對於你的第二張圖像,結果如下:

enter image description here

這是我使用的代碼:

#include <iostream>
#include <vector>

#include <opencv2\opencv.hpp>
using namespace std; 
using namespace cv; 

Mat3b dbg; 

/// Split `roi` into horizontal bands using a row-wise projection of `src`.
///
/// Every row of `src(roi)` is reduced to its maximum value; consecutive rows
/// with a non-zero maximum form one band. The ROI and each band are also drawn
/// on the global debug image `dbg`.
///
/// @param src  single-channel image; zero pixels are treated as empty
/// @param roi  region of `src` to split
/// @return the bands in image coordinates, or an empty vector when the ROI
///         cannot be split (it contains no non-zero row, or the only band is
///         the ROI itself)
vector<Rect> XYCut_projH(const Mat1b& src, Rect roi)
{
    rectangle(dbg, roi, Scalar(255, 0, 0));

    // One value per row: the maximum of that row inside the ROI
    Mat1b projH;
    reduce(src(roi), projH, 1, CV_REDUCE_MAX);

    // Alternating start (inclusive) / end (exclusive) rows of the bands
    vector<int> coords;
    bool bOut = true;

    for (int i = 0; i < projH.rows; ++i)
    {
        if (bOut && projH(i) > 0)
        {
            coords.push_back(i);
            bOut = false;
        }
        else if (!bOut && projH(i) == 0)
        {
            coords.push_back(i);
            bOut = true;
        }
    }

    // Close a band that reaches the bottom edge of the ROI
    if (!bOut)
    {
        coords.push_back(projH.rows);
    }

    vector<Rect> rects;
    rects.reserve(coords.size() / 2);

    // `i + 1 < size()` instead of `i < size() - 1`: the latter wraps around
    // for an empty vector because size() is unsigned
    for (size_t i = 0; i + 1 < coords.size(); i += 2)
    {
        Rect r(0, coords[i], src.cols, coords[i + 1] - coords[i]);
        // Move from ROI-relative to image coordinates and clip to the ROI
        r = (r + roi.tl()) & roi;
        rects.push_back(r);

        rectangle(dbg, r, Scalar(0, 255, 0));
    }

    // A single band equal to the ROI means nothing was cut
    if ((rects.size() == 1) && (rects[0] == roi))
    {
        return vector<Rect>();
    }

    return rects;
}

/// Split `roi` into vertical bands using a column-wise projection of `src`.
///
/// Every column of `src(roi)` is reduced to its maximum value; consecutive
/// columns with a non-zero maximum form one band. The ROI and each band are
/// also drawn on the global debug image `dbg`.
///
/// @param src  single-channel image; zero pixels are treated as empty
/// @param roi  region of `src` to split
/// @return the bands in image coordinates, or an empty vector when the ROI
///         cannot be split (it contains no non-zero column, or the only band
///         is the ROI itself)
vector<Rect> XYCut_projV(const Mat1b& src, Rect roi)
{
    rectangle(dbg, roi, Scalar(255, 0, 0));

    // One value per column: the maximum of that column inside the ROI
    Mat1b projV;
    reduce(src(roi), projV, 0, CV_REDUCE_MAX);

    // Alternating start (inclusive) / end (exclusive) columns of the bands
    vector<int> coords;
    bool bOut = true;

    for (int i = 0; i < projV.cols; ++i)
    {
        if (bOut && projV(i) > 0)
        {
            coords.push_back(i);
            bOut = false;
        }
        else if (!bOut && projV(i) == 0)
        {
            coords.push_back(i);
            bOut = true;
        }
    }

    // Close a band that reaches the right edge of the ROI
    if (!bOut)
    {
        coords.push_back(projV.cols);
    }

    vector<Rect> rects;
    rects.reserve(coords.size() / 2);

    // `i + 1 < size()` instead of `i < size() - 1`: the latter wraps around
    // for an empty vector because size() is unsigned
    for (size_t i = 0; i + 1 < coords.size(); i += 2)
    {
        Rect r(coords[i], 0, coords[i + 1] - coords[i], src.rows);
        // Move from ROI-relative to image coordinates and clip to the ROI
        r = (r + roi.tl()) & roi;
        rects.push_back(r);

        rectangle(dbg, r, Scalar(0, 255, 0));
    }

    // A single band equal to the ROI means nothing was cut
    if ((rects.size() == 1) && (rects[0] == roi))
    {
        return vector<Rect>();
    }

    return rects;
}

void XYCut_step(const Mat1b& src, Rect roi, vector<Rect>& rects, bool bAlternate) 
{ 
    vector<Rect> step; 
    if (bAlternate) 
    { 
     step = XYCut_projH(src, roi); 

     if (step.empty()) 
     { 
      rects.push_back(roi); 
      return; 
     } 
    } 
    else 
    { 
     step = XYCut_projV(src, roi); 

     if (step.empty()) 
     { 
      rects.push_back(roi); 
      return; 
     } 
    } 

    for (int i = 0; i < step.size(); ++i) 
    { 
     XYCut_step(src, step[i], rects, !bAlternate); 
    } 
} 

void XYCut(const Mat1b& src, vector<Rect>& rects) 
{ 
    bool bAlternate = true; 
    Rect roi(0, 0, src.cols, src.rows); 

    XYCut_step(src, roi, rects, bAlternate); 
} 

/// Squared Euclidean distance between two points.
///
/// @return (dx * dx) + (dy * dy), with dx/dy the coordinate differences
int ed2(const Point& lhs, const Point& rhs)
{
    const int dx = lhs.x - rhs.x;
    const int dy = lhs.y - rhs.y;
    return dx * dx + dy * dy;
}

int main() 
{ 
    // Load image 
    Mat1b img = imread("path_to_image", IMREAD_GRAYSCALE); 
    cvtColor(img, dbg, COLOR_GRAY2BGR); 

    // invert image, if needed 
    img = ~img; 

    // Apply XY Cut 
    vector<Rect> rects; 
    XYCut(img, rects); 

    // Show XY results 
    Mat3b xyres; 
    cvtColor(img, xyres, COLOR_GRAY2BGR); 
    for (int i = 0; i < rects.size(); ++i) 
    { 
     rectangle(xyres, rects[i], Scalar(0, 0, 255), 2); 
    } 

    //imshow("XY-Cut Result", xyres); 
    //waitKey(1); 

    // Remove small bounding boxes (noise) 
    int min_area = 10; 
    vector<Rect> filteredRects; 
    for (const auto& r : rects) 
    { 
     if (r.area() > min_area) 
     { 
      filteredRects.push_back(r); 
     } 
    } 

    // Show Filtered results 
    Mat3b filtres; 
    cvtColor(img, filtres, COLOR_GRAY2BGR); 
    for (int i = 0; i < filteredRects.size(); ++i) 
    { 
     rectangle(filtres, filteredRects[i], Scalar(255, 0, 0), 2); 
    } 

    //imshow("Filtered Result", filtres); 
    //waitKey(1); 

    // Group near rectangles 
    int max_distance = 10; 

    vector<int> labels; 
    int max_distance2 = max_distance*max_distance; 
    int n_labels = partition(filteredRects, labels, [max_distance2](const Rect& lhs, const Rect& rhs) 
    { 
     if (ed2(lhs.tl(), Point(rhs.br().x, rhs.tl().y)) < max_distance2) { return true; } 
     if (ed2(rhs.tl(), Point(lhs.br().x, lhs.tl().y)) < max_distance2) { return true; } 
     return false; 
    }); 

    // Make a bounding box for rects grouped together 
    vector<vector<Point>> pts(n_labels); 
    for (int i = 0; i < filteredRects.size(); ++i) 
    { 
     pts[labels[i]].push_back(filteredRects[i].tl()); 
     pts[labels[i]].push_back(filteredRects[i].br()); 
    } 

    // Show Grouped results 
    vector<Rect> groupedRects(n_labels); 
    for (int i = 0; i < pts.size(); ++i) 
    { 
     groupedRects[i] = boundingRect(pts[i]); 
    } 


    // Show Grouped results 
    Mat3b groupres; 
    cvtColor(img, groupres, COLOR_GRAY2BGR); 
    for (int i = 0; i < groupedRects.size(); ++i) 
    { 
     rectangle(groupres, groupedRects[i], Scalar(0, 255, 0), 2); 
    } 


    //imshow("Grouped Result", groupres); 
    //waitKey(1); 




    return 0; 
} 
+0

當我嘗試編譯代碼時得到以下錯誤:`# g++ org.cpp -o ./test/test $(pkg-config opencv --cflags --libs)` org.cpp: In function 'int main()': org.cpp:175:22: error: ISO C++ forbids declaration of 'r' with no type [-fpermissive] for (const auto& r : rects) ^ org.cpp:175:26: error: range-based 'for' loops are not allowed in C++98 mode for (const auto& r : rects) ^ – clarkk

+0

您需要啓用 C++11 功能:「-std=c++11」 – Miki

+0

有沒有一種快速的方法可以把所有矩形合併成一個矩形? – clarkk