2015-12-21 33 views
3

我需要將文字圖像二值化(使用 OpenCV 把文字圖像轉成二值圖像)。程式在大多數情況下運作良好,但在某些情況下輸出是空的(全白圖像)。

代碼

/* 
* Compile 
* # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs` 
* 
* Run 
* # ./txtbin input.jpg output.png 
*/ 

#include "string" 
#include "fstream" 
#include "/usr/include/opencv2/opencv.hpp" 
#include "/usr/include/boost/tuple/tuple.hpp" 

using namespace std; 
using namespace cv; 
using namespace boost; 

void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){ 
    /* 
    * blockSide: set greater for larger fonts in image 
    * contrast: set smaller for lower contrast image 
    */ 

    Mat I; 
    Img.convertTo(I, CV_32FC1); 
    Res = Mat::zeros(Img.rows/blockSide, Img.cols/blockSide, CV_32FC1); 
    Mat inpaintmask; 
    Mat patch; 
    Mat smallImg; 
    Scalar m, s; 

    for(int i = 0; i < Img.rows - blockSide; i += blockSide){ 
     for(int j = 0; j < Img.cols - blockSide; j += blockSide){ 
      patch = I(Range(i, i + blockSide + 1), Range(j, j + blockSide + 1)); 
      meanStdDev(patch, m, s); 

      if(s[0] > contrast){ 
       Res.at<float>(i/blockSide, j/blockSide) = m[0]; 
      } 
      else{ 
       Res.at<float>(i/blockSide, j/blockSide) = 0; 
      } 
     } 
    } 

    resize(I, smallImg, Res.size()); 

    threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY); 

    Mat inpainted; 
    smallImg.convertTo(smallImg, CV_8UC1, 255); 

    inpaintmask.convertTo(inpaintmask, CV_8UC1); 
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA); 

    resize(inpainted, Res, Img.size()); 
    Res.convertTo(Res, CV_32FC1, 1.0/255.0); 
} 

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){ 
    Mat large = imread(input); 

    bool test_output = false; 

    int 
     top = large.rows, 
     bottom = 0, 
     left = large.cols, 
     right = 0; 

    int 
     rect_bottom, 
     rect_right; 

    Mat rgb; 
    // downsample and use it for processing 
    pyrDown(large, rgb); 
    Mat small; 
    cvtColor(rgb, small, CV_BGR2GRAY); 
    // morphological gradient 
    Mat grad; 
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3)); 
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel); 
    // binarize 
    Mat bw; 
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU); 
    // connect horizontally oriented regions 
    Mat connected; 
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1)); 
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel); 
    // find contours 
    Mat mask = Mat::zeros(bw.size(), CV_8UC1); 
    vector<vector<Point> > contours; 
    vector<Vec4i> hierarchy; 
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); 
    // filter contours 
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){ 
     Rect rect = boundingRect(contours[idx]); 
     Mat maskROI(mask, rect); 
     maskROI = Scalar(0, 0, 0); 
     // fill the contour 
     drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED); 
     // ratio of non-zero pixels in the filled region 
     double r = (double)countNonZero(maskROI)/(rect.width * rect.height); 

     // assume at least 45% of the area is filled if it contains text 
     if (r > 0.45 && 
     (rect.height > 8 && rect.width > 8) // constraints on region size 
     // these two conditions alone are not very robust. better to use something 
     //like the number of significant peaks in a horizontal projection as a third condition 
     ){ 
      if(draw_contours){ 
       rectangle(res, Rect(rect.x * 2, rect.y * 2, rect.width * 2, rect.height * 2), Scalar(0, 255, 0), 2); 
      } 

      if(test_output){ 
       rectangle(rgb, rect, Scalar(0, 255, 0), 2); 
      } 

      if(rect.y < top){ 
       top = rect.y; 
      } 
      rect_bottom = rect.y + rect.height; 
      if(rect_bottom > bottom){ 
       bottom = rect_bottom; 
      } 
      if(rect.x < left){ 
       left = rect.x; 
      } 
      rect_right = rect.x + rect.width; 
      if(rect_right > right){ 
       right = rect_right; 
      } 
     } 
    } 

    if(draw_contours){ 
     rectangle(res, Point(left * 2, top * 2), Point(right * 2, bottom * 2), Scalar(0, 0, 255), 2); 
    } 

    if(test_output){ 
     rectangle(rgb, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2); 
     imwrite(string("test_text_contours.jpg"), rgb); 
    } 

    return make_tuple(left * 2, top * 2, (right - left) * 2, (bottom - top) * 2); 
} 

int main(int argc, char* argv[]){ 
    string input; 
    string output = "output.png"; 

    int 
     width = 0, 
     height = 0; 

    bool 
     crop = false, 
     draw = false; 

    float margin = 0; 

    // Return error if arguments are missing 
    if(argc < 3){ 
     cerr << "\nUsage: txtbin input [options] output\n\n" 
      "Options:\n" 
      "\t-w <number>   -- set max width (keeps aspect ratio)\n" 
      "\t-h <number>   -- set max height (keeps aspect ratio)\n" 
      "\t-c     -- crop text content contour\n" 
      "\t-m <number>   -- add margins (number in %)\n" 
      "\t-d     -- draw text content contours (debugging)\n" << endl; 
     return 1; 
    } 

    // Parse arguments 
    for(int i = 1; i < argc; i++){ 
     if(i == 1){ 
      input = string(argv[i]); 

      // Return error if input file is invalid 
      ifstream stream(input.c_str()); 
      if(!stream.good()){ 
       cerr << "Error: Input file is invalid!" << endl; 
       return 1; 
      } 
     } 
     else if(string(argv[i]) == "-w"){ 
      width = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-h"){ 
      height = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-c"){ 
      crop = true; 
     } 
     else if(string(argv[i]) == "-m"){ 
      margin = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-d"){ 
      draw = true; 
     } 
     else if(i == argc - 1){ 
      output = string(argv[i]); 
     } 
    } 

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE); 
    Mat res; 
    Img.convertTo(Img, CV_32FC1, 1.0/255.0); 
    CalcBlockMeanVariance(Img, res); 
    res = 1.0 - res; 
    res = Img + res; 
    threshold(res, res, 0.85, 1, THRESH_BINARY); 

    int 
     txt_x, 
     txt_y, 
     txt_width, 
     txt_height; 

    if(crop || draw){ 
     tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw); 
    } 

    if(crop){ 
     //res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone(); 
     res = res(Rect(txt_x, txt_y, txt_width, txt_height)); 
    } 

    if(margin){ 
     int border = res.cols * margin/100; 
     copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255)); 
    } 

    float 
     width_input = res.cols, 
     height_input = res.rows; 

    bool resized = false; 

    // Downscale image 
    if(width > 0 && width_input > width){ 
     float scale = width_input/width; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(height > 0 && height_input > height){ 
     float scale = height_input/height; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(resized){ 
     resize(res, res, Size(round(width_input), round(height_input))); 
    } 

    imwrite(output, res * 255); 

    return 0; 
} 

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

+0

與第二張圖像相比,第一張圖像中的文字非常小。我認為這是閾值設置不當的例子:文字對你的算法來說太小了。你有沒有嘗試調整過任何閾值? – GPPK

+0

其他文字更小的圖像,輸出並不是空白……這段代碼不是我寫的,所以我幾乎不知道該在哪裏調整 – clarkk

回答

3

好 :) 把 blockSide 設小一些(例如 7),就會得到如下圖所示的結果。這取決於字體大小:較小的字體需要較小的塊大小,否則文字會被過濾掉,得到空白圖像。

#include <iostream> 
#include <vector> 
#include <stdio.h> 
#include <stdarg.h> 
#include "/usr/include/opencv2/opencv.hpp" 
#include "fstream" 
#include "iostream" 
using namespace std; 
using namespace cv; 

void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=9) // blockSide - the parameter (set greater for larger font on image) 
{ 
    Mat I; 
    Img.convertTo(I,CV_32FC1); 
    Res=Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1); 
    Mat inpaintmask; 
    Mat patch; 
    Mat smallImg; 
    Scalar m,s; 

    for(int i=0;i<Img.rows-blockSide;i+=blockSide) 
    {  
     for (int j=0;j<Img.cols-blockSide;j+=blockSide) 
     { 
      patch=I(Range(i,i+blockSide+1),Range(j,j+blockSide+1)); 
      cv::meanStdDev(patch,m,s); 
      if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image) 
      { 
       Res.at<float>(i/blockSide,j/blockSide)=m[0]; 
      }else 
      { 
       Res.at<float>(i/blockSide,j/blockSide)=0; 
      }   
     } 
    } 

    cv::resize(I,smallImg,Res.size()); 

    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY); 

    Mat inpainted; 
    smallImg.convertTo(smallImg,CV_8UC1,255); 

    inpaintmask.convertTo(inpaintmask,CV_8UC1); 
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA); 

    cv::resize(inpainted,Res,Img.size()); 
    Res.convertTo(Res,CV_32FC1,1.0/255.0); 

} 

int main(int argc, char** argv) 
{ 
    namedWindow("Img"); 
    namedWindow("Edges"); 
    //Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0); 
    Mat Img=imread("test2.jpg",0); 
    Mat res; 
    Img.convertTo(Img,CV_32FC1,1.0/255.0); 
    CalcBlockMeanVariance(Img,res); 
    res=1.0-res; 
    res=Img+res; 
    imshow("Img",Img); 
    cv::threshold(res,res,0.85,1,cv::THRESH_BINARY); 
    cv::resize(res,res,cv::Size(res.cols/2,res.rows/2)); 
    imwrite("result.jpg",res*255); 
    imshow("Edges",res); 
    waitKey(0); 

    return 0; 
} 

enter image description here

enter image description here

enter image description here

enter image description here

+0

有沒有一種適用於所有圖像的設置?這是一項自動化服務……無法爲每張圖像單獨調整設置 – clarkk

+0

您可以把圖像大小(寬度)標準化;只要文字大小變化不太大,標準設置就能適用於大多數圖像。 –

+0

又增加了一個例子img3 :)文字模糊了.. – clarkk