我需要將文字影像二值化。程式運作得很好，但在某些情況下，輸出是空白的（全白影像）。（主題：使用 OpenCV 對文字影像進行二值化）

代碼

/* 
* Compile 
* # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs` 
* 
* Run 
* # ./txtbin input.jpg output.png 
*/ 

#include "string" 
#include "fstream" 
#include "/usr/include/opencv2/opencv.hpp" 
#include "/usr/include/boost/tuple/tuple.hpp" 

using namespace std; 
using namespace cv; 
using namespace boost; 

/*
* Estimate the background of a text image on a coarse grid of blocks.
*
* The image is divided into cells of blockSide x blockSide pixels. For each
* cell the mean and standard deviation are measured; a cell whose standard
* deviation exceeds `contrast` keeps its mean, every other cell is stored
* as 0. Cells whose stored value is above 0.02 form the inpainting mask, so
* their content is rebuilt by inpaint() from the unmasked cells around
* them. The inpainted grid is finally scaled back to the size of Img.
*
* Img:       input image. The fixed constants used here (0.02 mask
*            threshold, x255 and x1/255 conversions) expect pixel values
*            scaled to [0, 1].
* Res:       output, CV_32F image of the same size as Img.
* blockSide: cell size in pixels, truncated to an integer (must be >= 1);
*            set greater for larger fonts in image
* contrast:  set smaller for lower contrast image
*
* Throws cv::Exception (via CV_Assert) for an empty image or blockSide < 1.
*
* NOTE(review): if blockSide is large relative to the font, every cell may
* end up in the mask, leaving inpaint() no unmasked cell to rebuild from.
* That looks like the cause of the all-white outputs -- confirm.
*/
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    // Mat sizes, Range bounds and at<>() indices are integral, so settle the
    // block size as an int once instead of mixing float arithmetic into them.
    const int block = static_cast<int>(blockSide);
    CV_Assert(!Img.empty() && block >= 1);

    const Size fullSize = Img.size();

    Mat I;
    Img.convertTo(I, CV_32FC1);

    // Keep at least one cell per dimension so that images smaller than one
    // block do not produce an empty grid (resize() rejects an empty size).
    int gridRows = I.rows / block;
    int gridCols = I.cols / block;
    if(gridRows < 1){
        gridRows = 1;
    }
    if(gridCols < 1){
        gridCols = 1;
    }

    // Cells start at 0, which is also the value for "low contrast".
    Res = Mat::zeros(gridRows, gridCols, CV_32FC1);

    Scalar m, s;
    for(int gi = 0; gi < gridRows; gi++){
        // Patches overlap their neighbour by one pixel; the end is clamped so
        // the last cell is analysed too when the size is a multiple of block.
        const int rowBegin = gi * block;
        int rowEnd = rowBegin + block + 1;
        if(rowEnd > I.rows){
            rowEnd = I.rows;
        }

        for(int gj = 0; gj < gridCols; gj++){
            const int colBegin = gj * block;
            int colEnd = colBegin + block + 1;
            if(colEnd > I.cols){
                colEnd = I.cols;
            }

            Mat patch = I(Range(rowBegin, rowEnd), Range(colBegin, colEnd));
            meanStdDev(patch, m, s);

            if(s[0] > contrast){
                Res.at<float>(gi, gj) = static_cast<float>(m[0]);
            }
        }
    }

    // Downscaled copy of the image: one pixel per grid cell.
    Mat smallImg;
    resize(I, smallImg, Res.size());

    // Non-zero mask entries are the cells to be reconstructed.
    Mat inpaintmask;
    threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY);

    // The image and mask are converted to 8-bit before being passed to inpaint().
    smallImg.convertTo(smallImg, CV_8UC1, 255);
    inpaintmask.convertTo(inpaintmask, CV_8UC1);

    Mat inpainted;
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to full resolution and to the [0, 1] float range.
    resize(inpainted, Res, fullSize);
    Res.convertTo(Res, CV_32FC1, 1.0/255.0);
}

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){ 
    Mat large = imread(input); 

    bool test_output = false; 

    int 
     top = large.rows, 
     bottom = 0, 
     left = large.cols, 
     right = 0; 

    int 
     rect_bottom, 
     rect_right; 

    Mat rgb; 
    // downsample and use it for processing 
    pyrDown(large, rgb); 
    Mat small; 
    cvtColor(rgb, small, CV_BGR2GRAY); 
    // morphological gradient 
    Mat grad; 
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3)); 
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel); 
    // binarize 
    Mat bw; 
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU); 
    // connect horizontally oriented regions 
    Mat connected; 
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1)); 
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel); 
    // find contours 
    Mat mask = Mat::zeros(bw.size(), CV_8UC1); 
    vector<vector<Point> > contours; 
    vector<Vec4i> hierarchy; 
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); 
    // filter contours 
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){ 
     Rect rect = boundingRect(contours[idx]); 
     Mat maskROI(mask, rect); 
     maskROI = Scalar(0, 0, 0); 
     // fill the contour 
     drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED); 
     // ratio of non-zero pixels in the filled region 
     double r = (double)countNonZero(maskROI)/(rect.width * rect.height); 

     // assume at least 45% of the area is filled if it contains text 
     if (r > 0.45 && 
     (rect.height > 8 && rect.width > 8) // constraints on region size 
     // these two conditions alone are not very robust. better to use something 
     //like the number of significant peaks in a horizontal projection as a third condition 
     ){ 
      if(draw_contours){ 
       rectangle(res, Rect(rect.x * 2, rect.y * 2, rect.width * 2, rect.height * 2), Scalar(0, 255, 0), 2); 
      } 

      if(test_output){ 
       rectangle(rgb, rect, Scalar(0, 255, 0), 2); 
      } 

      if(rect.y < top){ 
       top = rect.y; 
      } 
      rect_bottom = rect.y + rect.height; 
      if(rect_bottom > bottom){ 
       bottom = rect_bottom; 
      } 
      if(rect.x < left){ 
       left = rect.x; 
      } 
      rect_right = rect.x + rect.width; 
      if(rect_right > right){ 
       right = rect_right; 
      } 
     } 
    } 

    if(draw_contours){ 
     rectangle(res, Point(left * 2, top * 2), Point(right * 2, bottom * 2), Scalar(0, 0, 255), 2); 
    } 

    if(test_output){ 
     rectangle(rgb, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2); 
     imwrite(string("test_text_contours.jpg"), rgb); 
    } 

    return make_tuple(left * 2, top * 2, (right - left) * 2, (bottom - top) * 2); 
} 

int main(int argc, char* argv[]){ 
    string input; 
    string output = "output.png"; 

    int 
     width = 0, 
     height = 0; 

    bool 
     crop = false, 
     draw = false; 

    float margin = 0; 

    // Return error if arguments are missing 
    if(argc < 3){ 
     cerr << "\nUsage: txtbin input [options] output\n\n" 
      "Options:\n" 
      "\t-w <number>   -- set max width (keeps aspect ratio)\n" 
      "\t-h <number>   -- set max height (keeps aspect ratio)\n" 
      "\t-c     -- crop text content contour\n" 
      "\t-m <number>   -- add margins (number in %)\n" 
      "\t-d     -- draw text content contours (debugging)\n" << endl; 
     return 1; 
    } 

    // Parse arguments 
    for(int i = 1; i < argc; i++){ 
     if(i == 1){ 
      input = string(argv[i]); 

      // Return error if input file is invalid 
      ifstream stream(input.c_str()); 
      if(!stream.good()){ 
       cerr << "Error: Input file is invalid!" << endl; 
       return 1; 
      } 
     } 
     else if(string(argv[i]) == "-w"){ 
      width = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-h"){ 
      height = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-c"){ 
      crop = true; 
     } 
     else if(string(argv[i]) == "-m"){ 
      margin = atoi(argv[++i]); 
     } 
     else if(string(argv[i]) == "-d"){ 
      draw = true; 
     } 
     else if(i == argc - 1){ 
      output = string(argv[i]); 
     } 
    } 

    Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE); 
    Mat res; 
    Img.convertTo(Img, CV_32FC1, 1.0/255.0); 
    CalcBlockMeanVariance(Img, res); 
    res = 1.0 - res; 
    res = Img + res; 
    threshold(res, res, 0.85, 1, THRESH_BINARY); 

    int 
     txt_x, 
     txt_y, 
     txt_width, 
     txt_height; 

    if(crop || draw){ 
     tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw); 
    } 

    if(crop){ 
     //res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone(); 
     res = res(Rect(txt_x, txt_y, txt_width, txt_height)); 
    } 

    if(margin){ 
     int border = res.cols * margin/100; 
     copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255)); 
    } 

    float 
     width_input = res.cols, 
     height_input = res.rows; 

    bool resized = false; 

    // Downscale image 
    if(width > 0 && width_input > width){ 
     float scale = width_input/width; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(height > 0 && height_input > height){ 
     float scale = height_input/height; 
     width_input /= scale; 
     height_input /= scale; 
     resized = true; 
    } 
    if(resized){ 
     resize(res, res, Size(round(width_input), round(height_input))); 
    } 

    imwrite(output, res * 255); 

    return 0; 
}

來源

2015-12-21 clarkk

第一張圖像中的文字比第二張圖像中的文字小得多。我認為這是閾值設定不當的例子：文字對你的演算法來說太小了。你有沒有嘗試調整過任何閾值？ – GPPK

其他文字更小的圖像，輸出並不是空白的……這段代碼不是我寫的，所以我幾乎不知道該在哪裏調整 – clarkk

好 :) 把 blockSide 設小一點（例如 7），就會得到如下圖所示的結果圖像。這取決於字體大小：較小的字體需要較小的塊大小，否則文字會被過濾掉，得到空白的圖像。

#include <iostream> 
#include <vector> 
#include <stdio.h> 
#include <stdarg.h> 
#include "/usr/include/opencv2/opencv.hpp" 
#include "fstream" 
#include "iostream" 
using namespace std; 
using namespace cv; 

// Estimates the background of a text image on a coarse grid of blocks.
//
// The image is split into cells of blockSide x blockSide pixels. A cell whose
// standard deviation exceeds `contrast` keeps its mean, every other cell is
// stored as 0. Cells whose stored value is above 0.02 form the inpainting
// mask, so their content is rebuilt by inpaint() from the unmasked cells
// around them. The inpainted grid is then scaled back to the size of Img.
//
// Img       - input image; the fixed constants used here (0.02 mask threshold,
//             x255 and x1/255 conversions) expect values scaled to [0, 1]
// Res       - output, CV_32F image of the same size as Img
// blockSide - cell size in pixels, truncated to an integer, must be >= 1
//             (set greater for larger font on image)
// contrast  - thresholding parameter (set smaller for lower contrast image)
//
// Throws cv::Exception (via CV_Assert) for an empty image or blockSide < 1.
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=9,float contrast=0.01)
{
    // Mat sizes, Range bounds and at<>() indices are integral, so settle the
    // block size as an int once instead of mixing float arithmetic into them.
    const int block=static_cast<int>(blockSide);
    CV_Assert(!Img.empty() && block>=1);

    const Size fullSize=Img.size();

    Mat I;
    Img.convertTo(I,CV_32FC1);

    // Keep at least one cell per dimension so that images smaller than one
    // block do not produce an empty grid (resize() rejects an empty size).
    int gridRows=I.rows/block;
    int gridCols=I.cols/block;
    if(gridRows<1)
    {
        gridRows=1;
    }
    if(gridCols<1)
    {
        gridCols=1;
    }

    // Cells start at 0, which is also the value for "low contrast".
    Res=Mat::zeros(gridRows,gridCols,CV_32FC1);

    Scalar m,s;
    for(int gi=0;gi<gridRows;gi++)
    {
        // Patches overlap their neighbour by one pixel; the end is clamped so
        // the last cell is analysed too when the size is a multiple of block.
        const int rowBegin=gi*block;
        int rowEnd=rowBegin+block+1;
        if(rowEnd>I.rows)
        {
            rowEnd=I.rows;
        }

        for(int gj=0;gj<gridCols;gj++)
        {
            const int colBegin=gj*block;
            int colEnd=colBegin+block+1;
            if(colEnd>I.cols)
            {
                colEnd=I.cols;
            }

            Mat patch=I(Range(rowBegin,rowEnd),Range(colBegin,colEnd));
            cv::meanStdDev(patch,m,s);
            if(s[0]>contrast)
            {
                Res.at<float>(gi,gj)=static_cast<float>(m[0]);
            }
        }
    }

    // Downscaled copy of the image: one pixel per grid cell.
    Mat smallImg;
    cv::resize(I,smallImg,Res.size());

    // Non-zero mask entries are the cells to be reconstructed.
    Mat inpaintmask;
    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    // The image and mask are converted to 8-bit before being passed to inpaint().
    smallImg.convertTo(smallImg,CV_8UC1,255);
    inpaintmask.convertTo(inpaintmask,CV_8UC1);

    Mat inpainted;
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to full resolution and to the [0, 1] float range.
    cv::resize(inpainted,Res,fullSize);
    Res.convertTo(Res,CV_32FC1,1.0/255.0);
}

int main(int argc, char** argv) 
{ 
    namedWindow("Img"); 
    namedWindow("Edges"); 
    //Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0); 
    Mat Img=imread("test2.jpg",0); 
    Mat res; 
    Img.convertTo(Img,CV_32FC1,1.0/255.0); 
    CalcBlockMeanVariance(Img,res); 
    res=1.0-res; 
    res=Img+res; 
    imshow("Img",Img); 
    cv::threshold(res,res,0.85,1,cv::THRESH_BINARY); 
    cv::resize(res,res,cv::Size(res.cols/2,res.rows/2)); 
    imwrite("result.jpg",res*255); 
    imshow("Edges",res); 
    waitKey(0); 

    return 0; 
}