2016-01-23 110 views
-1

我是OpenCV的初學者,我需要刪除圖像中的水平和垂直線條,以便只保留文本(這些線條在提取OCR文本時會造成麻煩)。我正在嘗試從營養成分表中提取文本。誰能幫我?(標題:從圖像中刪除線條)

Nutrient Fact Table

+0

與其將線條看作「障礙物」,你有沒有嘗試過把它們當作輪廓,或使用邊緣檢測器來提取線條所圍成的矩形中的內容?例如,「營養信息...」會是一個方框,而宏量營養素明細會是另一個方框 –

+0

@TrésDuBiel是的,我試過了,但一些營養素事實表中有營養素和它的價值之間的垂直線,如脂肪| 2.7g,創建障礙物之間的垂直線 –

+0

對於線條檢測,您可以使用OpenCV的[Hough lines](http://docs.opencv.org/2.4/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.html)(霍夫線變換)。 – seleciii44

回答

2

這是一個有趣的問題,所以我試了一下。下面我會告訴你如何提取和刪除水平和垂直線條,你可以在此基礎上繼續擴展。另外,爲了節省時間,我沒有按應有的方式對圖像進行預處理來去除背景,這是一個可以改進的地方。

其結果是:result 代碼(編輯:附加的垂直線):

#include <iostream> 
#include <opencv2/opencv.hpp> 
using namespace std; 
using namespace cv; 
int main(int, char** argv) 
{ 
    // Load the image 
    Mat src = imread(argv[1]); 
    // Check if image is loaded fine 
    if(!src.data) 
     cerr << "Problem loading image!!!" << endl; 
    Mat gray; 
    if (src.channels() == 3) 
    { 
     cvtColor(src, gray, CV_BGR2GRAY); 
    } 
    else 
    { 
     gray = src; 
    } 

    //inverse binary img 
    Mat bw; 
    //this will hold the result, image to be passed to OCR 
    Mat fin; 
    //I find OTSU binarization best for text. 
    //Would perform better if background had been cropped out 
    threshold(gray, bw, 0, 255, THRESH_BINARY_INV | THRESH_OTSU); 
    threshold(gray, fin, 0, 255, THRESH_BINARY | THRESH_OTSU); 
    imshow("binary", bw); 
    Mat dst; 
    Canny(fin, dst, 50, 200, 3); 
    Mat str = getStructuringElement(MORPH_RECT, Size(3,3)); 
    dilate(dst, dst, str, Point(-1, -1), 3); 
    imshow("dilated_canny", dst); 
    //bitwise_and w/ canny image helps w/ background noise 
    bitwise_and(bw, dst, dst); 
    imshow("and", dst); 
    Mat horizontal = dst.clone(); 
    Mat vertical = dst.clone(); 
    fin = ~dst; 

    //Image that will be horizontal lines 
    Mat horizontal = bw.clone(); 
    //Selected this value arbitrarily 
    int horizontalsize = horizontal.cols/30; 
    Mat horizontalStructure = getStructuringElement(MORPH_RECT, Size(horizontalsize,1)); 
    erode(horizontal, horizontal, horizontalStructure, Point(-1, -1)); 
    dilate(horizontal, horizontal, horizontalStructure, Point(-1, -1), 1); 
    imshow("horizontal_lines", horizontal); 

    //Need to find horizontal contours, so as to not damage letters 
    vector<Vec4i> hierarchy; 
    vector<vector<Point> >contours; 
    findContours(horizontal, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE); 
    for (const auto& c : contours) 
    { 
     Rect r = boundingRect(c); 

     float percentage_height = (float)r.height/(float)src.rows; 
     float percentage_width = (float)r.width/(float)src.cols; 

     //These exclude contours that probably are not dividing lines 
     if (percentage_height > 0.05) 
      continue; 

     if (percentage_width < 0.50) 
      continue; 
     //fills in line with white rectange 
     rectangle(fin, r, Scalar(255,255,255), CV_FILLED); 
    } 

    int verticalsize = vertical.rows/30; 
    Mat verticalStructure = getStructuringElement(MORPH_RECT, Size(1,verticalsize)); 
    erode(vertical, vertical, verticalStructure, Point(-1, -1)); 
    dilate(vertical, vertical, verticalStructure, Point(-1, -1), 1); 
    imshow("verticalal", vertical); 

    findContours(vertical, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE); 
    for (const auto& c : contours) 
    { 
     Rect r = boundingRect(c); 

     float percentage_height = (float)r.height/(float)src.rows; 
     float percentage_width = (float)r.width/(float)src.cols; 

     //These exclude contours that probably are not dividing lines 
     if (percentage_width > 0.05) 
      continue; 

     if (percentage_height < 0.50) 
      continue; 
     //fills in line with white rectange 
     rectangle(fin, r, Scalar(255,255,255), CV_FILLED); 
    } 

    imshow("Result", fin); 
    waitKey(0); 
    return 0; 
} 

這種方法的侷限性是,線條必須是直的。由於底部的線條有些彎曲,它略微切到了「Energy」中的「E」。也許像評論中建議的那樣使用霍夫線檢測(我從來沒有用過),可以設計出類似但更穩健的方法。另外,用矩形填充線條可能不是最好的方法。