RANSAC線性迴歸（魯棒線配合）

該文件包括用於RANSAC線性迴歸與使用的OpenCV單元測試C++代碼。一個典型的用線擬合是RANSAC線性迴歸（魯棒線配合）

float x[N], float y[N]; 
bool inliers[N]; 
float RMSE = RANSAC_line(x, y, npoints, param, NITERATIONS, MAX_ER, inliers);

這裏是文件

/* 
* RANSAC.h 
* 
* Created on: Mar 4, 2013 
*  Author: vivanchenko 
* 
*  Description: line is represented by equation 
*  a*x+b*y=d, where a=cos(alpha), b=sin(alpha), alpha - the angle 
*  between a horizontal axis and a line normal, and d is the distance 
*  from the origin to the line; 
* 
*  Representing a line in such a way is well suited for fitting error 
*  evaluation and allows to avoid some marginal cases that arise from 
*  a traditional line representation y=ax+b 
*/ 

#ifndef RANSAC_H_ 
#define RANSAC_H_ 
#include <string.h> 
#include <assert.h> 
#include <stdio.h> // NULL symbol 
#include "cv.h" 
#include "highgui.h" 

using namespace std; 
using namespace cv; 

#define SQR(a) ((a)*(a)) 
#define MAX_FLOAT (std::numeric_limits<float>::digits) 
#define LARGE_NUMBER (MAX_FLOAT/2) 
#define SMALL_NUMBER (1.0F/LARGE_NUMBER) 
#define SIGN(x) ((x)>0?1:(-1)) 

// unit test params 
const int NDATA_UNIT_TEST = 100; 
const float TRUE_PARAM_UNIT_TEST[2] = {0.9f, -27.0f}; 
const float NOISE_LEVEL_UNIT_TEST = 1.0f; 
const float OUTLIER_PROPORTION_UNIT_TEST = 0.3f; 
const float SHIFT_OUTLIERS_UNIT_TEST = 10*NOISE_LEVEL_UNIT_TEST; // shift introduced by outliers 
const int UNIT_TEST_IMG_WIDTH = 800; 
const int UNIT_TEST_IMG_HEIGHT = 800; 

// conversion to string for many types 
template <class T> 
string toString(T a) { 
    stringstream ss (stringstream::in | stringstream::out); 
    ss << a; 
    return(ss.str()); 
} 

// outputs redctangle that includes the data 
cv::Rect dataRange(float* x, float* y, const int N) { 

    float minx = x[0]; 
    float miny = y[0]; 
    float maxx = x[0]; 
    float maxy = y[0]; 

    for (int i=1; i<N; i++) { 
     if (minx > x[i]) 
      minx = x[i]; 
     if (miny > y[i]) 
      miny = y[i]; 
     if (maxx < x[i]) 
      maxx = x[i]; 
     if (maxy < y[i]) 
      maxy = y[i]; 
    } 
    Rect_<float> rect(minx, miny, maxx-minx+1, maxy-miny+1); 

    return rect; 
} 

// simple linear transformation 
inline float linear(float src, float mult_val, float plus_val) { 
    return(src*mult_val+plus_val); 
} 

// shows points, marks inliers, draws a fit line (Y points up); 
void showPoints(Mat& image, float* x, float* y, const int N, 
     bool* inlrs, float* param) { 

    bool* inliers = inlrs; 
    if (inlrs==NULL) { 
     inliers = new bool[N]; 
     std::fill_n(inliers, N, true); 
    } 

    // image dimensions 
    int h = image.rows; 
    int w = image.cols; 
    const int pad = 10; // border padding 

    // range of data 
    Rect_<float> range = dataRange(x, y, N); 
    string str = "x: " + toString(range.x) + ".." + toString(range.x+range.width) + 
      "; y: " + toString(range.y) + ".." + toString(range.y+range.height) + 
      "; data: y = " + toString(TRUE_PARAM_UNIT_TEST[0]) + "x + " + 
      toString(TRUE_PARAM_UNIT_TEST[1]) + " + noise"; 
    putText(image, str, Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar(255)); 

    // maping data to image 
    float dataToImg = min((h-2*pad)/range.height, 
      (w-2*pad)/range.width); 

    // inverse y direction 
    int x0 = pad; 
    int y0 = h-pad; 

    // points 
    for (int i=0; i<N; i++) { 

     // adjust for origin 
     float datax = x[i]-range.x; 
     float datay = y[i]-range.y; 

     // transform to image coordiantes 
     int imgx = linear(datax, dataToImg, x0) + 0.5f; 
     int imgy = linear(datay, -dataToImg, y0) + 0.5f; 

     int thickness = inliers[i]?2:1; 
     circle(image, Point(imgx, imgy), 2, Scalar(255), thickness); 
    } 

    // line points 
    float datax1 = range.x; 
    float datax2 = range.x+range.width-1; 
    float datay1 = linear(datax1, param[0], param[1]); 
    float datay2 = linear(datax2, param[0], param[1]); 

    // adjust for origin 
    datax1-=range.x; 
    datax2-=range.x; 
    datay1-=range.y; 
    datay2-=range.y; 

    // transform to image coordiantes 
    int x1 = linear(datax1, dataToImg, x0) + 0.5f; 
    int x2 = linear(datax2, dataToImg, x0) + 0.5f; 
    int y1 = linear(datay1, -dataToImg, y0) + 0.5f; // Y points up 
    int y2 = linear(datay2, -dataToImg, y0) + 0.5f; 

    cv::line(image, Point(x1, y1), Point(x2, y2), Scalar(255), 1); 

    if (inlrs==NULL) 
     delete inliers; 
} 

// Root mean sqaure error (RMSE) of line fit y=param[0]*x+param[1] 
float errorLine(float* x, float* y, int N, float* param, 
     const bool* inliers = NULL) { 

    if (N<=0 || x==NULL || y==NULL || param==NULL) 
     return MAX_FLOAT; 

    int ninliers = (inliers==NULL?N:0); 
    double RMSE = 0; 

    // error accumulation loop 
    for (int i = 0; i < N; i++) { 

     if (inliers!=NULL) { 
      if (inliers[i]) 
       ninliers++; 
      else 
       continue; 
     } 

     float y_predicted = param[0] * x[i] + param[1]; 
     RMSE += SQR(y[i] - y_predicted); 
    } 

    if (ninliers==0) 
     return LARGE_NUMBER; 
    else 
     return sqrt(RMSE/ninliers); 
} 

// simple line fit using sum of squared differences 
// inliers are used to specify a subset of points for a fit 
float fitLine(float* x, float* y, int N, float* param, 
     const bool* inliers = NULL, const bool debug = false) { 

    if (N<=0 || x==NULL || y==NULL || param==NULL) { 
     if (debug) 
      cout<<"ERROR fit line quadratic: N<=0 || x==NULL || y==NULL || param==NULL"<<endl; 
     return MAX_FLOAT; 
    } 

    int ninliers = (inliers==NULL?N:0); 
    double sum_x = 0; 
    double sum_y = 0; 
    double sum_xy = 0; 
    double sum_x2 = 0; 

    // create specific sums of x, y, xy, x^2 
    for (int i = 0; i < N; i++) { 

     // use inliers only 
     if (inliers!=NULL) { 
      if (!inliers[i]) 
       continue; 
      else 
       ninliers++; 
     } 

     sum_x += x[i]; 
     sum_y += y[i]; 
     sum_xy += x[i] * y[i]; 
     sum_x2 += x[i] * x[i]; 
    } 

    if(ninliers < 2) { 
     if (debug) 
      cout<<"ERROR fit line quadratic: less than 2 data points"<<endl; 
     return MAX_FLOAT; 
    } 

    // means 
    double mean_x = sum_x/ninliers; 
    double mean_y = sum_y/ninliers; 

    float varx = sum_x2 - sum_x * mean_x; 
    float cov = sum_xy - sum_x * mean_y; 
    // eliminate bias: variance is e a bit underestimated since df=N-1, see 
    // http://davidmlane.com/hyperstat/B16616.html) 
// if (ninliers>1) 
//  varx *= (float)ninliers/(ninliers-1); 


    // quadratic fit 
    if (abs(varx) < SMALL_NUMBER) { 
     if (debug) 
      cout<<"ERROR fit line quadratic: zero variance" <<endl; 
     return MAX_FLOAT; 
    } 

    // see http://easycalculation.com/statistics/learn-regression.php 
    param[0] = cov/varx; 
    param[1] = mean_y - param[0] * mean_x; 

    return errorLine(x, y, N, param, inliers); 
} 

// RANSAC line fit (number of inliers has priority over RMSE). 
float RANSAC_line(float* x, float* y, const int N, float* param, 
     const int niter = 10, const float maxError = 1.0f, 
     bool* inlrs = NULL, bool debug = false) { 

    if (x==NULL || y==NULL || N<2) { 
     if (debug) 
      cout<<"x==NULL || y==NULL || N<2" <<endl; 
     return MAX_FLOAT; 
    } 
    srand (time(NULL)); 

    // internal stopping criterions 
    const float RMSE_OK = 0.1f; 
    const float INLIERS_RATIO_OK = 0.7f; 

    int ninliers, best_ninliers = 0; 
    float inliers_ratio = 0; 
    float RMSE, bestRMSE = MAX_FLOAT; 
    float cur_param[2]; 

    bool* inliers = inlrs; 
    if (inlrs==NULL) { 
     inliers = new bool[N]; 
     std::fill_n(inliers, N, true); 
    } 

    // iterations 
    int iter; 
    for (iter = 0; iter<niter; iter++) { 

     // 1. select a random set of 2 inliers 
     int i1 = rand() % N; // [0, N[ 
     int i2 = i1; 
     while (i2==i1) 
      i2 = rand() % N; 

     // 2. select minimum number of points (2) 
     float x1 = x[i1]; 
     float x2 = x[i2]; 
     float y1 = y[i1]; 
     float y2 = y[i2]; 

     // TODO: we may parameterize the line differently (alpha, d) 
     if (abs(x1-x2) < SMALL_NUMBER) 
      cur_param[0] = LARGE_NUMBER * SIGN(cur_param[0]); 
     else 
      cur_param[0] = (y1-y2)/(x1-x2); 
     cur_param[1] = y1-cur_param[0]*x1; 
     if (abs(cur_param[0]) < SMALL_NUMBER) // flat line? 
      cur_param[0] = SMALL_NUMBER * SIGN(cur_param[0]); 


     // 3. determine inliers using the whole set 
     for (int i=0; i<N; i++) { 
      float y_fit = cur_param[0]*x[i] + cur_param[1]; 
      float x_fit = (y[i] - cur_param[1])/cur_param[0]; 
      if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxError) { // block distance 
       inliers[i] = true; 
      } else { 
       inliers[i] = false; 
      } 
     } 

     // 4. re-calculate params via quadratic fit on all inliers 
     RMSE = fitLine(x, y, N, cur_param, (const bool*)inliers); 
     if (abs(cur_param[0]) < SMALL_NUMBER) // flat line? 
      cur_param[0] = SMALL_NUMBER * SIGN(cur_param[0]); 

     // 5. re-calculate inliers 
     ninliers = 0; 
     for (int i=0; i<N; i++) { 
      float y_fit = cur_param[0]*x[i] + cur_param[1]; 
      float x_fit = (y[i] - cur_param[1])/cur_param[0]; 
      if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxError) { 
       inliers[i] = true; 
       ninliers++; 
      } else { 
       inliers[i] = false; 
      } 
     } 

     // 6. calculate the error 
     RMSE = errorLine(x, y, N, cur_param, (const bool*)inliers); 

     // found a better solution? 
     if (best_ninliers < ninliers) { 
      best_ninliers = ninliers; 
      param[0] = cur_param[0]; 
      param[1] = cur_param[1]; 
      bestRMSE = RMSE; 
     } 

     // 7. check exit condition 
     inliers_ratio = (float)best_ninliers/N; 
     if (RMSE < RMSE_OK && inliers_ratio > INLIERS_RATIO_OK) { 

      if (debug) 
       cout<<"Breaking early after "<< iter+1<<" iterations"<<endl; 

      break; 
     } 
    } // iterations 

    if (debug) 
     cout<<"inliers ratio = "<< inliers_ratio <<endl; 

    // 8. recreate inliers for the best parameters 
    for (int i=0; i<N; i++) { 
     float y_fit = param[0]*x[i] + param[1]; 
     float x_fit = (y[i] - param[1])/param[0]; 
     if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxError) { // block distance 
      inliers[i] = true; 
     } else { 
      inliers[i] = false; 
     } 
    } 

    if (inlrs==NULL) 
     delete inliers; 
    return bestRMSE; 
} 

// generates the data give the random seed 
void generateData(float* x, float* y, unsigned int seed = 0, bool hasOutl = false) { 

    // initialize pseudo-random generator 
    srand (seed); 

    for (int i=0; i<NDATA_UNIT_TEST; i++) { 

     // uniform noise (negative and positive -50..50) 
     float noise = (float)(rand() % 100-50) * 
       NOISE_LEVEL_UNIT_TEST/100.0f; 
     //cout<<noise<<"; "; 

     // shift 
     int noutlisers = (float)NDATA_UNIT_TEST * OUTLIER_PROPORTION_UNIT_TEST; 
     int start = NDATA_UNIT_TEST/2-noutlisers/2; // put outliers in the middle 
     bool outlier = (i > start) && (i < start + noutlisers); 
     if (hasOutl && outlier) 
      noise += SHIFT_OUTLIERS_UNIT_TEST ; 

     x[i] = i-NDATA_UNIT_TEST/2; // center x data around 0 
     y[i] = TRUE_PARAM_UNIT_TEST[0]*x[i]+TRUE_PARAM_UNIT_TEST[1]+noise; 
     //cout<<"x, y = "<<x[i]<<"; "<<y[i]<<endl; 

    } 

} 

// generates the data from pasted numbers 
void generateDataPaste(float* x, float* y) { 

    const int n = 63; 

    // compiler will generate an error if n is inaccurate 
    float data[n][2] = { 
      {1, 1}, 
      {2, 1}, 
      {3, 1}, 
      {4, 1}, 
      {5, 1}, 
      {6, 1}, 
      {7, 1}, 
      {8, 1}, 
      {9, 1}, 
      {2, 2}, 
      {3, 2}, 
      {4, 2}, 
      {5, 2}, 
      {6, 2}, 
      {7, 2}, 
      {8, 2}, 
      {9, 2}, 
      {10, 2}, 
      {11, 2}, 
      {12, 2}, 
      {13, 2}, 
      {14, 2}, 
      {15, 2}, 
      {16, 2}, 
      {17, 2}, 
      {8, 3}, 
      {10, 3}, 
      {12, 3}, 
      {13, 3}, 
      {14, 3}, 
      {15, 3}, 
      {16, 3}, 
      {17, 3}, 
      {18, 3}, 
      {9, 4}, 
      {10, 4}, 
      {11, 4}, 
      {12, 4}, 
      {13, 4}, 
      {14, 4}, 
      {15, 4}, 
      {16, 4}, 
      {17, 4}, 
      {18, 4}, 
      {19, 4}, 
      {20, 4}, 
      {21, 4}, 
      {22, 4}, 
      {23, 4}, 
      {24, 4}, 
      {25, 4}, 
      {17, 5}, 
      {18, 5}, 
      {19, 5}, 
      {20, 5}, 
      {21, 5}, 
      {22, 5}, 
      {23, 5}, 
      {24, 5}, 
      {25, 5}, 
      {26, 5}, 
      {27, 5}, 
      {28, 5}}; 

    // crop or repeate data to get a required sample size 
    for (int i=0; i<NDATA_UNIT_TEST; i++) { 
     x[i] = data[i%n][0]; 
     y[i] = data[i%n][1]; 
    } 

} 

// unit test of quadratic line fit 
bool uniTest_fitLine_QUADRATIC(int col = 0, int row = 0) { 

    const bool hasOutliers = false; // cannot handle outliers well 
    const bool dataFromParam = false; // either data from param or pasted ones 

    string str = hasOutliers?" with outliers":" with no outliers"; 
    cout<<"unit-test_fitLine()"<<str<<endl; 
    bool res = false; 

    // opencv window 
    Mat img(UNIT_TEST_IMG_HEIGHT, UNIT_TEST_IMG_WIDTH, CV_8U); 
    cv::namedWindow("QUADRATIC", CV_WINDOW_AUTOSIZE); 
    cv::moveWindow("QUADRATIC", col*(img.cols+50), row*(img.rows+50)); 

    // create data 
    float x[NDATA_UNIT_TEST], y[NDATA_UNIT_TEST]; 
    if (dataFromParam) 
     generateData(x, y, time(NULL), hasOutliers); 
    else 
     generateDataPaste(x, y); 

    // fit the line 
    float param[2]; 
    float er = fitLine(x, y, NDATA_UNIT_TEST, param) ; 
    if (er < NOISE_LEVEL_UNIT_TEST || !dataFromParam) 
     res = true; 

    // print the result 
    cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; RMSE = "<<er<<endl; 

    if (dataFromParam) { 
     cout<<"True a = "<<TRUE_PARAM_UNIT_TEST[0]<<"; b = "<< 
       TRUE_PARAM_UNIT_TEST[1]<<endl; 
     cout<<"combined param error = "<< 
       abs(TRUE_PARAM_UNIT_TEST[0]-param[0])+ 
       abs(TRUE_PARAM_UNIT_TEST[1]-param[1])<<endl; 
     if (res) 
      cout<<"===passed"<<endl; 
     else 
      cout<<"===FAIL!"<<endl; 
     cout<<endl; 
    } 
    // visualize 
    showPoints(img, x, y, NDATA_UNIT_TEST, NULL, param); 
    imshow("QUADRATIC", img); 
    cv::waitKey(10); 

    return res; 
} 

// unit test of RANSAC line fit 
bool uniTest_fitLine_RANSAC(int col = 0, int row = 0) { 

    const bool hasOutliers = true; // handles outliers gracefully 
    const bool dataFromParam = false; // either data from param or pasted ones 

    string str = hasOutliers?" with outliers":" with no outliers"; 
    cout<<"unit-test_RANSAC()"<<str<<endl; 
    bool res = false; 

    // opencv window 
    Mat img(UNIT_TEST_IMG_HEIGHT, UNIT_TEST_IMG_WIDTH, CV_8U); 
    cv::namedWindow("RANSAC", CV_WINDOW_AUTOSIZE); 
    cv::moveWindow("RANSAC", col*(img.cols+50), row*(img.rows+50)); 

    // create data 
    float x[NDATA_UNIT_TEST], y[NDATA_UNIT_TEST]; 
    if (dataFromParam) 
     generateData(x, y, time(NULL), hasOutliers); 
    else 
     generateDataPaste(x, y); 

    // parameters 
    float param[2]; 
    int niter = 20; 
    float maxError = 1.0f; 
    bool inliers[NDATA_UNIT_TEST]; 
    bool debug = true; 
    float er; 

    // function call 
    er = RANSAC_line(x, y, NDATA_UNIT_TEST, param, niter, maxError, 
      inliers, debug); 
    if (er < NOISE_LEVEL_UNIT_TEST || !dataFromParam) 
     res = true; 

    // print the result 
    cout<<"outliers: "; 
    int noutliers = 0; 
    for (int i=0; i<NDATA_UNIT_TEST; i++) { 
     if (!inliers[i]) { 
      noutliers++; 
      cout<<i<<"; "; 
      if (noutliers % 30==0) 
       cout<<endl; 
     } 
    } 
    cout<<" overall "<<noutliers<<endl; 
    cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; RMSE = "<<er<<endl; 

    if (dataFromParam) { 
     cout<<"True a = "<<TRUE_PARAM_UNIT_TEST[0]<<"; b = "<< 
       TRUE_PARAM_UNIT_TEST[1]<<endl; 
     cout<<"combined param error = "<< 
       abs(TRUE_PARAM_UNIT_TEST[0]-param[0])+ 
       abs(TRUE_PARAM_UNIT_TEST[1]-param[1])<<endl; 
     if (res) 
      cout<<"===passed"<<endl; 
     else 
      cout<<"===FAIL!"<<endl; 
     cout<<endl; 
    } 

    // visualize 
    showPoints(img, x, y, NDATA_UNIT_TEST, inliers, param); 
    imshow("RANSAC", img); 
    cv::waitKey(10); 

    return res; 
} 

#endif /* RANSAC_H_ */

的代碼提供耐受高達異常值的90％，而很快找到解決辦法的能力。

來源

2013-04-05 Vlad

問題是什麼，這將肯定會失敗？ – 2013-05-31 15:58:11

問題是什麼？這是一個問答網站，而不是博客或CodeProject。 – John 2013-05-31 19:56:55

你是對的，這是不是一個博客，但我問的問題是極其重要的。如果到目前爲止沒有這樣的問題，我必須創建（並回答）它，特別是因爲它經常用於計算機視覺。我猜想，興趣和觀點的數量本身就是可以證明的。 – Vlad 2015-01-23 07:30:10

而不是使用：
RMSE += SQR(y[i] - y_predicted);爲點，以計算出的線的距離
你應該只是使用點的實際距離爲符合
abs(y_i-cur_m*x_i-cur_b)/sqrt(cur_m*cur_m+1)或
abs(pt.y-cur_m*pt.x-cur_b)* pow(cur_m*cur_m+1,-.5)（少師）
或至少約：
abs(y_i-cur_m*x_i-cur_b)/cur_m

來源

2013-11-18 16:58:37 Chris

我同意，最好誤差度量是正交於直線或平面的距離。這意味着公式可以表示爲正常* [X，Y] = d代替Y = PARAM [0] * X + PARAM [1]。考慮到這一點，我更新了RMSE計算，見下文。 – Vlad 2013-12-05 18:44:37

其實，代碼使用y = mx + c雖然意見建議行正常和角度。我剛纔更改此爲筆者彷彿線的角度接近PI/2

來源

2014-06-14 14:28:03

RANSAC線性迴歸（魯棒線配合）

回答

相關問題