我使用cv::EM
算法做圖像流高斯混合模型的分類。然而,雖然像素分類成不同的模型中使用EM::prediction
方法,我發現實在是太緩慢,使用約3秒,一個600×800的圖像。另一方面,由OpenCV提供的MOG background subtractor
非常快速地執行這部分,僅使用約30ms。所以我決定用其執行方法取代EM::prediction
部分。但是,我不知道如何修改它。(問題標題:OpenCV:加快 EM 算法預測)
,我使用到prediction
部分的代碼如下:
// Convert the BGR source image to float and flatten it into an
// (rows*cols) x 3 sample matrix, one row per pixel, for EM training.
cv::Mat floatSource;
source.convertTo (floatSource, CV_32F);
cv::Mat samples (source.rows * source.cols, 3, CV_32FC1);
int idx = 0;
for (int y = 0; y < source.rows; y ++)
{
    const cv::Vec3f* row = floatSource.ptr <cv::Vec3f> (y);
    for (int x = 0; x < source.cols; x ++)
    {
        samples.at<cv::Vec3f> (idx++, 0) = row[x];
    }
}

// Train a 2-component Gaussian mixture on the pixel samples.
// NOTE(review): in the OpenCV 2.x legacy ml API these types are spelled
// CvEMParams / CvEM — confirm against your OpenCV version.
cv::EMParams params(2); // num of mixture we use is 2 here
cv::ExpectationMaximization em (samples, cv::Mat(), params);

cv::Mat means = em.getMeans();
cv::Mat weights = em.getWeights();  // was declared `weight` but used as `weights`
// The component with the larger weight is taken as foreground.
const int fgId = weights.at<float>(0) > weights.at<float>(1) ? 0 : 1;  // fixed `flaot` typo

// Classify every pixel — this per-sample predict() loop is the slow part.
idx = 0;
for (int y = 0; y < source.rows; y ++)
{
    for (int x = 0; x < source.cols; x ++)
    {
        const int result = cvRound (em.predict (samples.row (idx++), NULL));  // was missing ')'
    }
}
我從「cvbgfg_gaussmix.cpp」中找到的那部分處理代碼
是這樣的:
// One MOG (Stauffer-Grimson style) update/classification pass over a
// 3-channel 8-bit frame: per pixel, match against K gaussians, update the
// matched mode, write the foreground mask, then renormalize the weights.
static void process8uC3 (BackgroundSubtractorMOG& obj, const Mat& image, Mat& fgmask, double learningRate)
{
    int x, y, k, k1, rows = image.rows, cols = image.cols;
    float alpha = (float)learningRate, T = (float)obj.backgroundRatio, vT = (float)obj.varThreshold;
    int K = obj.nmixtures;                 // gaussians per pixel
    const float w0 = (float)CV_BGFG_MOG_WEIGHT_INIT;
    const float sk0 = (float)(CV_BGFG_MOG_WEIGHT_INIT/CV_BGFG_MOG_SIGMA_INIT);
    const float var0 = (float) (CV_BGFG_MOG_SIGMA_INIT*CV_BGFG_MOG_SIGMA_INIT);
    for (y = 0; y < rows; y ++)
    {
        const uchar* src = image.ptr<uchar>(y);
        uchar* dst = fgmask.ptr<uchar>(y);
        MixData<Vec3f>* mptr = (MixData<Vec3f>*)obj.bgmodel.ptr(y);
        for (x = 0; x < cols; x++, mptr += K)
        {
            float wsum = 0, dw = 0;
            Vec3f pix (src [x*3], src[x*3+1], src[x*3+2]);
            // Find the first (highest-ranked) mode that matches this pixel.
            for (k = 0; k < K; k ++)
            {
                float w = mptr[k].weight;
                Vec3f mu = mptr[k].mean[0];
                Vec3f var = mptr[k].var[0];
                Vec3f diff = pix - mu;
                float d2 = diff.dot(diff);
                if (d2 < vT * (var[0] + var[1] + var[2]))   // was missing closing ')'
                {
                    // Matched: blend the pixel into weight, mean and variance.
                    dw = alpha * (1.f - w);
                    mptr[k].weight = w + dw;
                    mptr[k].mean = mu + alpha * diff;
                    // was diff[0]*diff[1] — the variance update needs diff[0]*diff[0]
                    var = Vec3f (max (var[0] + alpha * (diff[0] * diff[0] - var[0]), FLT_EPSILON),
                                 max (var[1] + alpha * (diff[1]*diff[1] - var[1]), FLT_EPSILON),  // was missing ')'
                                 max (var[2] + alpha * (diff[2]*diff[2] - var[2]), FLT_EPSILON));
                    mptr[k].var = var;
                    mptr[k].sortKey = w/sqrt (var[0] + var[1] + var[2]);
                    // Bubble the updated mode up so modes stay sorted by sortKey.
                    for (k1 = k-1; k1 >= 0; k1--)
                    {
                        if (mptr[k1].sortKey > mptr[k1+1].sortKey)
                            break;
                        std::swap (mptr[k1],mptr[k1+1]);
                    }
                    break;
                }
                wsum += w;   // total weight of the modes ranked above the match
            }
            // Foreground when the matched mode lies past the background ratio T.
            dst[x] = (uchar) (-(wsum >= T));
            wsum += dw;
            if (k == K)
            {
                // No mode matched: replace the weakest mode with a fresh one.
                wsum += w0 - mptr[K-1].weight;
                mptr[K-1].weight = w0;          // was mptr[k-1]; K-1 is the intent
                mptr[K-1].mean = pix;
                mptr[K-1].var = Vec3f (var0, var0, var0);
                mptr[K-1].sortKey = sk0;
            }
            else
                for (; k < K; k ++)
                    wsum += mptr[k].weight;
            // Renormalize so the K weights sum to 1.
            dw = 1.f/wsum;
            for (k = 0; k < K; k ++)
            {
                mptr[k].weight *= dw;
                mptr[k].sortKey *= dw;
            }
        }
    }
}
我怎樣才能改變這部分代碼,以便它可以用於我的第一個代碼到em.predict
部分?先謝謝你。
更新
我模仿 process8uC3
函數,在我自己的代碼中這樣使用:
// My adaptation of process8uC3: run the per-pixel mixture update directly on
// sourceMat, writing the foreground mask into fgImg and keeping the per-pixel
// mixture state in bgImg.
// The mask is one byte per pixel, so CV_8UC1 (CV_8UC3 left 2/3 of each row unwritten).
cv::Mat fgImg (600, 800, CV_8UC1);
double learningRate = 0.001;
int x, y, k, k1;
int rows = sourceMat.rows; //source opencv matrix
int cols = sourceMat.cols; //source opencv matrix
float alpha = (float) learningRate;
// NOTE(review): in BackgroundSubtractorMOG these are backgroundRatio (~0.7)
// and varThreshold (~6.25 = 2.5^2); T=2.0 can never be exceeded by weights
// that are normalized to sum to 1 — confirm against the OpenCV defaults.
float T = 2.0;
float vT = 0.30;
int K = 3;
const float w0 = (float) CV_BGFG_MOG_WEIGHT_INIT;   // was CV_BGFG_MOG_WEIGTH_INIT (typo)
const float sk0 = (float) (CV_BGFG_MOG_WEIGHT_INIT/CV_BGFG_MOG_SIGMA_INIT);
const float var0 = (float) (CV_BGFG_MOG_SIGMA_INIT*CV_BGFG_MOG_SIGMA_INIT);
const float minVar = FLT_EPSILON;
// The model needs K MixData<Vec3f> entries per pixel. A 600x800 CV_8UC3 image
// (3 bytes per pixel) is far too small and corrupts memory — this is the main
// reason the result was "a complete mess". Allocate the real size, zeroed so
// all weights/means start at 0.
cv::Mat bgImg = cv::Mat::zeros (rows, cols * K * (int)sizeof (MixData<cv::Vec3f>), CV_8U);
for (y = 0; y < rows; y ++)
{
    const uchar* src = sourceMat.ptr <uchar> (y);   // was `source` (undeclared) and const char*
    uchar* dst = fgImg.ptr <uchar> (y);
    uchar* tmp = bgImg.ptr (y);
    MixData<cv::Vec3f>* mptr = (MixData<cv::Vec3f>*)tmp;
    for (x = 0; x < cols; x ++, mptr += K)
    {
        float wsum = 0, dw = 0;                              // were never declared
        cv::Vec3f pix (src[x*3], src[x*3+1], src[x*3+2]);    // was never declared
        // This matching loop over the K modes was missing entirely.
        for (k = 0; k < K; k ++)
        {
            float w = mptr[k].weight;
            cv::Vec3f mu = mptr[k].mean[0];      // was mpptr (typo)
            cv::Vec3f var = mptr[k].var[0];
            cv::Vec3f diff = pix - mu;
            float d2 = diff.dot (diff);
            if (d2 < vT * (var[0] + var[1] + var[2]))
            {
                // Matched: update weight, mean and variance of this mode.
                dw = alpha * (1.f - w);
                mptr[k].weight = w + dw;
                mptr[k].mean = mu + alpha * diff;
                var = cv::Vec3f (std::max (var[0] + alpha*(diff[0]*diff[0]-var[0]), minVar),
                                 std::max (var[1] + alpha*(diff[1]*diff[1]-var[1]), minVar),
                                 std::max (var[2] + alpha*(diff[2]*diff[2]-var[2]), minVar));
                mptr[k].var = var;
                mptr[k].sortKey = w/std::sqrt (var[0] + var[1] + var[2]);
                // Keep modes sorted by descending sortKey.
                for (k1 = k-1; k1 >= 0; k1 --)
                {
                    if (mptr[k1].sortKey > mptr[k1+1].sortKey)
                        break;
                    std::swap (mptr[k1], mptr[k1+1]);
                }
                break;
            }
            wsum += w;
        }
        // Foreground when the matched mode lies past the background ratio T.
        dst[x] = (uchar) (-(wsum >= T));
        wsum += dw;
        if (k == K)
        {
            // No match: replace the weakest mode with a fresh one.
            wsum += w0 - mptr[K-1].weight;
            mptr[K-1].weight = w0;
            mptr[K-1].mean = pix;
            mptr[K-1].var = cv::Vec3f (var0, var0, var0);
            mptr[K-1].sortKey = sk0;
        }
        else
            for (; k < K; k ++)
                wsum += mptr[k].weight;   // the original merged the normalization here — wrong
        // Renormalize the weights; this loop must run for every pixel.
        dw = 1.f/wsum;
        for (k = 0; k < K; k ++)
        {
            mptr[k].weight *= dw;
            mptr[k].sortKey *= dw;
        }
    }
}
它編譯沒有錯誤,但結果完全是一團糟。我懷疑這可能與 T
和 vT
這兩個值有關,於是用其他幾個值替換了它們,但並沒有任何區別。所以我相信即使編譯沒有錯誤,我的用法也是錯的。
非常感謝您的回答。我按照你所說的做了,通過使用重塑,然後使用完整的矩陣而不是每個樣本。但結果並沒有太大的變化。 –
您是否開啟了優化選項來編譯 OpenCV?目前無需重寫所有內容的最佳做法,就是按您的思路將其並行化(parallelize) – remi
感謝。我想知道爲什麼opencv提供的背景減法工作如此之快?我認爲必須有辦法像這樣快速完成,這就是爲什麼我問這個問題的原因。除了parellelize,我認爲如果我可以將這個「process8uC3」函數轉換爲我自己的代碼,那麼它可以實現。你不這麼認爲嗎? –