下面是一個算法的 C 實現,用於計算實際數據與預測數據之間的差異。該算法出自 Osborne/McGraw-Hill 於 1980 年出版(受版權保護)的《Practical BASIC Programs》一書。
這裏是.h文件:
/*
* divergence.h
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#ifndef DIVERGENCE_H_
#define DIVERGENCE_H_
/**
 * Error statistics filled in by Divergence__Error() for one pair of series.
 * Every "error" is the difference (actual - expected) for one pair of values.
 */
typedef struct
{
    int   DataSize;       /**< Number of value pairs that were compared */
    float TotalError;     /**< Sum of the signed errors */
    float AbsError;       /**< Total Absolute Error */
    float SqError;        /**< Total Squared Error */
    float MeanError;      /**< TotalError divided by the number of pairs */
    float MeanAbsError;   /**< AbsError divided by the number of pairs */
    float MeanSqError;    /**< SqError divided by the number of pairs */
    float RMSError;       /**< Root Mean Square Error: sqrt of MeanSqError */
} DIVERGENCE_ERROR_TYPE;
/**
 * @brief Compute error statistics between a series of expected values and
 *        the corresponding actual values.
 *
 * @param size     Number of value pairs to compare; elements 0..size-1 of
 *                 both arrays are read.
 * @param expected Expected (predicted) values.
 * @param actual   Actual values.
 * @param error    Output: receives the totals, means and RMS of the errors.
 */
void Divergence__Error(int size, float expected[], float actual[], DIVERGENCE_ERROR_TYPE *error);
// Prefer the standard library: fabs() from <math.h> for floating-point values,
// abs() from <stdlib.h> for int values.
#ifndef ABS
// The whole expansion is parenthesized so ABS() can be used inside a larger
// expression (e.g. 1 + ABS(x)) without the ?: operator capturing its neighbours.
// Not safe!!! - the argument is evaluated twice; do not increment parameter inside ABS()!
#define ABS(x) (((x) > 0) ? (x) : (0 - (x)))
#endif
#endif /* DIVERGENCE_H_ */
... .c文件:
/*
* divergence.c
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#include "math.h"
#include "divergence.h"
/**
 * @brief Compute divergence from expected values.
 *
 * @details Compute the raw errors, absolute errors, root mean square errors,
 * etc. for a series of values. Each error is (actual - expected); the sums
 * are accumulated in double precision and narrowed to float on output.
 *
 * If @p size is not positive, or either input array is a null pointer, the
 * series is treated as empty: DataSize and every statistic are set to zero
 * instead of dividing by zero. If @p error is a null pointer the function
 * returns without doing anything.
 *
 * @param size     - integer value defines the number of values to compare.
 * @param expected - expected (predicted) values, at least @p size elements.
 * @param actual   - actual values, at least @p size elements.
 * @param error    - output structure that receives the statistics.
 */
void Divergence__Error(int size, float expected[], float actual[], DIVERGENCE_ERROR_TYPE *error)
{
    double total_err = 0.0;
    double abs_err = 0.0;
    double sq_err = 0.0;
    double count = 0.0;
    double diff = 0.0;
    int index = 0;

    if (!error)
    {
        return; /* nowhere to store the result */
    }

    if (size <= 0 || !expected || !actual)
    {
        /* Nothing to compare: report an empty data set rather than NaN means. */
        error->DataSize = 0;
        error->TotalError = 0.0f;
        error->AbsError = 0.0f;
        error->SqError = 0.0f;
        error->MeanError = 0.0f;
        error->MeanAbsError = 0.0f;
        error->MeanSqError = 0.0f;
        error->RMSError = 0.0f;
        return;
    }

    for (index = 0; index < size; index++)
    {
        diff = (double)(actual[index]) - (double)(expected[index]);
        total_err += diff;
        abs_err += fabs(diff);
        sq_err += diff * diff; /* the square is already non-negative */
    }

    count = (double)size;
    error->DataSize = size;
    error->TotalError = (float)total_err;
    error->AbsError = (float)abs_err;
    error->SqError = (float)sq_err;
    error->MeanError = (float)(total_err / count);
    error->MeanAbsError = (float)(abs_err / count);
    error->MeanSqError = (float)(sq_err / count);
    error->RMSError = (float)(sqrt(sq_err / count));
}
……以及用於測試該函數的示例 main():
/*
* main.c
*
* Created on: Jan 13, 2011
* Author: Erik Oosterwal
*/
#include <stdio.h>
#include "divergence.h"
/* First sample set (6 pairs): main() passes poll as the expected values and
 * vote as the actual values. */
float vote[] = {
    40.3, 22.5, 16.3, 10.5, 7.2, 3.2
};
float poll[] = {
    42.7, 21.4, 18.2, 6.0, 7.4, 4.3
};

/* Second sample set (7 pairs): main() passes predict as the expected values
 * and actual as the actual values. */
float actual[] = {
    74, 70, 58, 60, 65, 73, 70
};
float predict[] = {
    49, 62, 75, 82, 37, 58, 92
};
/** Print every field of @p stats, one value per line, followed by two blank lines. */
static void print_stats(const DIVERGENCE_ERROR_TYPE *stats)
{
    printf("%i\n%f\n%f\n%f\n%f\n%f\n%f\n%f\n\n\n",
           stats->DataSize, stats->TotalError, stats->AbsError, stats->SqError,
           stats->MeanError, stats->MeanAbsError, stats->MeanSqError, stats->RMSError);
}

/**
 * Run Divergence__Error() on the two sample data sets and print the results.
 *
 * @param argc unused.
 * @param argv unused.
 * @return always 0.
 */
int main(int argc, char *argv[])
{
    DIVERGENCE_ERROR_TYPE stats;

    (void)argc;
    (void)argv;

    /* Element counts are derived from the arrays so they cannot drift out of
     * sync with the sample data. */
    Divergence__Error((int)(sizeof poll / sizeof poll[0]), poll, vote, &stats);
    print_stats(&stats);

    Divergence__Error((int)(sizeof predict / sizeof predict[0]), predict, actual, &stats);
    print_stats(&stats);

    return 0;
}
我不能保證這是最快的方法,而且該函數還可以再做一些調整,使其對不同的數據類型更友好;但它確實可用,其結果也已根據書中提供的示例驗證過。
在C和C++中,某些問題的解決方案是相同的。這不是那些問題之一,那是哪一個? C還是C++? – 2010-10-21 10:49:24
@PigBen,我會用C – Raj 2010-10-21 10:51:10
這個問題是不準確的。最簡單的不相似度量是樣本之間的平均差異。沒有任何語境,人們不能說你的情況是否好。 – 2010-10-21 11:00:53