我有一個Matlab MEX功能,這使得重複調用名爲calculate().
C函數我做的函數的兩個版本:大的差異取決於地方的內存分配
版本A:每次mex()
調用calculate()
,它只傳遞輸入參數,並且calculate()
所需的所有內存在calculate()
內分配並釋放 - 每次!
版本B:calculate()
所需的內存分配在mex()
的開頭,並且指針傳遞給calculate()
。內存僅在mex()
結束時釋放。換句話說,分配/釋放只對整個業務進行一次。
根據內存分配需要時間的觀念,我根據天真的假設創建了B版,這應該會提高速度。但它實際上增加了約5倍的執行時間!那裏發生了什麼?
下面是版本B--實際的代碼,你可以從那裏我本來的版本A.
/* MEX business*/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
/* mexPrintf("Inside the mex function of CalcMz2.\n");
mexEvalString("drawnow;");*/
/*declare local variables*/
struct InputParameters voxelPars;
InputParameters *point2pars;
struct Zcontrast contrast;
struct Zcontrast* ptr2contrast;
ptr2contrast = &contrast;
mxArray *in, *out;
mxArray *temp;
double *output;
int nFields;
int i,j,k;
in = mxDuplicateArray(prhs[0]); /*'in' now is a copy of the input struct*/
nFields = mxGetNumberOfFields(prhs[0]);
if (nFields != 39) mexPrintf("Error: the number of fields in the input struct is incorrect.\n");
//associate outputs
out = plhs[0] = mxCreateDoubleScalar(0.0);
output = mxGetPr(out);
/*Passed variables memory allocation*/
double **pntrA;
pntrA = (double **)mxCalloc(18, sizeof(double*));
for (i=0; i <18; i++){
pntrA[i] = (double *) mxCalloc(18, sizeof(double));
}
double *pntrAinvB;
pntrAinvB = (double *) mxCalloc(18, sizeof(double));
/*used by MatrInv()*/
double **CopyOfMatrix;
CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
for (i=0; i<18; i++){
CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
}
double *vector;
double *col;
int *indx;
vector = (double *)mxCalloc(18, sizeof(double));
indx = (int *)mxCalloc(18, sizeof(int));
col = (double *)mxCalloc(18, sizeof(double));
/*used by CalculateY()*/
double *Aat;
Aat = (double*)mxCalloc(18*18, sizeof(double));
double *Aate;
Aate = (double*)mxCalloc(18*18, sizeof(double));
double *sum;
sum = (double*)mxCalloc(18, sizeof(double));
double *product;
product = (double*)mxCalloc(18, sizeof(double));
/*Reassign values passed from input struct*/
temp = mxGetFieldByNumber(prhs[0],0,0);
voxelPars.ampl = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,1);
voxelPars.phi = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,2);
voxelPars.count1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,3);
voxelPars.pw1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,4);
voxelPars.b1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,5);
voxelPars.puloffsetppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,6);
voxelPars.pw1dc = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,7);
voxelPars.cf = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,8);
voxelPars.M0w = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,9);
voxelPars.T1a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,10);
voxelPars.T2a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,11);
voxelPars.offsetappm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,12);
voxelPars.bwfraction = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,13);
voxelPars.M0a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,14);
voxelPars.M0bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,15);
voxelPars.T1bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,16);
voxelPars.T2bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,17);
voxelPars.offsetbwppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,18);
voxelPars.exratebw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,19);
voxelPars.M0b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,20);
voxelPars.T1b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,21);
voxelPars.T2b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,22);
voxelPars.offsetbppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,23);
voxelPars.exrateb = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,24);
voxelPars.M0c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,25);
voxelPars.T1c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,26);
voxelPars.T2c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,27);
voxelPars.offsetcppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,28);
voxelPars.exratec = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,29);
voxelPars.M0d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,30);
voxelPars.T1d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,31);
voxelPars.T2d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,32);
voxelPars.offsetdppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,33);
voxelPars.exrated = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,34);
voxelPars.M0e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,35);
voxelPars.T1e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,36);
voxelPars.T2e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,37);
voxelPars.offseteppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,38);
voxelPars.exratee = mxGetScalar(temp);
point2pars = &voxelPars;
/*Initialize contrast's values to zero*/
ptr2contrast->SimMza = ptr2contrast->SimMzb = ptr2contrast->contrastz = 1.0;
/* call calculate()*/
calculate(point2pars, ptr2contrast, pntrA, pntrAinvB, CopyOfMatrix,vector, col, indx, Aat, Aate, sum, product);
*output = ptr2contrast->contrastz;
//mexPrintf("The contrast has been calculated as :%f.\n", ptr2contrast->contrastz);
/*Free memory
mxFree(pntrA); mxFree(pntrAinvB);*/
mxFree(CopyOfMatrix); mxFree(Aat); mxFree(Aate);
mxFree(vector);mxFree(indx);mxFree(col);
mxFree(sum); mxFree(product);
}//end mex function
void calculate (struct InputParameters *voxelPars1, struct Zcontrast *contrast, double **pntrA,double *pntrAinvB, double **CopyOfMatrix, double *vector, double *col,int *indx, double *Aat, double *Aate, double *sum, double *product){
/*Local variables*/
int i, j, k, n1, m;
double pwms, pw1delay;
int npul;
double timeStepSize;
double W;
double Wa, Wbw, Wb, Wc, Wd, We;
double Cbw, Cb, Cc, Cd, Ce;
double M0a, M0bw, M0b, M0c, M0d, M0e; //declaring these as locals just to make life easier
double pa, pbw, pb, pc, pd, pe;
double Cabw, Cab, Cac, Cad, Cae;
double k1a, k1bw, k1b, k1c, k1d, k1e;
double k2a, k2bw, k2b, k2c, k2d, k2e;
// double **pntrA;
// pntrA = (double **)mxCalloc(18, sizeof(double*));
// for (i=0; i <18; i++){
// pntrA[i] = (double *) mxCalloc(18, sizeof(double));
// }
//
double AdinvB[18];
double AinvB[18];
for(i=0; i<18; i++){
AdinvB[i] = 0.0;
AinvB[i] = 0.0;
}
//
// /*Passed variables memory allocation*/
// double **CopyOfMatrix; /*used by MatriInv()*/
// CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
// for (i=0; i<18; i++){
// CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
// }
//
// double *vector;
// double *col;
// int *indx;
// vector = (double *)mxCalloc(18, sizeof(double));
// indx = (int *)mxCalloc(18, sizeof(int));
// col = (double *)mxCalloc(18, sizeof(double));
//
// /*used by CalculateY()*/
// double *Aat;
// Aat = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aat[i] = (double*)mxCalloc(18, sizeof(double));
// }*/
//
// double *Aate;
// Aate = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aate[i] = (double*)mxCalloc(18, sizeof(double));
// } */
//
// double *sum;
// sum = (double*)mxCalloc(18, sizeof(double));
// double *product;
// product = (double*)mxCalloc(18, sizeof(double));
double W1[voxelPars1->count1]; //Now THIS might require a pointer, since count1 is only passed in the argument struct. We'll see.
double W1x[voxelPars1->count1];
double W1y[voxelPars1->count1];
/* mexPrintf("Declared all local variables in calculate().\n");
mexEvalString("drawnow;");*/
/*Fill local variables with values from input struct voxelPars*/
內存分配....計算保留後
你認爲這裏的代碼不相關嗎? –
在這裏顯示你的工作,你的代碼可能有一些問題。 – krpra