選擇並分析一個數組中的點的窗口

我有一個函數對數組中包含的一組點進行簡單的迴歸分析。我有一個數組（pval），其中包含我想要執行迴歸分析的所有數據。這就是我想要實現這個的方法。

我得到了數組中前7個元素的平均值。這就是我在程序中所謂的'ref_avg'。
我想對數組中每五個連續元素組成的窗口執行一次迴歸分析，並把'ref_avg'放在每個窗口的最前面作爲第一個元素。因此，迴歸分析的每一步都會用到6個點。

例如對於第一步，下面計算的ref_avg是70.78。因此，在簡單迴歸的第一步驟將包含這些點

第一= {70.78,76.26,69.17,68.68,71.49,73.08}，

第二步同樣以ref_avg作爲第一個元素，其餘元素則從原始數組的第二個元素開始取

第二= {70.78,69.17,68.68,71.49,73.08,72.99}，

第三= {70.78,68.68,71.49,73.08,72.99,70.36}，

4th = {70.78,71.49,73.08,72.99,70.36,57.82}等等直到最後。
迴歸函數如下所示。

我不理解爲什麼在迴歸的第一步中，「calcul」數組的前3個元素的值是0.00，在第二步中是前2個元素，在第三步中是第1個元素。另外，迴歸函數的最後一步被打印了3次。

#include <stdio.h> 
    #include <stdlib.h>   
    #include <string.h> 

    /*
     * Question code, kept exactly as posted: averages the first size_hour
     * values of pval into ref_avg, then for each value of count fills calcul[]
     * with a 6-point window, prints the points and calls linear_regression()
     * to print the slope.
     *
     * NOTE(review): the comments below mark the defects that produce the
     * symptoms described in the question; the code itself is unchanged.
     */
    int main() 
{ 

    float pval[]={76.26,69.17,68.68,71.49,73.08,72.99,70.36,57.82,58.98,69.71,70.43,77.53,80.77,70.30,70.5,70.79,75.58,76.88,80.20,77.69,80.80,70.5,85.27,75.25}; 


    int count,Nhour; 
    const int MAX_HOUR = 24; 
    float *calcul=NULL; 
    float *tab_time =NULL; 
    float ref_avg; 
    int size_hour=7; 
    float sum=0; 
    /* NOTE(review): Nhour is read here before it is ever assigned (it is only
       set inside the while loop), so length holds an indeterminate value. */
    int length = Nhour+1; 
    float m; 
    float b; 
    /* NOTE(review): sizeof(calcul) is the size of the pointer, not of a float,
       so the element size passed to calloc is not sizeof(float). */
    calcul=(float*)calloc(MAX_HOUR,sizeof(calcul)); 
    if (calcul==NULL) 
    { 
     printf(" error in buffer\n"); 
     exit(EXIT_FAILURE); 
    } 

    /* NOTE(review): unlike calcul, this allocation is not checked for NULL. */
    tab_time= calloc(MAX_HOUR,sizeof(float)); 

     /* Get the average of the first seven elements */ 
      int i; 
    for (i=0;i<size_hour;i++) 
    { 
    sum += pval[i]; 
    } 
    ref_avg = sum/size_hour; 

      count=0; 
     /* perform the regression analysis on 5 hours increment */ 

     /* count is incremented at the top of the body, so it takes the values
        1 .. MAX_HOUR+1 inside the loop. */
     while(count<=MAX_HOUR) 
     { 
      ++count; 
      Nhour=5; 

      /* pass starts at -(Nhour-1) = -4 and grows by one per filled element,
         so the pval index used below is count - 4 + i. */
      int pass = -(Nhour-1); 
      /* This i shadows the function-scope i declared before the averaging loop. */
      int i=0; 

      for(i=0;i<Nhour+1;i++) 
      { 
      /* Filling is skipped once count reaches MAX_HOUR; calcul[] then keeps
         the values from the previous step, which are printed and regressed
         again (the last window therefore appears three times). */
      if(count<MAX_HOUR) 
       { 

       /* NOTE(review): when i == 0 the next statement overwrites calcul[0],
          so ref_avg never survives as the first point. */
       calcul[0]=ref_avg; 
       /* NOTE(review): count+pass is negative for the first elements of the
          first three steps (count = 1, 2, 3) and equals 24 for count = 23,
          i = 5; both are outside pval's 24 elements. */
       calcul[i] =pval[count+pass]; 
       pass++; 
       } 

    printf("calc=%.2f\n",calcul[i]); // For debug only 
    tab_time[i]=i+1; 

       /* Run the regression once the last point of the window is in place. */
       if(i==Nhour) 
      { 

      /* NOTE(review): no declaration of linear_regression is visible before
         this call in the posted snippet; length is the indeterminate value
         computed above. */
      linear_regression(tab_time, calcul, length, &m, &b); 
      printf("Slope= %.2f\n", m); 

      } 
      } 
    } 

    free(calcul); 
    calcul=NULL; 
    free(tab_time); 
    tab_time=NULL;    
    return 0; 
    } 
    /* end of the main function */ 


    /* This function is used to calculate the linear 
    regression as it was called above in the main function. 
    It compiles and runs very well, was just included for the 
    compilation and execution of the main function above where I have a problem. */ 


    /*
     * Simple least-squares fit of y = beta1 * x + beta0 over n samples.
     *
     * x, y   sample coordinates, n entries each
     * n      number of samples
     * beta1  receives the slope
     * beta0  receives the intercept
     *
     * With n <= 1 a message is printed and both outputs are set to 0. Both
     * outputs are also set to 0 when the x values have zero spread.
     * Always returns 0.
     */
    int linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
    {
        float sx = 0;
        float sy = 0;
        float sxx = 0;
        float sxy = 0;
        float spread;
        int k;

        /* A line cannot be fitted through fewer than two points. */
        if (n <= 1) {
            *beta1 = 0;
            *beta0 = 0;
            printf("Not enough data for regression \n");
            return 0;
        }

        /* Accumulate the four running sums used by the closed-form solution. */
        k = 0;
        while (k < n) {
            sx += x[k];
            sy += y[k];
            sxx += (x[k] * x[k]);
            sxy += (x[k] * y[k]);
            k++;
        }

        spread = (sxx - ((sx * sx)/n));
        if (spread == 0) {
            /* All x identical: the slope is undefined, report zeros. */
            *beta1 = 0;
            *beta0 = 0;
            return 0;
        }

        *beta1 = (sxy - ((sx * sy)/n))/spread;
        *beta0 = (sy - ((*beta1) * sx))/n;
        return 0;
    }

來源

2010-08-24 chriscol

你發佈的代碼甚至無法編譯（在'while(count{'處有語法錯誤，似乎缺了一大塊）。請嘗試把程序精簡成一個能重現問題、並且可以編譯的較小程序。另外，請順便把程序正確地縮進，這樣別人纔會願意閱讀它。 – Gilles 2010-08-24 21:12:34

@Gilles：感謝您的評論。我想是我在粘貼代碼時出了差錯。我已經更正了錯誤，現在代碼可以編譯了。 – chriscol 2010-08-25 08:51:34

我認爲下面這段代碼能給出合理的答案。問題中引用的參考平均值似乎是錯誤的。內存分配不是必需的。MAX_HOUR的值是24，但數組中只有23個數據值。用來構建待迴歸數組的索引計算是錯誤的，它會以負索引訪問pval數組（並因此導致錯誤的結果）。變量Nhour在初始化之前就被引用；變量length沒有被正確設置。代碼中也沒有有用的診斷輸出。

這裏main()的主體基本上被重寫了；對linear_regression()的修改則儘量保持最少。代碼的排版更加一致，並且使用了空白使其更易於閱讀。當剩餘數據不足以填滿5個值時，此版本會終止迴歸——目前還不清楚預期的終止條件是什麼。

#include <assert.h> 
#include <stdio.h> 
#include <stdlib.h>   
#include <string.h> 

void linear_regression(const float *x, const float *y, const int n, 
         float *beta1, float *beta0); 

/*
 * Driver: computes the mean of the first AVG_SPAN samples, then slides a
 * WINDOW-sample window over the data one position at a time. Each pass
 * regresses POINTS points (the reference mean followed by the window's
 * samples) against consecutive x values and prints the points, the slope
 * and the intercept.
 */
int main(void)
{
    enum { WINDOW = 5, AVG_SPAN = 7, POINTS = WINDOW + 1 };

    float pval[]={
     76.26, 68.68, 71.49, 73.08, 72.99, 70.36, 57.82, 58.98,
     69.71, 70.43, 77.53, 80.77, 70.30, 70.50, 70.79, 75.58,
     76.88, 80.20, 77.69, 80.80, 70.50, 85.27, 75.25,
     };
    const int sample_count = sizeof(pval)/sizeof(pval[0]);
    const int last_start = sample_count - WINDOW;   /* last window that still fits */
    float xs[POINTS];
    float ys[POINTS];
    float total = 0.0;
    float reference;
    float slope;
    float intercept;
    int start;
    int k;

    /* Reference value: mean of the leading AVG_SPAN samples. */
    for (k = 0; k < AVG_SPAN; k++)
    {
        total += pval[k];
    }
    reference = total/AVG_SPAN;
    printf("ref avg = %5.2f\n", reference);

    /* One regression per window start position. */
    for (start = 0; start <= last_start; start++)
    {
        /* Point 0 is always the reference mean. */
        ys[0] = reference;
        xs[0] = start + 1;
        printf("pass %d\ncalc_y[0] = %5.2f, calc_x[0] = %5.2f\n",
               start, ys[0], xs[0]);

        /* Points 1..WINDOW come from pval[start .. start+WINDOW-1]. */
        for (k = 0; k < WINDOW; k++)
        {
            const int slot = k + 1;
            const int src = start + k;

            ys[slot] = pval[src];
            xs[slot] = start + slot + 1;
            printf("calc_y[%d] = %5.2f, calc_x[%d] = %5.2f, n = %2d\n",
                   slot, ys[slot], slot, xs[slot], src);
        }

        linear_regression(xs, ys, POINTS, &slope, &intercept);
        printf("Slope= %5.2f, intercept = %5.2f\n", slope, intercept);
    }

    return 0;
}

/*
 * Ordinary least-squares fit of y = beta1 * x + beta0 over n samples.
 *
 * x, y   sample coordinates, n entries each
 * n      number of samples; must be greater than 1
 * beta1  receives the slope
 * beta0  receives the intercept
 *
 * Both outputs are set to 0 when the x values have no spread (the slope is
 * undefined). If assertions are compiled out and n <= 1, both outputs are
 * also set to 0 instead of dividing by n.
 *
 * The sums are accumulated in double around the means (two passes). The
 * one-pass form sumx2 - sumx*sumx/n subtracts two nearly equal numbers and
 * loses most of its digits in single precision, which also makes an exact
 * "variance != 0" test unreliable for constant x.
 */
void linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
    assert(n > 1);
    if (n <= 1)
    {
        /* Only reachable with NDEBUG: fail soft rather than divide by n. */
        *beta1 = 0.0f;
        *beta0 = 0.0f;
        return;
    }

    /* Pass 1: means of x and y. */
    double sum_x = 0.0;
    double sum_y = 0.0;
    for (int i = 0; i < n; i++)
    {
        sum_x += x[i];
        sum_y += y[i];
    }
    const double mean_x = sum_x / n;
    const double mean_y = sum_y / n;

    /* Pass 2: centred sums of squares and cross products. */
    double sxx = 0.0;
    double sxy = 0.0;
    for (int i = 0; i < n; i++)
    {
        const double dx = x[i] - mean_x;
        sxx += dx * dx;
        sxy += dx * (y[i] - mean_y);
    }

    if (sxx != 0.0)
    {
        const double slope = sxy / sxx;
        *beta1 = (float)slope;
        *beta0 = (float)(mean_y - slope * mean_x);
    }
    else
    {
        *beta1 = 0.0f;
        *beta0 = 0.0f;
    }
}

來源

2010-09-25 02:09:39

選擇並分析一個數組中的點的窗口

回答

相關問題