2017-08-11 81 views
-1

我試圖在FPGA(Zynq ZC 702)中實現用於小波變換的C代碼,但代碼卡住了,這是因爲內存問題,所以我應該優化我的代碼,但我不知道如何。如何優化我的c代碼?

任何人都可以請給我一些想法如何做到這一點?

這是主代碼

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <math.h> 
#include "wavemin.h" 
#include "waveaux.h" 
#include "waveaux.c" 
#include "wavemin.c" 

int main() { 
    printf("Hello World1 \n\r"); 
    wave_object obj; 
    wt_object wt; 
    float *inp, *out; 
    int N, i, J,k; 
    float temp[1280] = {}; 
    char *name = "db4"; 
    obj = wave_init(name); 

    printf("Hello World2 \n\r"); 
    N = 1280; 
    inp = (float*)malloc(sizeof(float) * N); 
    out = (float*)malloc(sizeof(float) * N); 

    //wmean = mean(temp, N); 
    for (i = 0; i < N; ++i) { 
     inp[i] = temp[i]; 

     printf("Hello World3 \n\r"); 
     //printf("%g \n", inp[i]); 
    } 

    J = 4; //Decomposition Levels 
    wt = wt_init(obj, "dwt", N, J); // Initialize the wavelet transform object 
    printf("Hello World4 \n\r"); 
    setDWTExtension(wt, "sym");  // Options are "per" and "sym". Symmetric is the default option 
    printf("Hello World5 \n\r"); 

    setWTConv(wt, "direct"); 
    printf("Hello World6 \n\r"); 
    dwt(wt, inp);  // Perform DWT 
    printf("Hello World7 \n\r"); 

    //getDWTAppx(wt, out, wt->length[0]); 
    // printf("Approximation Coefficients Level 1 \n"); 
    // for (i = 0; i < wt->length[0]; ++i) { 
    // printf("%g ", out[i]); 
    // } 
    // printf("\n\n"); 
    for (k = 1; k <= J; ++k) { 
     getDWTDetail(wt, out, wt->length[k], k); 
     printf("Detail Coefficients Level %d Length %d \n", 
       k, wt - length[k]); 
     for (i = 0; i < wt->length[k]; ++i) { 
      printf("%g ", out[i]); 
     } 
     printf("\n\n"); 
    } 
    wt_summary(wt);// Prints the full summary. 
    printf("Hello World8 \n\r"); 
    wave_free(obj); 
    wt_free(wt); 
    free(inp); 
    free(out); 

    return 0; 
} 

代碼的其他部分,其中有在main功能使用的功能:

#include "wavemin.h" 

wave_object wave_init(char *wname) { 
    wave_object obj = NULL; 
    int retval; 
    retval = 0; 

    if (wname != NULL) { 
     retval = filtlength(wname); 
    } 

    obj = (wave_object)malloc(sizeof(struct wave_set) + sizeof(float) * 4 * 
           retval); 

    obj->filtlength = retval; 
    obj->lpd_len = obj->hpd_len = obj->lpr_len = obj->hpr_len = obj->filtlength; 
    strcpy(obj->wname, wname); 
    if (wname != NULL) { 
     filtcoef(wname, obj->params, obj->params + retval, obj->params + 2 * 
       retval, obj->params + 3 * retval); 
    } 
    obj->lpd = &obj->params[0]; 
    obj->hpd = &obj->params[retval]; 
    obj->lpr = &obj->params[2 * retval]; 
    obj->hpr = &obj->params[3 * retval]; 

    return obj; 
} 

wt_object wt_init(wave_object wave, char *method, int siglength, int J) { 
    int size, i, MaxIter; 
    wt_object obj = NULL; 

    size = wave->filtlength; 

    MaxIter = wmaxiter(siglength, size); 

    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) { 
     obj = (wt_object)malloc(sizeof(struct wt_set) + sizeof(float) * 
           (siglength + 2 * J * (size + 1))); 
     obj->outlength = siglength + 2 * J * (size + 1); // Default 
     strcpy(obj->ext, "sym"); // Default 
    } 

    obj->wave = wave; 
    obj->siglength = siglength; 
    obj->J = J; 
    obj->MaxIter = MaxIter; 
    strcpy(obj->method, method); 

    if (siglength % 2 == 0) { 
     obj->even = 1; 
    } 
    else { 
     obj->even = 0; 
    } 

    strcpy(obj->cmethod, "direct"); // Default 
    obj->cfftset = 0; 
    obj->lenlength = J + 2; 
    obj->output = &obj->params[0]; 
    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) { 
     for (i = 0; i < siglength + 2 * J * (size + 1); ++i) { 
      obj->params[i] = 0.0; 
     } 
    } 
    //wave_summary(obj->wave); 

    return obj; 
} 


static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
        float *cD, int len_cD) { 
    int i, l, t, len_avg; 

    len_avg = wt->wave->lpd_len; 

    for (i = 0; i < len_cA; ++i) { 
     t = 2 * i + 1; 
     cA[i] = 0.0; 
     cD[i] = 0.0; 
     for (l = 0; l < len_avg; ++l) { 
      if ((t - l) >= 0 && (t - l) < N) { 
       cA[i] += wt->wave->lpd[l] * inp[t - l]; 
       cD[i] += wt->wave->hpd[l] * inp[t - l]; 
       printf("world1 \n\r"); 
      } 
      else if ((t - l) < 0) { 
       cA[i] += wt->wave->lpd[l] * inp[-t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[-t + l - 1]; 
       printf("world2 \n\r"); 
      } 
      else if ((t - l) >= N) { 
       cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1]; 

       printf("world3 \n\r"); 
      } 
     } 
    } 
} 

void dwt(wt_object wt, float *inp) { 
    int i, J, temp_len, iter, N, lp; 
    int len_cA; 
    float *orig, *orig2; 

    temp_len = wt->siglength; 
    J = wt->J; 
    wt->length[J + 1] = temp_len; 
    wt->outlength = 0; 
    wt->zpad = 0; 
    orig = (float*)malloc(sizeof(float) * temp_len); 
    orig2 = (float*)malloc(sizeof(float) * temp_len); 

    for (i = 0; i < wt->siglength; ++i) { 
     orig[i] = inp[i]; 
     printf("Hello1 \n\r"); 
    } 

    if (wt->zpad == 1) { 
     orig[temp_len - 1] = orig[temp_len - 2]; 
     printf("Hello2 \n\r"); 
    } 

    N = temp_len; 
    lp = wt->wave->lpd_len; 

    if (!strcmp(wt->ext, "sym")) { 
     //printf("\n YES %s \n", wt->ext); 
     i = J; 
     while (i > 0) { 
      N = N + lp - 2; 
      N = (int)ceil((float)N/2.0); 
      wt->length[i] = N; 
      wt->outlength += wt->length[i]; 
      i--; 
     } 
     wt->length[0] = wt->length[1]; 
     wt->outlength += wt->length[0]; 
     N = wt->outlength; 
     printf("Hello3 \n\r"); 

     for (iter = 0; iter < J; ++iter) { 
      len_cA = wt->length[J - iter]; 
      N -= len_cA; 
      dwt_sym(wt, orig, temp_len, orig2, len_cA, wt->params + N, len_cA); 
      temp_len = wt->length[J - iter]; 
      printf("Hello4 \n\r"); 

      if (iter == J - 1) { 
       for (i = 0; i < len_cA; ++i) { 
        wt->params[i] = orig2[i]; 
        printf("Hello5 \n\r"); 
       } 
      } else { 
       for (i = 0; i < len_cA; ++i) { 
        orig[i] = orig2[i]; 
        printf("Hello6 \n\r"); 
       } 
      } 
     } 
    } else { 
     printf("Signal extension can be either per or sym"); 
     exit(-1); 
    } 

    free(orig); 
    free(orig2); 
} 

void setDWTExtension(wt_object wt, char *extension) { 
    if (!strcmp(extension, "sym")) { 
     strcpy(wt->ext, "sym"); 
    } else { 
     printf("Signal extension can be either per or sym"); 
     exit(-1); 
    } 
} 

void setWTConv(wt_object wt, char *cmethod) { 
    if (!strcmp(cmethod, "direct")) { 
     strcpy(wt->cmethod, "direct"); 
    } 
} 

void getDWTDetail(wt_object wt, float *detail, int N, int level) { 
    /* 
     returns Detail coefficents at the jth level where j = 1,2,.., J 
     and Wavelet decomposition is stored as 
     [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector 
     Use getDWTAppx() to get A(J) 
     Level 1 : Length of D(J), ie N, is stored in wt->length[1] 
     Level 2 :Length of D(J-1), ie N, is stored in wt->length[2] 
     .... 
     Level J : Length of D(1), ie N, is stored in wt->length[J] 
    */ 
    int i, iter, J; 
    J = wt->J; 

    if (level > J) { 
     printf("The decomposition only has %d levels", J); 
    } 

    iter = wt->length[0]; 

    for (i = 1; i < level; ++i) { 
     iter += wt->length[i]; 
    } 

    for (i = 0; i < N; ++i) { 
     detail[i] = wt->output[i + iter]; 
    } 
} 

void getDWTAppx(wt_object wt, float *appx, int N) { 
    /* 
     Wavelet decomposition is stored as 
     [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector 

     Length of A(J) , N = wt->length[0] 
    */ 
    int i; 

    for (i = 0; i < N; ++i) { 
     appx[i] = wt->output[i]; 
    } 
} 

void wt_summary(wt_object wt) { 
    int i; 
    int J, t; 
    J = wt->J; 

    printf("Wavelet Coefficients are contained in vector : %s \n", "output"); 
    printf("\n"); 
    printf("Approximation Coefficients \n"); 
    printf("Level %d Access : output[%d] Length : %d \n", 
      1, 0, wt->length[0]); 
    printf("\n"); 
    printf("Detail Coefficients \n"); 
    t = wt->length[0]; 
    for (i = 0; i < J; ++i) { 
     printf("Level %d Access : output[%d] Length : %d \n", 
       i + 1, t, wt->length[i + 1]); 
     t += wt->length[i + 1]; 
    } 
    printf("\n"); 

} 
void wave_free(wave_object object) { 
    free(object); 
} 

void wt_free(wt_object object) { 
    free(object); 
} 

enter image description here

+3

請正確格式化您的代碼。 –

+0

也許這個問題更適合[code review](https://codereview.stackexchange.com/)? – xander

+0

如果您的代碼正在工作,而您只是在尋找優化提示,那麼您的位置不對。 StackOverflow用於解決代碼的特定問題。 「太慢」太廣泛。請檢查https://codereview.stackexchange.com/ – Yunnosch

回答

2

在您的代碼

  1. 務必檢查是否的malloc返回非NULL值

  2. 檢查棧和堆設置在連接文件中聲明巨大的局部變量,並做了很多mallocs的 - 我懷疑(學校名稱預兆)棧溢出,或者失敗的malloc。

它是裸機程序,還是在某種操作系統下運行它?

+1

代碼我會添加:停止鑄造'malloc()'的輸出,這是C++特有的,而不是C。 – perror

+0

但是它並沒有傷害,因爲他包含'stdlib',即使編譯器在C11之前它也不會導致程序崩潰。風格 –

+1

不僅關於樣式,它還可能掩蓋一些可以在編譯時檢測到的錯誤。例如,看到這個[SO問題](https://stackoverflow.com/questions/605845/do-i-cast-the (malloc的結果)(第一個答案的最後一點) – perror

0

只是爲了風格和簡潔的事,我會改寫這個:

if (siglength % 2 == 0) { 
     obj->even = 1; 
    } 
    else { 
     obj->even = 0; 
    } 

爲以下代碼:

obj->even = !(siglength % 2); 

,或者:

obj->even = (siglength % 2) ? 0 : 1; 

而且,我認爲這個功能還有優化的空間:

static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
        float *cD, int len_cD) { 
    int i, l, t, len_avg; 

    len_avg = wt->wave->lpd_len; 

    for (i = 0; i < len_cA; ++i) { 
     t = 2 * i + 1; 
     cA[i] = 0.0; 
     cD[i] = 0.0; 
     for (l = 0; l < len_avg; ++l) { 
      if ((t - l) >= 0 && (t - l) < N) { 
       cA[i] += wt->wave->lpd[l] * inp[t - l]; 
       cD[i] += wt->wave->hpd[l] * inp[t - l]; 
       printf("world1 \n\r"); 
      } 
      else if ((t - l) < 0) { 
       cA[i] += wt->wave->lpd[l] * inp[-t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[-t + l - 1]; 
       printf("world2 \n\r"); 
      } 
      else if ((t - l) >= N) { 
       cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1]; 

       printf("world3 \n\r"); 
      } 
     } 
    } 
} 

首先,你總是提到t - 1永不t本身,所以爲什麼不能有:

t = 2 * i; 

而且,我可以猜測,大量的運算,可以放置在內循環之外..如果你想優化,這裏有很多好的候選人。

關於優化的最後一句話!

您應該首先對您的軟件進行配置,然後在考慮優化之前查看您最花費的時間。如果不知道你的軟件真的在哪裏掙扎,你無法優化「在空中」。考慮使用gprof

PS:你永遠也不會使用作​​爲一個變量信l(ELL)......那是一種從數1(一個)關閉。考慮改變這也是,它可以改善閱讀。