優化解析器函數的C程序

gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2) 
c89

你好，優化解析器函數的C程序

我想知道如果我可以再優化該代碼。由於這是在一個快速的事務服務器中，它每秒會有很多呼叫。所以解析器必須非常快速和優化。

我想知道這段代碼是否還有可以改進的地方。

包含測試用例的完整代碼。函數g_get_dnis_user_part是我想要優化的。

我希望這是發佈到的正確論壇。

#include <stdio.h> 
#include <ctype.h> 
#include <string.h> 

/* Boolean status codes used as the parser's return value.
   Each is defined only if no earlier definition exists. */
#ifndef FALSE 
#define FALSE 0 
#endif 
#ifndef TRUE 
#define TRUE 1 
#endif 

/* Forward declaration: the parser is defined after main(), which calls it. */
static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size); 

/* Test driver: runs g_get_dnis_user_part() over a fixed table of inputs and
   prints one PASSED/FAILED line per input followed by a summary line.
   A test counts as "passed" when the parser returns TRUE.
   Always returns 0, regardless of how many inputs were rejected. */
int main(void)
{
    /* Test cases.
       NOTE(review): the "[email protected]" literals look like they were
       altered by an e-mail obfuscation filter - confirm the intended inputs. */
    static const char *const dnis_tests[] = {
        "[email protected]",
        "084637257310.1.8.34",
        "[email protected]",
        "",
        "[email protected]"
    };
    const size_t test_count = sizeof dnis_tests / sizeof dnis_tests[0];
    size_t passes = 0;
    size_t failures = 0;
    size_t t;

#define MAX_ADDRESS_LEN 32

    char user_part[MAX_ADDRESS_LEN];

    for (t = 0; t < test_count; t++) {
        const char *verdict;

        /* Start every test from a clean output buffer */
        memset(user_part, 0, sizeof user_part);

        if (g_get_dnis_user_part(dnis_tests[t], user_part, MAX_ADDRESS_LEN) == TRUE) {
            verdict = "PASSED";
            passes++;
        }
        else {
            verdict = "FAILED";
            failures++;
        }

        /* size_t has no portable C89 conversion specifier, so cast to
           unsigned long and print with %lu */
        printf("TEST %lu %s [ %s ] [ %s ]\n",
               (unsigned long)(t + 1), verdict, dnis_tests[t], user_part);
    }

    printf("ALL TEST COMPLETED PASSES [ %lu ] FAILURES [ %lu ]\n",
           (unsigned long)passes, (unsigned long)failures);

    return 0;
}

/* Get the user part from the complete dnis number.
 *
 * Copies the leading run of decimal digits of dnis into user_part and
 * nul-terminates it, e.g. "0846372573@host" -> "0846372573".
 *
 * dnis       nul-terminated input string
 * user_part  output buffer that receives the digits
 * size       capacity of user_part in bytes, including the nul terminator
 *
 * Returns TRUE when the digits are immediately followed by '@'.
 * Returns FALSE when a pointer is NULL, when size leaves no room for at
 * least one digit plus the terminator (user_part is left untouched in
 * those cases), when a character other than a digit or '@' ends the digit
 * run, or when the digits do not fit in user_part.  In the last two cases
 * user_part still holds the digits copied so far, nul-terminated.
 */
static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size)
{
    size_t i;

    if (dnis == NULL || user_part == NULL || size <= 1) {
        return FALSE;
    }

    /* Copy digits while keeping the last byte free for the terminator.
       The unsigned char cast is required: passing a negative char value
       to isdigit() is undefined behaviour. */
    for (i = 0; i < size - 1 && isdigit((unsigned char)dnis[i]) != 0; i++) {
        user_part[i] = dnis[i];
    }

    /* nul terminate the string */
    user_part[i] = '\0';

    /* The character that stopped the copy decides the result.  Checking it
       here (rather than only inside the loop) also accepts a user part that
       fills the buffer exactly and is followed by '@'. */
    return (dnis[i] == '@') ? TRUE : FALSE;
}

非常感謝您的任何建議，

來源

2012-03-30 ant2009

聽起來這個問題更適合發到 [CodeReview](http://codereview.stackexchange.com)。 – 2012-03-30 06:26:35

'#define TRUE 0'讓我感到恐慌。 – 2012-03-30 06:28:21

你有沒有做過效能分析（profiling），以確認你想要優化的函數真的是瓶頸？ – RedX 2012-03-30 06:39:06

我想我會把解析器寫成更像下面這樣：

/* Compact variant of the dnis user-part parser.
 *
 * Copies the leading decimal digits of dnis into user_part (at most
 * size - 1 of them), nul-terminates the result, and returns TRUE when the
 * character following the copied digits is '@', FALSE otherwise.
 * Returns FALSE without touching user_part when size is 0.
 */
static int g_get_dnis_user_part2(const char *dnis, char *user_part, size_t size)
{
    size_t i; /* declared before any statement so the code is valid C89 */

    if (size == 0) {
        return FALSE;
    }

    /* Cast to unsigned char: isdigit() is undefined for negative values */
    for (i = 0; i < size - 1 && isdigit((unsigned char)dnis[i]); i++) {
        user_part[i] = dnis[i];
    }
    user_part[i] = '\0';

    return (dnis[i] == '@') ? TRUE : FALSE;
}

如果你真的想要，也可以把對 isdigit 的調用換成類似 my_isdigit 的函數，其實現大致如下：

/* Table-driven replacement for isdigit().
 *
 * Returns 1 when input is one of the characters '0'..'9' and 0 otherwise.
 * Values outside 0..UCHAR_MAX (such as EOF or a sign-extended char) yield 0
 * instead of indexing outside the table.
 *
 * The lookup table is filled on the first call; that lazy initialisation
 * is not synchronised, so make the first call before starting threads.
 */
int my_isdigit(int input) {
    static char table[UCHAR_MAX+1];
    static int inited = 0; /* static so the flag survives between calls */
    int i;

    if (!inited) {
        /* '0'..'9' are guaranteed contiguous; <= so that '9' is included */
        for (i = '0'; i <= '9'; i++)
            table[i] = 1;
        inited = 1;
    }

    if (input < 0 || input > UCHAR_MAX)
        return 0;

    return table[input];
}

這裡我盡量讓代碼保持乾淨、自成一體；但如果想讓它再快一點點，可以把初始化顯式地（並且單獨地）完成，這樣就不必對每一個字符都檢查表是否已經初始化。（不過在分支預測表現良好的情況下，這樣做收益不大。）

除此之外，正如其他人已經提到的那樣，我會更改TRUE和FALSE的定義 - 您使用的定義非常糟糕。通常情況下，FALSE = 0和TRUE = 1，並且沒有看到在哪些地方進行了改變，這些都讓你獲得了任何有用的東西。

來源

2012-03-30 06:49:00

不要過度優化。這是一個非常簡單的函數，它處理的數據集小到足以放進緩存。它很可能已經快到極限了（假設已經開啟了編譯器的優化選項等）。但更重要的是，這只是你整個程序的一小部分。不要把所有精力都花在用彙編重寫這段代碼、仔細研讀x86體系結構手冊以確保CPU流水線始終保持全滿之類的事情上，因為其他地方肯定還有更多更容易見效的改進點。先做效能分析（profile），然後再優化分析器指出的慢的地方。

來源

2012-03-30 06:26:09 bdonlan

更換

if(isdigit(*dnis) != 0)

與

if (*dnis>='0' && *dnis<='9')

，如果你只關心十進制數字和不操心locales

雖然效果不顯著，但這樣可以省下函數調用的開銷。（你需要通過效能分析來確認這是否會產生明顯影響。）除此之外，我沒有看到需要做的重大更改。

來源

2012-03-30 06:26:15

-1

也許你可以從循環展開中受益。這樣寫不會太漂亮，但類似下面這樣的代碼應該可行（未經測試）：

/* One step of the unrolled parser.
 *
 * If *src is a decimal digit it is copied to *dst and both pointers are
 * advanced.  Otherwise st is set to TRUE when *src is '@' and to FALSE for
 * any other character, and control jumps to the enclosing function's
 * `finish` label.
 *
 * src and dst must be modifiable pointer lvalues, st an int lvalue, and
 * the enclosing function must define a `finish:` label.
 */
#define CHECKDIGIT(src, dst, st) \
do { \
    if (isdigit((unsigned char)*(src)) != 0) { \
        *(dst)++ = *(src)++; \
    } \
    else { \
        (st) = (*(src) == '@') ? TRUE : FALSE; \
        goto finish; \
    } \
} while (0)

/* Get the user part from the complete dnis number (loop unrolled by 8).
 *
 * Copies the leading decimal digits of dnis into user_part (at most
 * size - 1 of them) and nul-terminates the result.
 *
 * Returns TRUE when the digits are immediately followed by '@'.
 * Returns FALSE when a pointer is NULL or size <= 1 (user_part is left
 * untouched), when another character ends the digit run, or when the
 * digits do not fit in user_part.
 */
static int g_get_dnis_user_part(const char *dnis, char *user_part, size_t size)
{
    size_t i;
    size_t chunks;
    size_t rem;
    int status = FALSE;

    if (dnis == NULL || user_part == NULL || size <= 1) {
        return FALSE;
    }

    /* Make room for the nul terminator */
    size--;

    /* Divide size in chunks of 8 */
    chunks = size >> 3;
    rem = size & 0x7;

    for (i = 0; i < chunks; i++) {
        /* Check for valid digit, eight characters per iteration */
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
        CHECKDIGIT(dnis, user_part, status);
    }
    for (i = 0; i < rem; i++) {
        CHECKDIGIT(dnis, user_part, status);
    }

    /* Buffer is full: the result is still valid if '@' comes next */
    status = (*dnis == '@') ? TRUE : FALSE;

finish:
    /* nul terminate the string; user_part already points one past the
       last digit written because CHECKDIGIT advances it */
    *user_part = '\0';

    /* Status FALSE indicates that the @ was not found or possible
       corruption with dnis string */
    return status;
}

來源

2012-03-30 07:48:37 Linus

-1不適當地使用宏（更不用說'goto's） – 2012-03-30 08:03:18

我基本上同意 bdonlan 的看法：不要過度優化，如果有疑問就先測量。我甚至會比這更進一步。如果我正確理解了你的算法，你所做的是先檢查字符串開頭部分是否都是十進制數字字符，然後檢查緊隨其後的字符是否爲 @。

1. 你可以使用 strspn 來檢查一整類字符，直接用它即可。
2. 檢查緊隨其後的字符是否爲 '@'。

就是這樣。

gcc 把 strspn 作爲內置函數，我不認爲你能做得更好。請記住，運行時的瓶頸在於把這些字符串從內存中讀取出來；一旦它們進入緩存，你應該感覺不到什麼差別。

來源

2012-03-30 07:57:42

優化解析器函數的C程序

回答

相關問題