2015-10-04 196 views
4

在一個C項目中,我寫了一個函數來返回正則表達式搜索中的第一個捕獲組。Posix正則表達式捕獲組不正確Posix正則表達式搜索

我期望達到的效果最好由output of this online parser來說明(請注意右側面板上的捕獲組輸出)。

我寫的功能和測試代碼是如下:

#include <stdio.h> 
#include <stdlib.h> 
#include <regex.h> 
#include <string.h> 
#include <assert.h> 

typedef int bool; 
#define true 1 
#define false 0 

/* 
* Obtains the first group that matches the regex pattern in input string 
* The output pointer is made to point to: 
* - in case the regexp compilation succeeded 
*  - the result in case there was a match found 
*  - or NULL in case there was no match 
* - in case the regexp compilation failed 
*  - the error from the compilation process 
* 
* If there was an error while compiling the input reg_exp, then this function 
* returns false, if not, it returns true. 
* 
* NOTE: The user is responsible for free-ing the memory for *output 
*/ 
bool get_first_match(const char* search_str, const char* reg_exp, char** output) 
{ 
    int res, len; 
    regex_t preg; 
    regmatch_t pmatch; 

    // Compile the input regexp 
    if((res = regcomp(&preg, reg_exp, REG_EXTENDED)) != 0) 
    { 
     char* error = (char*)malloc(1024*sizeof(char)); 
     regerror(res, &preg, error, 1024); 
     output = &error; 
     return false; 
    } 

    res = regexec(&preg, search_str, 1, &pmatch, REG_NOTBOL); 
    if(res == REG_NOMATCH) 
    { 
     return true; 
    } 

    len = pmatch.rm_eo - pmatch.rm_so; 
    char* result = (char*)malloc((len + 1) * sizeof(char)); 
    memcpy(result, search_str + pmatch.rm_so, len); 
    result[len] = 0; // null-terminate the result 
    *output = result; 
    regfree(&preg); 
    return true; 
} 

int main() 
{ 
    const char* search_str = "param1=blah&param2=blahblah&param3=blahetc&map=/usr/bin/blah.map"; 
    const char* regexp = "map=([^\\&]*)(&|$)"; 
    char* output; 
    bool status = get_first_match(search_str, regexp, &output); 
    if(status){ 
     if(output) 
      printf("Found match: %s\n", output); 
     else 
      printf("No match found."); 
    } 
    else{ 
     printf("Regex error: %s\n", output); 
    } 
    free(output); 

    return 0; 
} 

然而,output I get from the C code包含在它的字符串的map=一部分,即使我已經在我的第一個捕獲組明確排除它。

我能做些什麼來獲得沒有map=部分的捕獲組?爲什麼我得到的在線解析器的結果與我的C程序相比有所不同?

回答

2

這裏發生的事情是,你有模式:

const char* regexp = "map=([^\\&]*)(&|$)"; 

哪裏,結果(我們稱之爲數組result),將根據填充:

result = { 
    "map=/usr/bin/blah.map", 
    "/usr/bin/blah.map", 
    "" 
} 

現在,因爲您按如下方式撥打regexc

res = regexec(&preg, search_str, 1, &pmatch, REG_NOTBOL); 
// Notice the argument 1 here ---^ 

參數1表示最多一個結果將被存儲在pmatch陣列中。因此,您從上面獲得result[0]。既然你想第一個匹配組(而不是整個匹配的字符串),你必須:

  1. 定義pmatch是大小至少2的數組。
  2. 通過2作爲上述調用regexc的參數。

做上述後:

bool get_first_match(const char* search_str, const char* reg_exp, char** output) 
{ 
    int res, len; 
    regex_t preg; 
    regmatch_t pmatch[3]; 
    // SNIP 
    // SNIP 
    res = regexec(&preg, search_str, 2, &pmatch, REG_NOTBOL); 
    if(res == REG_NOMATCH) 
    { 
     return true; 
    } 
    // Notice changes in the lines below 
    // I am using pmatch[1] since that is equivalent to our 
    // result[1] from above 
    len = pmatch[1].rm_eo - pmatch[1].rm_so; 
    char* result = (char*) malloc((len + 1) * sizeof(char)); 
    memcpy(result, search_str + pmatch[1].rm_so, len); 
    result[len] = 0; // null-terminate the result 
    *output = result; 
    regfree(&preg); 
    return true; 
} 

和程序works as expected

+0

感謝您的精心和精心的解釋!真的有幫助。 – balajeerc