2011-10-07 75 views
3

給定兩個字符串,S1 S2 &。給予評分方案,其中缺口罰分,不匹配分數和比賽分數。耦合使用DP

找到S1具有與S2最佳匹配。

我的想法是列出所有可能的S1,然後通過一個與S2匹配一個。用蠻力列出所有可能的S1。然後使用dp將每個可能的S1與S2匹配。

是否有任何更快的方式做到這一點?或建議任何參考?

+0

什麼是「最佳匹配」? 'a',...,'f'可以重複用於多個'x'嗎? – aioobe

+1

問題不明確,你是指最長的公共子序列(即非連續的)? –

+0

沒有定義「匹配程度」功能,你所能做的就是嘗試每一種可能性,看看哪個得分最高。 –

回答

0

使用維基百科和思考的一點點人能編寫了這樣的事情:

#include <stdio.h> 
#include <string.h> 
#include <stdlib.h> 

#ifndef MAX 
#define MAX(a,b) ((a)>(b)?(a):(b)) 
#endif 

#define MATCH_SCORE  2 
#define MISMATCH_SCORE -1 
#define GAP_SCORE  0 

// Calculates the match score recursively. 
long MatchScore(const char* p/* in: may contain 'x' */, 
       const char* s/* in: doesn't contain 'x' */) 
{ 
    long res; 

    if (*p && *s) 
    { 
    if ((*p == *s) || 
     ((*p == 'x') && (*s >= 'a') && (*s <= 'f'))) 
    { 
     res = MatchScore(p + 1, s + 1) + MATCH_SCORE; 
    } 
    else 
    { 
     long s1 = MatchScore(p + 1, s + 1) + MISMATCH_SCORE; 
     long s2 = MatchScore(p, s + 1) + GAP_SCORE; 
     long s3 = MatchScore(p + 1, s) + GAP_SCORE; 
     res = MAX(s1, MAX(s2, s3)); 
    } 
    } 
    else 
    { 
    res = GAP_SCORE * (long)(strlen(p) + strlen(s)); 
    } 

    return res; 
} 

// Calculates the best matching string and the match score 
// using dynamic programming, the score must be the same 
// as returned by MatchScore(). 
void FindBestMatch(const char* p/* in: may contain 'x' */, 
        const char* s/* in: doesn't contain 'x' */, 
        long* score/* out: match score */, 
        char** match/* out: best matching string */) 
{ 
    size_t lp = strlen(p) + 1; 
    size_t ls = strlen(s) + 1; 
    size_t i, j; 
    long* table = (long*)malloc(lp * ls * sizeof(long)); 

    for (i = 0; i < lp; i++) 
    table[0 * lp + i] = GAP_SCORE * i; 

    for (j = 0; j < ls; j++) 
    table[j * lp + 0] = GAP_SCORE * j; 

    for (j = 1; j < ls; j++) 
    { 
    for (i = 1; i < lp; i++) 
    { 
     if ((p[i-1] == s[j-1]) || 
      ((p[i-1] == 'x') && (s[j-1] >= 'a') && (s[j-1] <= 'f'))) 
     { 
     table[j * lp + i] = table[(j-1) * lp + (i-1)] + MATCH_SCORE; 
     } 
     else 
     { 
     table[j * lp + i] = 
      MAX(table[(j-1) * lp + (i-1)] + MISMATCH_SCORE, 
       MAX(table[(j-1) * lp + i] + GAP_SCORE, 
        table[j * lp + (i-1)] + GAP_SCORE)); 
     } 
    } 
    } 

    *score = table[lp * ls - 1]; 

    // Now, trace back the score table and construct the best matching string 

    *match = (char*)malloc(lp); 
    (*match)[lp - 1] = '\0'; 

    for (j = ls, i = lp; j || i;) 
    { 
    if ((p[i-1] == s[j-1]) || 
     ((p[i-1] == 'x') && (s[j-1] >= 'a') && (s[j-1] <= 'f'))) 
    { 
     (*match)[i-1] = s[j-1]; 
     j--; 
     i--; 
    } 
    else 
    { 
     if (table[(j-1) * lp + i] > table[j * lp + (i-1)]) 
     { 
     j--; 
     } 
     else 
     { 
     (*match)[i-1] = p[i-1]; 
     i--; 
     } 
    } 
    } 

    free(table); 
} 

int main(void) 
{ 
    const char* pattern = "acdxdcxecxf"; 
    const char* str = "abdfdaaed"; 
    long score; 
    char* match; 
    char* match2; 

    printf("pattern=\"%s\" str=\"%s\"\n", pattern, str); 
    FindBestMatch(pattern, str, &score, &match); 
    printf("score=%ld (recursive)\n", MatchScore(pattern, str)); 
    printf("score=%ld best match=\"%s\"\n", score, match); 

    // Now repeat with the best match we've just found, 
    // the result must be the same 
    printf("\nRepeating with pattern=best match:\n\n"); 

    printf("pattern=\"%s\" str=\"%s\"\n", match, str); 
    FindBestMatch(match, str, &score, &match2); 
    printf("score=%ld (recursive)\n", MatchScore(match, str)); 
    printf("score=%ld best match=\"%s\"\n", score, match2); 

    free(match); 
    free(match2); 
    return 0; 
} 

輸出:

pattern="acdxdcxecxf" str="abdfdaaed" 
score=14 (recursive) 
score=14 best match="acdfdcaecdf" 

Repeating with pattern=best match: 

pattern="acdfdcaecdf" str="abdfdaaed" 
score=14 (recursive) 
score=14 best match="acdfdcaecdf" 

我不知道是否有任何是錯誤(比明顯缺乏其他的錯誤檢查)。