3
給定兩個字符串,S1 S2 &。給予評分方案,其中缺口罰分,不匹配分數和比賽分數。耦合使用DP
找到S1具有與S2最佳匹配。
我的想法是列出所有可能的S1,然後通過一個與S2匹配一個。用蠻力列出所有可能的S1。然後使用dp將每個可能的S1與S2匹配。
是否有任何更快的方式做到這一點?或建議任何參考?
給定兩個字符串,S1 S2 &。給予評分方案,其中缺口罰分,不匹配分數和比賽分數。耦合使用DP
找到S1具有與S2最佳匹配。
我的想法是列出所有可能的S1,然後通過一個與S2匹配一個。用蠻力列出所有可能的S1。然後使用dp將每個可能的S1與S2匹配。
是否有任何更快的方式做到這一點?或建議任何參考?
使用維基百科和思考的一點點人能編寫了這樣的事情:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
#define MATCH_SCORE 2
#define MISMATCH_SCORE -1
#define GAP_SCORE 0
// Calculates the match score recursively.
long MatchScore(const char* p/* in: may contain 'x' */,
const char* s/* in: doesn't contain 'x' */)
{
long res;
if (*p && *s)
{
if ((*p == *s) ||
((*p == 'x') && (*s >= 'a') && (*s <= 'f')))
{
res = MatchScore(p + 1, s + 1) + MATCH_SCORE;
}
else
{
long s1 = MatchScore(p + 1, s + 1) + MISMATCH_SCORE;
long s2 = MatchScore(p, s + 1) + GAP_SCORE;
long s3 = MatchScore(p + 1, s) + GAP_SCORE;
res = MAX(s1, MAX(s2, s3));
}
}
else
{
res = GAP_SCORE * (long)(strlen(p) + strlen(s));
}
return res;
}
// Calculates the best matching string and the match score
// using dynamic programming, the score must be the same
// as returned by MatchScore().
void FindBestMatch(const char* p/* in: may contain 'x' */,
const char* s/* in: doesn't contain 'x' */,
long* score/* out: match score */,
char** match/* out: best matching string */)
{
size_t lp = strlen(p) + 1;
size_t ls = strlen(s) + 1;
size_t i, j;
long* table = (long*)malloc(lp * ls * sizeof(long));
for (i = 0; i < lp; i++)
table[0 * lp + i] = GAP_SCORE * i;
for (j = 0; j < ls; j++)
table[j * lp + 0] = GAP_SCORE * j;
for (j = 1; j < ls; j++)
{
for (i = 1; i < lp; i++)
{
if ((p[i-1] == s[j-1]) ||
((p[i-1] == 'x') && (s[j-1] >= 'a') && (s[j-1] <= 'f')))
{
table[j * lp + i] = table[(j-1) * lp + (i-1)] + MATCH_SCORE;
}
else
{
table[j * lp + i] =
MAX(table[(j-1) * lp + (i-1)] + MISMATCH_SCORE,
MAX(table[(j-1) * lp + i] + GAP_SCORE,
table[j * lp + (i-1)] + GAP_SCORE));
}
}
}
*score = table[lp * ls - 1];
// Now, trace back the score table and construct the best matching string
*match = (char*)malloc(lp);
(*match)[lp - 1] = '\0';
for (j = ls, i = lp; j || i;)
{
if ((p[i-1] == s[j-1]) ||
((p[i-1] == 'x') && (s[j-1] >= 'a') && (s[j-1] <= 'f')))
{
(*match)[i-1] = s[j-1];
j--;
i--;
}
else
{
if (table[(j-1) * lp + i] > table[j * lp + (i-1)])
{
j--;
}
else
{
(*match)[i-1] = p[i-1];
i--;
}
}
}
free(table);
}
int main(void)
{
const char* pattern = "acdxdcxecxf";
const char* str = "abdfdaaed";
long score;
char* match;
char* match2;
printf("pattern=\"%s\" str=\"%s\"\n", pattern, str);
FindBestMatch(pattern, str, &score, &match);
printf("score=%ld (recursive)\n", MatchScore(pattern, str));
printf("score=%ld best match=\"%s\"\n", score, match);
// Now repeat with the best match we've just found,
// the result must be the same
printf("\nRepeating with pattern=best match:\n\n");
printf("pattern=\"%s\" str=\"%s\"\n", match, str);
FindBestMatch(match, str, &score, &match2);
printf("score=%ld (recursive)\n", MatchScore(match, str));
printf("score=%ld best match=\"%s\"\n", score, match2);
free(match);
free(match2);
return 0;
}
輸出:
pattern="acdxdcxecxf" str="abdfdaaed"
score=14 (recursive)
score=14 best match="acdfdcaecdf"
Repeating with pattern=best match:
pattern="acdfdcaecdf" str="abdfdaaed"
score=14 (recursive)
score=14 best match="acdfdcaecdf"
我不知道是否有任何是錯誤(比明顯缺乏其他的錯誤檢查)。
什麼是「最佳匹配」? 'a',...,'f'可以重複用於多個'x'嗎? – aioobe
問題不明確,你是指最長的公共子序列(即非連續的)? –
沒有定義「匹配程度」功能,你所能做的就是嘗試每一種可能性,看看哪個得分最高。 –