2010-03-07 34 views
5

如何計算兩個NSStrings之間的差異數。計算兩個NSStrings之間的差異數

例子:

NSString 1 = "this is a string" 

NSString 2 = "Tihs isa string" 

應該返回:4(一爲大寫字母 「T」,一個是 「我」 時, 「H」 和缺少空間)

回答

12

什麼你」重新找的是Levenshtein Distance

在Objective-C的實現:

------------------------------------------------------------------------ 

// 
// NSString-Levenshtein.h 
// 
// Created by Rick Bourner on Sat Aug 09 2003. 
// [email protected] 

@interface NSString(Levenshtein) 

// calculate the smallest distance between all words in stringA and stringB 
- (float) compareWithString: (NSString *) stringB; 

// calculate the distance between two string treating them each as a 
// single word 
- (float) compareWithWord: (NSString *) stringB; 

// return the minimum of a, b and c 
- (int) smallestOf: (int) a andOf: (int) b andOf: (int) c; 

@end 

-------------------------------------------------------------------- 

// 
// NSString-Levenshtein.m 
// 
// Created by Rick Bourner on Sat Aug 09 2003. 
// [email protected] 

#import "NSString-Levenshtein.h" 


@implementation NSString(Levenshtein) 

// calculate the mean distance between all words in stringA and stringB 
- (float) compareWithString: (NSString *) stringB 
{ 
    float averageSmallestDistance = 0.0; 
    float smallestDistance; 
    float distance; 

    NSMutableString * mStringA = [[NSMutableString alloc] initWithString: self]; 
    NSMutableString * mStringB = [[NSMutableString alloc] initWithString: stringB]; 


    // normalize 
    [mStringA replaceOccurrencesOfString:@"\n" 
           withString: @" " 
           options: NSLiteralSearch 
            range: NSMakeRange(0, [mStringA length])]; 

    [mStringB replaceOccurrencesOfString:@"\n" 
           withString: @" " 
           options: NSLiteralSearch 
            range: NSMakeRange(0, [mStringB length])]; 

    NSArray * arrayA = [mStringA componentsSeparatedByString: @" "]; 
    NSArray * arrayB = [mStringB componentsSeparatedByString: @" "]; 

    NSEnumerator * emuA = [arrayA objectEnumerator]; 
    NSEnumerator * emuB; 

    NSString * tokenA = NULL; 
    NSString * tokenB = NULL; 

    // O(n*m) but is there another way ?!? 
    while (tokenA = [emuA nextObject]) { 

     emuB = [arrayB objectEnumerator]; 
     smallestDistance = 99999999.0; 

     while (tokenB = [emuB nextObject]) 
      if ((distance = [tokenA compareWithWord: tokenB]) < smallestDistance) 
       smallestDistance = distance; 

     averageSmallestDistance += smallestDistance; 

    } 

    [mStringA release]; 
    [mStringB release]; 

    return averageSmallestDistance/[arrayA count]; 
} 


// calculate the distance between two string treating them eash as a 
// single word 
- (float) compareWithWord: (NSString *) stringB 
{ 
    // normalize strings 
    NSString * stringA = [NSString stringWithString: self]; 
    [stringA stringByTrimmingCharactersInSet: 
       [NSCharacterSet whitespaceAndNewlineCharacterSet]]; 
    [stringB stringByTrimmingCharactersInSet: 
       [NSCharacterSet whitespaceAndNewlineCharacterSet]]; 
    stringA = [stringA lowercaseString]; 
    stringB = [stringB lowercaseString]; 


    // Step 1 
    int k, i, j, cost, * d, distance; 

    int n = [stringA length]; 
    int m = [stringB length]; 

    if(n++ != 0 && m++ != 0) { 

     d = malloc(sizeof(int) * m * n); 

     // Step 2 
     for(k = 0; k < n; k++) 
      d[k] = k; 

     for(k = 0; k < m; k++) 
      d[ k * n ] = k; 

     // Step 3 and 4 
     for(i = 1; i < n; i++) 
      for(j = 1; j < m; j++) { 

       // Step 5 
       if([stringA characterAtIndex: i-1] == 
         [stringB characterAtIndex: j-1]) 
        cost = 0; 
       else 
        cost = 1; 

       // Step 6 
       d[ j * n + i ] = [self smallestOf: d [ (j - 1) * n + i ] + 1 
              andOf: d[ j * n + i - 1 ] + 1 
              andOf: d[ (j - 1) * n + i -1 ] + cost ]; 
      } 

     distance = d[ n * m - 1 ]; 

     free(d); 

     return distance; 
    } 
    return 0.0; 
} 


// return the minimum of a, b and c 
- (int) smallestOf: (int) a andOf: (int) b andOf: (int) c 
{ 
    int min = a; 
    if (b < min) 
     min = b; 

    if(c < min) 
     min = c; 

    return min; 
} 

@end 

上述來源作者:裏克Bourner,http://www.merriampark.com/ldobjc.htm

+0

太棒了!那麼,我究竟如何才能使其發揮作用呢? – 2010-03-07 19:35:33

+0

我沒有最微不足道的想法!我發佈了Objective-C代碼,因爲(1)我認爲這是你工作的語言,(2)代碼來自「merriampark.com」,使它成爲可靠的源代碼。就我個人而言,我沒有使用iPhone應用程序編寫的語言的經驗。但算法和僞代碼應該足夠讓您暫時工作,對吧? – 2010-03-07 19:38:56

+3

這是NSString的一個類別,所以如果你把它放在適當的頭文件和實現文件中,然後包含頭文件,你應該可以做[string1 compareWithString:string1]。 – 2010-03-07 20:29:20

相關問題