2012-09-27 101 views
1

任何人都可以給我提示如何開發後綴數組部分?我知道這個概念; LCP陣列設計,但我沒有得到如何在C中實現它?任何人都可以幫忙嗎?我知道後綴數組的用法,算法,因爲我已經閱讀了很多。我想要我必須對字符串的後綴進行排序的部分的實現提示。後綴數組實現

例如,如果字符串作爲 '香蕉',那麼:

數據結構應該是這樣的:($ - >助記符)

banana 
anana 
nana 
ana 
na 
a 
$ 

然後,保持它後,我需要對其進行排序,這意味着最低的子字符串應該位於最高點。那麼該怎麼做?字符串可以是很大的長度。如何做這件事?你能給我提示或鏈接嗎?我已經嘗試過,現在從你的幫助中思考。

+1

考慮提供您的嘗試解決代碼塊中的問題以及問題的上下文。 –

回答

0

這可能對你有幫助。

http://code.google.com/p/code-share/source/browse/trunk/cpp/algo/suffix_array/SuffixArray.h

#ifndef _SUFFIX_ARRAY_H 
#define _SUFFIX_ARRAY_H 

#include<algorithm> 
#include<cstring> 
#include <stdexcept> 

using namespace std; 
template<class T> 
struct comp_func 
{ 
    bool operator()(const T l,const T r) 
    { 
     return strcmp(l,r) < 0; 

    } 
}; 
template<class T =char> 
class SuffixArray 
{ 
    int len_; 
    T **data_; 

    public: 
    T *operator[](int i) 
    { 
     if(i<0 || i>len_) 
      throw std::out_of_range("Out of range error\n"); 
     return data_[i]; 
    } 
    SuffixArray(T *str):len_(strlen(str)),data_(new T*[len_]) 
    { 
     //len_ = strlen(str); 
     //data_= new T*[len]; 
     for(int i =0;i<len_;++i) 
     { 
      data_[i] = &str[i]; 
      cout << data_[i] << endl; 
     } 
     std::sort(&data_[0],&data_[len_],comp_func<T *>()); 
    } 
    void Print() 
    { 
     for(int i =0;i<len_;++i) 
     { 
      cout << data_[i] << endl; 
     } 

    } 
}; 


#endif 
+2

歡迎來到Stack Overflow。請注意,該問題標記爲C,但您的答案嚴格爲C++。你至少應該注意你的答案是C++而不是C。但是,通常你應該用C代碼來回答C語言問題;其他人可能不會容忍這個錯誤,即使你在這裏是一個新手。請花時間閱讀[FAQ]。歡迎來到SO,再次。 –

1

你可能想看看這個article

在文章的結尾,你會發現這個實現後綴樹:

注意:下面的代碼是C++,但是如果用C代替新的[]和delete []運算符,就像堆分配一樣,您可以非常輕鬆地重用它。

inline bool leq(int a1, int a2, int b1, int b2) { // lexic. order for pairs 
    return(a1 < b1 || a1 == b1 && a2 <= b2); 
}             // and triples 
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) { 
    return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3)); 
} 
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r 
static void radixPass(int* a, int* b, int* r, int n, int K) 
{ // count occurrences 
    int* c = new int[K + 1];       // counter array 
    for (int i = 0; i <= K; i++) c[i] = 0;   // reset counters 
    for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences 
    for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums 
    int t = c[i]; c[i] = sum; sum += t; 
    } 
    for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i];  // sort 
    delete [] c; 
} 

// find the suffix array SA of s[0..n-1] in {1..K}^n 
// require s[n]=s[n+1]=s[n+2]=0, n>=2 
void suffixArray(int* s, int* SA, int n, int K) { 
    int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2; 
    int* s12 = new int[n02 + 3]; s12[n02]= s12[n02+1]= s12[n02+2]=0; 
    int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0; 
    int* s0 = new int[n0]; 
    int* SA0 = new int[n0]; 

    // generate positions of mod 1 and mod 2 suffixes 
    // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1 
    for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i; 

    // lsb radix sort the mod 1 and mod 2 triples 
    radixPass(s12 , SA12, s+2, n02, K); 
    radixPass(SA12, s12 , s+1, n02, K); 
    radixPass(s12 , SA12, s , n02, K); 

    // find lexicographic names of triples 
    int name = 0, c0 = -1, c1 = -1, c2 = -1; 
    for (int i = 0; i < n02; i++) { 
    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) { 
     name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2]; 
    } 
    if (SA12[i] % 3 == 1) { s12[SA12[i]/3]  = name; } // left half 
    else     { s12[SA12[i]/3 + n0] = name; } // right half 
    } 

    // recurse if names are not yet unique 
    if (name < n02) { 
    suffixArray(s12, SA12, n02, name); 
    // store unique names in s12 using the suffix array 
    for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1; 
    } else // generate the suffix array of s12 directly 
    for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i; 

    // stably sort the mod 0 suffixes from SA12 by their first character 
    for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i]; 
    radixPass(s0, SA0, s, n0, K); 

    // merge sorted SA0 suffixes and sorted SA12 suffixes 
    for (int p=0, t=n0-n1, k=0; k < n; k++) { 
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) 
    int i = GetI(); // pos of current offset 12 suffix 
    int j = SA0[p]; // pos of current offset 0 suffix 
    if (SA12[t] < n0 ? 
     leq(s[i],  s12[SA12[t] + n0], s[j],  s12[j/3]) : 
     leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0])) 
    { // suffix from SA12 is smaller 
     SA[k] = i; t++; 
     if (t == n02) { // done --- only SA0 suffixes left 
     for (k++; p < n0; p++, k++) SA[k] = SA0[p]; 
     } 
    } else { 
     SA[k] = j; p++; 
     if (p == n0) { // done --- only SA12 suffixes left 
     for (k++; t < n02; t++, k++) SA[k] = GetI(); 
     } 
    } 
    } 
    delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; 
}