2016-11-17 70 views
-1

給出一個字符串s,按照格式「aaa」將其編碼爲「3 [a]」。編碼字符串的長度應該最短。 例如: 「abbabb」 到 「2 A2 [B]」將字符串「aaa」編碼爲「3 [a]」

更新:假設串只包含小寫字母

更新:這裏是我的C++代碼,但它的速度慢。我知道其中一項改進是使用KMP來計算當前字符串是否由重複字符串組合。

// this function is used to check if a string is combined by repeating a substring. 
// Also Here can be replaced by doing KMP algorithm for whole string to improvement 

bool checkRepeating(string& s, int l, int r, int start, int end){ 
    if((end-start+1)%(r-l+1) != 0) 
     return false; 
    int len = r-l+1; 
    bool res = true; 
    for(int i=start; i<=end; i++){ 
     if(s[(i-start)%len+l] != s[i]){ 
      res = false; 
      break; 
     } 
    } 
    return res; 
} 

// this function is used to get the length of the current number 
int getLength(int l1, int l2){ 
    return (int)(log10(l2/l1+1)+1); 
} 

string shortestEncodeString(string s){ 
    int len = s.length(); 

    vector< vector<int> > res(len, vector<int>(len, 0)); 
    //Initial the matrix 
    for(int i=0; i<len; i++){ 
     for(int j=0; j<=i; j++){ 
      res[j][i] = i-j+1; 
     } 
    } 

    unordered_map<string, string> record; 

    for(int i=0; i<len; i++){ 
     for(int j=i; j>=0; j--){ 

      string temp = s.substr(j, i-j+1); 
/* if the current substring has showed before, then no need to compute again 
* Here is a example for this part: if the string is "abcabc". 
* if we see the second "abc", then no need to compute again, just use the 
* result from first "abc". 
**/ 
      if(record.find(temp) != record.end()){ 
       res[j][i] = record[temp].size(); 
       continue; 
      } 
      string ans = temp; 
      for(int k=j; k<i; k++){ 

       string str1 = s.substr(j, k-j+1); 
       string str2 = s.substr(k+1, i-k); 
       if(res[j][i] > res[j][k] + res[k+1][i]){ 
        res[j][i] = res[j][k]+res[k+1][i]; 
        ans = record[str1] + record[str2]; 
       } 

       if(checkRepeating(s, j, k, k+1, i) == true && res[j][i] > 2+getLength(k-j+1, i-k)+res[j][k]){ 
        res[j][i] = 2+getLength(k-j+1, i-k)+res[j][k]; 
        ans = to_string((i-j+1)/(k-j+1)) + '[' + record[str1] +']'; 
       } 
      } 
      record[temp] = ans; 
     } 

    } 

    return record[s]; 
} 
+0

我嘗試了貪婪溶液。但複雜性非常糟糕。嘗試使用長度爲1,2,3的索引0中的字符串...然後查找該子字符串不重複的索引,然後用該字符串的其餘部分調用該函數。也可以用len 1,2,3 ... – NHa

+0

做同樣的前綴字符串好吧,如果你想讓別人幫你排除故障,請將代碼編輯到問題中。 –

回答

0

對於問題陳述而言,我很少從頭開始,所以使用JavaScript對此進行了很快的嘗試,因爲它很容易演示。註釋在代碼中,但基本上存在連接相鄰元素,運行長度檢查,連接相鄰元素等的交替階段,直到只剩下一個元素 - 最終編碼值。

quick algorithm sketch

我希望這有助於。

function encode(str) { 
 
    var tmp = str.split(''); 
 
    var arr = []; 
 

 
    do { 
 
    if (tmp.length === arr.length) { 
 
     // Join adjacent elements 
 
     arr.length = 0; 
 
     for (var i = 0; i < tmp.length; i += 2) { 
 
     if (i < tmp.length - 1) { 
 
      arr.push(tmp[i] + tmp[i + 1]); 
 
     } else { 
 
      arr.push(tmp[i]); 
 
     } 
 
     } 
 
     tmp.length = 0; 
 
    } else { 
 
     // Swap arrays and clear tmp 
 
     arr = tmp.slice(); 
 
     tmp.length = 0; 
 
    } 
 

 
    // Build up the run-length strings 
 
    for (var i = 0; i < arr.length;) { 
 
     var runlength = runLength(arr, i); 
 
     if (runlength > 1) { 
 
     tmp.push(runlength + '[' + arr[i] + ']'); 
 
     } else { 
 
     tmp.push(arr[i]); 
 
     } 
 
     i += runlength; 
 
    } 
 
    console.log(tmp); 
 
    } while (tmp.length > 1); 
 
    return tmp.join(); 
 
} 
 

 
// Get the longest run length from a given index 
 
function runLength(arr, ind) { 
 
    var count = 1; 
 
    for (var i = ind; i < arr.length - 1; i++) { 
 
    if (arr[i + 1] === arr[ind]) { 
 
     count++; 
 
    } else { 
 
     break; 
 
    } 
 
    } 
 
    return count; 
 
}
<input id='inp' value='abbabb'> 
 
<button type="submit" onClick='javascript:document.getElementById("result").value=encode(document.getElementById("inp").value)'>Encode</button> 
 
<br> 
 
<input id='result' value='2[a2[b]]'>