2015-09-24 63 views
2

我有一個trie(也稱爲前綴樹)。給定一個前綴,我想獲得一個以前綴開頭的單詞列表。Javascript:在前綴樹中查找以給定前綴開頭的精確10個單詞

這是關於這個問題唯一的事情是,我只想要與給定prefix--不是所有的人開始的話。鑑於此,可以進行優化。

我的代碼如下我知道工作正常。特里結構中的每個節點都有一個children屬性和一個this_is_the_end_of_a_word屬性。例如,當您插入「hi」時,這是特里結果的樣子:

trie

問題:給定一個前綴,我想獲得一個以前綴開頭的單詞列表。

我對這個問題的處理方法是:低下前綴樹,跟隨prefix的字符,直到找到對應於prefix最後一個字符的節點。現在您應該在此節點上執行DFS,並跟蹤列表中具有this_is_the_end_of_a_word === true的節點。但是當你的列表長度等於10時,你應該停止搜索,並返回列表。

我認爲我的方法是合理的,但是我在實現時遇到了問題 - 特別是因爲我試圖使用遞歸DFS,所以我不確定如何通過「全局」列表遞歸調用。我知道我應該使用閉包,但我是JavaScript新手,我不確定如何去做。下面是我已經嘗試過的一個例子。

我的特里類(我知道這個代碼的工作,這只是讓你可以看到我是如何組織我的數據結構。)

var Trie = function() { 

    var that = Object.create(Trie.prototype); 
    that.children = {}; //mapping: next character -> child nodes 
    that.this_is_the_end_of_a_word = false; 

    that.insertWord = function(word) { 

     var current_node = that; 

     for (var i = 0; i < word.length; i++) { 
      var c = word[i] 
       //if character is not in the trie already, add it 
      if (!(c in current_node.children)) { 
       current_node.children[c] = Trie(); 
      } 
      //update current_node 
      current_node = current_node.children[c]; 
     }; 

     //after adding all the chars of the word, 
     //you are at the end of a word 
     current_node.this_is_the_end_of_a_word = true; 
    } 

    that.insertWords = function(words) { 
     for (var i = 0; i < words.length; i++) { 
      that.insertWord(words[i]); 
     } 
    } 

    that.contains = function(word) { 
     //start at the root 
     var current_node = that; 
     for (var i = 0; i < word.length; i++) { 
      var c = word[i]; 

      //if the word's character isn't a child of the current_node, 
      //the word isn't in the trie 
      if (!(c in current_node.children)) { 
       return false; 
      } 
      //move down the trie, update current_node 
      current_node = current_node.children[c]; 
     }; 
     return current_node.this_is_the_end_of_a_word; 
    } 

    Object.freeze(that); 
    return that; 
} 

我的第一個方法(有很多bug)

num_words_to_go = 10; 
//this global is bad practice; 
//I want to put this as the argument to a closure 
//so it's passed between recursive calls 

that.getWords = function(start_node, prefix) { 
    console.log(0); 
    var words = []; 

    //if start node is a word, add it 
    if (start_node.this_is_the_end_of_a_word) { 
     words.push(start_node); 
     num_words_to_go--; 
    } 

    if (num_words_to_go <= 0 || !start_node.children) { 
     return words; 
    } 

    return start_node.children.forEach(
           currentValue.getWords(
            currentValue, prefix + <character for this child>)); 

    /*I can't think of a nice way to write this without going through all of the children. 
    I know I don't need to, because I only need to find 10 words and get out. 
    This is why I was leaning towards the recursive DFS. 
    */ 

}

第二個辦法:我還發現,我一直在尋找一個Python的例子: http://v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries 我試着翻譯他的考試歡迎使用JavaScript,但仍有問題,all_suffixes

that.all_suffixes = function (prefix){ 
    results = []; 
    if (that.this_is_the_end_of_a_word) results.push(prefix); 
    if (!(that.children)) return results; 
    if (results.length > 2) return results; 
    var callback = function(currentValue, i, array){ 
     return currentValue.all_suffixes(prefix+array[i]); 
    } 
    arr = that.children.forEach(callback, that); 
     //[child.all_suffixes(prefix + char) for (char, child) in self.children.items()] 
    return concat(reduce(concat, arr), results);   
} 

that.autocomplete = function(prefix){ 
    current_node = that; 
    for(var i = 0; i < prefix.length; i++){ 
     var c = prefix[i]; 
     //if there is nothing in the trie with this prefix 
     if (!(c in current_node.children)){ 
      return []; 
     } 
     current_node = current_node.children[c]; 
    } 
    return list(current_node.all_suffixes(prefix)) 
} 
+0

請看看到http://stackoverflow.com/questions/32595151/how-to-retrieve-all-the-data-words-in-a-radix -trie-in-javascript –

+0

謝謝,我看了那篇文章,但是它查找了以前綴開頭的所有單詞。然而,這個問題的獨特之處在於我只需要以給定前綴開始的10個單詞 - 而不是全部。 – user2946797

+0

爲什麼Javascript與這個問題有關係嗎?看來你在問2個問題,如何編寫/改進算法來做到這一點,以及如何在JavaScript中做到這一點。您可能能夠在其他語言中找到類似的答案,哪一部分是您感到困惑的?解決問題的方法或如何以特定語言實現它的方法? – Tai

回答

0

基本上我把你的模型並應用新方法getWords(word[, count])Trie類。我改變了方法contains,因爲我也需要getWords中的功能。所以我創建了一個新方法getNode,它返回找到單詞或部分的節點。

方法getWords首先查找單詞(部分),然後遍歷數據結構。當找到一個單詞時,它會被推送到結果集。如果結果集長度大於或等於所需長度,則終止迭代(因此Array.prototype.some),並停止遞歸調用fork

that.getWords = function (word, count) { 

     function fork(n, w) { 

      function child(c) { 
       return fork(n.children[c], w + c); 
      } 

      n.isWord && words.push(w); 
      return words.length >= count || Object.keys(n.children).some(child); 
     } 

     var words = [], 
      current_node = that.getNode(word); 

     if (current_node) { 
      fork(current_node, word); 
      return words; 
     } 
    } 

附註:我改變this_is_the_end_of_a_wordisWord

輸入

  1. 創建的Trie一個新的實例。
  2. 插入一些單詞進行測試。

輸出

  1. 測試,如果線索包含'motor',返回false。
  2. 測試trie是否包含'te',返回false。
  3. 測試trie是否包含'ten',返回true。
  4. 獲取全部'ind'開頭的單詞(8個可用的,顯示8)。
  5. 獲取以'in'開頭的10個單詞,(16個可用,顯示10個)。
  6. 整個線索。

var Trie = function() { 
 

 
    var that = Object.create(Trie.prototype); 
 
    that.children = {}; //mapping: next character -> child nodes 
 
    that.isWord = false; 
 

 
    that.insertWord = function (word) { 
 
     var current_node = that; 
 
     for (var i = 0; i < word.length; i++) { 
 
      var c = word[i] 
 
      //if character is not in the trie already, add it 
 
      if (!(c in current_node.children)) { 
 
       current_node.children[c] = Trie(); 
 
      } 
 
      //update current_node 
 
      current_node = current_node.children[c]; 
 
     }; 
 

 
     //after adding all the chars of the word, 
 
     //you are at the end of a word 
 
     current_node.isWord = true; 
 
    } 
 

 
    that.insertWords = function (words) { 
 
     for (var i = 0; i < words.length; i++) { 
 
      that.insertWord(words[i]); 
 
     } 
 
    } 
 

 
    that.getNode = function (word) { 
 
     //start at the root 
 
     var current_node = that; 
 
     for (var i = 0; i < word.length; i++) { 
 
      var c = word[i]; 
 

 
      //if the word's character isn't a child of the current_node, 
 
      //the word isn't in the trie 
 
      if (!(c in current_node.children)) { 
 
       return; 
 
      } 
 
      //move down the trie, update current_node 
 
      current_node = current_node.children[c]; 
 
     }; 
 
     return current_node; 
 
    } 
 

 
    that.contains = function (word) { 
 
     var current_node = that.getNode(word); 
 
     if (current_node) { 
 
      return current_node.isWord; 
 
     } 
 
     return false; 
 
    } 
 

 
    that.getWords = function (word, count) { 
 

 
     function fork(n, w) { 
 

 
      function child(c) { 
 
       return fork(n.children[c], w + c); 
 
      } 
 

 
      n.isWord && words.push(w); 
 
      return words.length >= count || Object.keys(n.children).some(child); 
 
     } 
 

 
     var words = [], 
 
      current_node = that.getNode(word); 
 

 
     if (current_node) { 
 
      fork(current_node, word); 
 
      return words; 
 
     } 
 
    } 
 

 
    // freeze does lock the isWord property, which is not required here 
 
    //Object.freeze(that); 
 
    return that; 
 
} 
 

 
var trie = new Trie(); 
 
trie.insertWords([ 
 
    'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction', 
 
    'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn', 
 
    'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten', 
 
    'to', 'zoo', 'zoom' 
 
]); 
 
document.write(trie.contains('motor') + '<br>'); // false 
 
document.write(trie.contains('te') + '<br>'); // false 
 
document.write(trie.contains('ten') + '<br>'); // true 
 
document.write('<pre>' + JSON.stringify(trie.getWords('ind'), 0, 4) + '</pre>'); 
 
document.write('<pre>' + JSON.stringify(trie.getWords('in', 10), 0, 4) + '</pre>'); 
 
document.write('<pre>' + JSON.stringify(trie, 0, 4) + '</pre>');