2010-07-20 99 views
3

除了好玩之外,我沒有任何理由今天實施了Trie。目前它支持add()和search(),remove()也應該被實現,但我認爲這很簡單。優化Trie實施

它功能完善,但填充數據需要一點點太多,我的口味。我使用這個列表作爲數據源:http://www.isc.ro/lists/twl06.zip(在SO的其他地方找到)。加載需要大約11秒。我最初的實施花了15秒左右,所以我已經給它一個很好的性能提升,但我仍然不滿意:)

我的問題是:還有什麼可以給我一個(大幅)的性能提升?我不受這種設計的約束,可以接受徹底的檢修。

class Trie 
{ 
    private $trie; 
    public function __construct(TrieNode $trie = null) 
    { 
     if($trie !== null) $this->trie = $trie; 
     else $this->trie = new TrieNode(); 
     $this->counter = 0; 
    } 
    public function add($value, $val = null) 
    { 
     $str = ''; 
     $trie_ref = $this->trie; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      $trie_ref = $trie_ref->addNode($str); 
     } 
     $trie_ref->value = $val; 
     return true; 
    } 
    public function search($value, $only_words = false) 
    { 
     if($value === '') return $this->trie; 
     $trie_ref = $this->trie; 
     $str = ''; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      if($trie_ref = $trie_ref->getNode($str)) 
      { 
       if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
       continue; 
      } 
      return false; 
     } 
     return false; 
    } 
    public function extractWords(TrieNode $trie) 
    { 
     $res = array(); 
     foreach($trie->getChildren() as $child) 
     { 
      if($child->value !== null) $res[] = $child->value; 
      if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child)); 
     } 
     return $res; 
    } 
} 
class TrieNode 
{ 
    public $value; 
    protected $children = array(); 
    public function addNode($index) 
    { 
     if(isset($this->children[$index])) return $this->children[$index]; 
     return $this->children[$index] = new self(); 
    } 
    public function getNode($index) 
    { 
     return (isset($this->children[$index]) ? $this->children[$index] : false); 
    } 
    public function getChildren() 
    { 
     return $this->children; 
    } 
    public function hasChildren() 
    { 
     return count($this->children)>0; 
    } 
} 
+0

您是否已經成型使用[XHProf的](http://pecl.php.net/package/xhprof)的代碼或[Xdebug的](HTTP:// PECL .php.net /包/ Xdebug的)? – Charles 2010-07-20 17:38:03

+0

我還沒有,很好的電話。我明天會! – 2010-07-20 18:55:59

回答

3

不知道PHP,但

在下面的方法:

public function add($value, $val = null) 
    { 
     $str = ''; 
     $trie_ref = $this->trie; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      $trie_ref = $trie_ref->addNode($str); 
     } 
     $trie_ref->value = $val; 
     return true; 
    } 
    public function search($value, $only_words = false) 
    { 
     if($value === '') return $this->trie; 
     $trie_ref = $this->trie; 
     $str = ''; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      if($trie_ref = $trie_ref->getNode($str)) 
      { 
       if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
       continue; 
      } 
      return false; 
     } 
     return false; 
    } 

爲什麼你甚至不需要$str .= $char(我想是附加)?這本身會改變你的O(n)時間加法/搜索到Omega(n^2)(n的長度爲$value)而不是O(n)。

在線索中,通常在走繩時走線,即找到基於當前字符的下一個節點,而不是當前前綴。

+0

好點,絕對沒有必要。但會增加速度嗎?但內存肯定會更容易。無論如何,我會明天實施並在此發佈結果。 – 2010-07-20 18:59:37

+0

@ Dennis:是的,IMO。這實際上是嘗試可以如此快速的主要原因之一 – 2010-07-20 20:36:12

+0

@Dennis:我很好奇你要發佈的結果:-) – 2010-08-09 21:00:22

1

我想這個實現是針對鍵值類型的插入和查找?這是處理[英文]單詞的單詞。

class Trie { 


static function insert_word(Node $root, $text) 
{ 
    $v = $root; 
    foreach(str_split($text) as $char) { 
    $next = $v->children[$char]; 
     if ($next === null) 
     { 
      $v->children[$char] = $next = new Node(); 
     } 
     $v = $next; 
    } 

    $v->leaf = true; 
} 


static function get_words_sorted(Node $node, $text) 
{ 

    $res = array(); 
    for($ch = 0; $ch < 128; $ch++) { 
    $child = $node->children[chr($ch)]; 

     if ($child !== null) 
     { 
      $res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch))); 

     } 
    } 
    if ($node->leaf === true) 
    { 
     $res[] = $text; 
    } 
    return $res; 

} 

static function search(Node $root, $text) 
{ 
    $v = $root; 
    while($v !== null) 
    { 
     foreach(str_split($text) as $char) { 
      $next = $v->children[$char]; 
      if ($next === null) 
      { 
       return false; 
      } 
      else 
      { 
       $v = $next; 
      } 
     } 

     if($v->leaf === true) 
     { 
      return true; 
     } 
     else 
     { 
      return false; 
     } 

    } 
    return false; 

} 

} 


class Node { 

    public $children; 
    public $leaf; 


    function __construct() 
    { 
     $children = Array(); 

    } 
} 

示例用法

$root = new Node(); 
    $words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be"); 


    for ($i = 0; $i < sizeof($words); $i++) 
    { 

     Trie::insert_word($root, $words[$i]); 
    } 

    $search_words = array("alloy", "ant", "bee", "aren't", "allot"); 

    foreach($search_words as $word) 
    { 
     if(Trie::search($root, $word) === true) 
     { 
      echo $word . " IS in my dictionary<br/>"; 
     } 
     else 
     { 
      echo $word . " is NOT in my dictionary <br/>"; 
     } 
    }