2009-09-07 158 views
2

我在使用curl_multi_*時遇到了問題。我想創建一個類/函數,它接收1000個URL,並且每次同時處理其中5個URL;當某個URL完成下載時,它會把空出來的插槽分配給尚未處理的新URL。(標題:CURL問題(multi))

我見過一些curl_multi的實現,但它們都無法讓我做到我想做的事。我相信解決方案就在curl_multi_select的用法之中,但相關文檔不是很清楚,而且用戶註釋(user notes)也沒有多大幫助。

有人能給我提供一些示例,說明如何實現這樣的功能嗎?

回答

6

以下是一種方法。該腳本可以同時獲取任意數量的URL,並在每個URL完成時添加一個新的URL(因此它始終同時獲取$maxConcurrent個頁面)。

// Demo: download a handful of pages, keeping at most $concurrent transfers
// in flight at any moment.
$sites = array('http://example.com', 'http://google.com', 'http://stackoverflow.com'); 
$concurrent = 2; // Any number. 

// Open the <pre> wrapper that the closing tag at the end terminates;
// without it the script emits an unmatched </pre>.
echo '<pre>'; 

$mc = new MultiCurl($sites, $concurrent); 
$mc->process(); 

echo '</pre>'; 

/**
 * Downloads a list of URLs through the curl_multi API using a rolling window:
 * at most $maxConcurrent transfers run at once, and every time one finishes
 * the freed slot is handed to the next URL that has not been started yet.
 *
 * For each finished transfer a line "<url> - <n> bytes<br />" is echoed.
 */
class MultiCurl
{
    /** List of URLs to fetch, re-indexed from 0 so $currentIndex can walk it. */
    private $allToDo;

    /** The curl_multi handle that owns all in-flight easy handles. */
    private $multiHandle;

    /** Upper bound on simultaneously running transfers (always >= 1). */
    private $maxConcurrent = 2;

    /** Index into $allToDo of the next URL that has not been started. */
    private $currentIndex = 0;

    /** Per-transfer bookkeeping ('url', 'multi', 'curl'), keyed by _handleKey(). */
    private $info = array();

    /** Options applied to every easy handle. */
    private $options = array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 3,
        CURLOPT_TIMEOUT        => 3,
    );

    /**
     * @param array $todo       URLs to download.
     * @param int   $concurrent Maximum number of simultaneous transfers;
     *                          values below 1 are treated as 1.
     */
    public function __construct($todo, $concurrent)
    {
        // Re-index so the numeric cursor works even if the caller passed an
        // array with gaps or string keys.
        $this->allToDo = array_values($todo);
        // A window of 0 (or less) would never start a transfer and make
        // process() spin forever while URLs remain.
        $this->maxConcurrent = max(1, (int) $concurrent);
        $this->multiHandle = curl_multi_init();
    }

    /**
     * Runs all transfers to completion, refilling free slots as they open up.
     *
     * @return $this
     */
    public function process()
    {
        $running = 0;
        do {
            // Top the window up to maxConcurrent, limited by what is left.
            $this->_addHandles(min($this->maxConcurrent - $running, $this->_moreToDo()));

            // Older libcurl versions ask to be called again right away by
            // returning CURLM_CALL_MULTI_PERFORM.
            do {
                $status = curl_multi_exec($this->multiHandle, $running);
            } while ($status === CURLM_CALL_MULTI_PERFORM);

            // Sleep until a transfer has activity instead of busy-looping.
            // curl_multi_select() can return -1 (select failure, or nothing
            // to wait on); back off briefly so the loop does not spin.
            if ($running > 0 && curl_multi_select($this->multiHandle) === -1) {
                usleep(100);
            }

            // Drain the queue of finished transfers.
            while (($finished = curl_multi_info_read($this->multiHandle)) !== false) {
                $this->_showData($finished);
                curl_multi_remove_handle($this->multiHandle, $finished['handle']);
                // Only pre-8.0 resource handles need closing; on PHP 8+ the
                // handle is an object and curl_close() has no effect.
                if (is_resource($finished['handle'])) {
                    curl_close($finished['handle']);
                }
            }
        } while ($running > 0 || $this->_moreToDo() > 0);

        return $this;
    }

    /**
     * Starts up to $num new transfers, taking URLs from the cursor position.
     *
     * @param int $num Number of handles to add; values <= 0 add nothing.
     */
    private function _addHandles($num)
    {
        while ($num-- > 0) {
            $url = $this->allToDo[$this->currentIndex];
            $handle = curl_init($url);
            curl_setopt_array($handle, $this->options);
            curl_multi_add_handle($this->multiHandle, $handle);
            $this->info[$this->_handleKey($handle)]['url'] = $url;
            $this->currentIndex++;
        }
    }

    /**
     * @return int Number of URLs that have not been started yet.
     */
    private function _moreToDo()
    {
        return count($this->allToDo) - $this->currentIndex;
    }

    /**
     * Records the result of one finished transfer and prints its size.
     *
     * @param array $multiInfo Message returned by curl_multi_info_read().
     */
    private function _showData($multiInfo)
    {
        $handle = $multiInfo['handle'];
        $key = $this->_handleKey($handle);

        $this->info[$key]['multi'] = $multiInfo;
        $this->info[$key]['curl'] = curl_getinfo($handle);

        $content = (string) curl_multi_getcontent($handle);
        echo $this->info[$key]['url'] . ' - ' . strlen($content) . ' bytes<br />';
    }

    /**
     * Maps a cURL easy handle to a value usable as an array key.
     *
     * Handles are CurlHandle objects on PHP 8+, which cannot be array
     * offsets, and resources before that, where the implicit cast raises a
     * notice. The stored 'multi' entry keeps a finished handle referenced,
     * so its object id is not reused while this instance lives.
     *
     * @param resource|object $handle
     * @return int
     */
    private function _handleKey($handle)
    {
        return is_object($handle) ? spl_object_id($handle) : (int) $handle;
    }
}
+1

因爲這個例子沒有使用curl_multi_select(),它會瘋狂地忙等待(busy-loop),因而佔用100%的CPU,直到所有的傳輸完成... –

+1

@Daniel Stenberg - 感謝;你是對的。我已經用一個速度更快並且使用更少CPU時間的(精簡)類替換了該代碼。 – GZipp