2014-01-26 61 views
2

我想把多個網址的網頁內容分別保存到文件中(使用 file_get_contents 讀取多個網址)。

首先,我從下面這個陣列開始:

// Describe the page to fetch. 'url' is the request URL assembled from the
// current $row; 'selector' is presumably the CSS selector for the rows of
// interest (its consumer is not shown here).
// Every value goes through urlencode() so that characters such as '&', '#'
// or spaces inside a code cannot corrupt the query string.
// NOTE(review): the key 'cod3' looks like a typo for 'code3' -- confirm
// against the real column name before renaming it.
$site = array(
    'url' => 'http://onesite.com/index.php'
        . '?c=' . urlencode((string) $row['code0'])
        . '&o=' . urlencode((string) $row['code1'])
        . '&y=' . urlencode((string) $row['code2'])
        . '&a=' . urlencode((string) $row['cod3'])
        . '&sid=',
    'selector' => 'table.tabel tr',
);

為了把這些網址的內容保存到文件,我嘗試了以下代碼:

// Fetch every configured site with a browser-like HTTP context and cache each
// response body in a file named after the MD5 of its URL.
//
// Results:
//   $contents - map of URL => raw response body for every successful fetch
//   $content  - raw body of the last successful fetch (null if none succeeded)

// $site may be a single site (an array with a 'url' key) or a list of such
// arrays. Normalise it to a list so each site is fetched exactly once, rather
// than once per element of the associative array.
$sites = isset($site['url']) ? array($site) : $site;

$referer = 'reffername';

// The request headers do not vary per site, so build them once. Appending to
// $header inside the loop would duplicate every header on each iteration.
$header = array(
    "Accept: text/xml,application/xml,application/json,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
    "Cache-Control: max-age=0",
    // The http:// stream wrapper does not reuse connections, so asking for
    // keep-alive only risks the read blocking until the server drops the socket.
    "Connection: close",
    "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
    "Accept-Language: en-us,en;q=0.5",
    "Referer: $referer",
);

$opts = array('http' => array(
    'method'     => "GET",
    // Double quotes are required: '\r\n' in single quotes is a literal
    // backslash-r-backslash-n, not a CRLF header separator.
    'header'     => implode("\r\n", $header) . "\r\n",
    'user_agent' => "Mozilla/5.0 (X11; U; Linux i686; pl-PL; rv:1.9.0.2) Gecko/2008092313 Ubuntu/9.25 (jaunty) Firefox/3.8",
));
$context = stream_context_create($opts);

$contents = array();
$content  = null;

foreach ($sites as $n) {
    if (!is_array($n) || empty($n['url'])) {
        continue; // nothing to fetch for this entry
    }
    $url = $n['url'];

    $data = file_get_contents($url, false, $context);
    if ($data === false) {
        trigger_error("Could not fetch $url", E_USER_WARNING);
        continue;
    }

    // Derive the cache file name from the URL so every URL gets its own file.
    // md5('$id') hashed the literal text "$id" and always gave the same name.
    $file = md5($url);

    if (file_put_contents($file, $data) === false) {
        trigger_error("Could not write $file for $url", E_USER_WARNING);
        continue;
    }

    // The body is raw HTML, not serialize() output, so it is kept as-is.
    // Never unserialize() data received from a remote host: it is unsafe
    // (object injection) and would simply return false here.
    $contents[$url] = $data;
    $content        = $data;
}
+0

如果使用 curl_multi 函數同時下載多個文件會更好 – gskema

+0

@GytisŠk好吧,但我怎麼能從一個數組中做到這一點,並保存它? –

回答

7

基本的 curl_multi 腳本:

// Basic curl_multi script: download every URL in $urls concurrently and
// stream each response body straight into its own file under $folder.

// Your URL array that holds links to files
$urls = array();

// cURL multi-handle
$mh = curl_multi_init();

// This will hold the cURL request handle for each file
$requests = array();

$options = array(
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_AUTOREFERER    => true,
    CURLOPT_USERAGENT      => 'paste your user agent string here',
    CURLOPT_HEADER         => false,
    // NOTE(review): this disables TLS certificate verification and allows
    // man-in-the-middle attacks. Remove it (the default is true) unless you
    // knowingly target hosts with broken certificates.
    CURLOPT_SSL_VERIFYPEER => false,
    // Only matters if you switch to curl_multi_getcontent() below.
    // CURLOPT_FILE is set after this and redirects the body to the file.
    CURLOPT_RETURNTRANSFER => true
);

// Corresponding file stream for each request
$fstreams = array();

$folder = 'content/';
// The second is_dir() covers the race where another process creates the
// folder between our check and our mkdir().
if (!is_dir($folder) && !mkdir($folder, 0777, true) && !is_dir($folder)) {
    throw new RuntimeException("Could not create download folder $folder");
}

foreach ($urls as $key => $url)
{
    // Extract filename from URL and create appropriate local path.
    // parse_url() yields null/false when there is no usable path, hence the cast.
    $path     = (string) parse_url($url, PHP_URL_PATH);
    $filename = pathinfo($path, PATHINFO_FILENAME).'-'.$key; // Or whatever you want
    $filepath = $folder.$filename;

    // Open a file stream for this download; skip the URL if that fails so
    // cURL is never handed an invalid stream.
    $stream = fopen($filepath, 'w');
    if ($stream === false) {
        trigger_error("Could not open $filepath for writing; skipping $url", E_USER_WARNING);
        continue;
    }

    // Create the cURL handle for this URL
    $request = curl_init($url);
    if ($request === false) {
        fclose($stream);
        trigger_error("Could not initialise cURL for $url", E_USER_WARNING);
        continue;
    }

    // Set cURL options, then direct the response body into the file stream
    curl_setopt_array($request, $options);
    curl_setopt($request, CURLOPT_FILE, $stream);

    $requests[$key] = $request;
    $fstreams[$key] = $stream;

    // Add cURL handle to multi-handle
    curl_multi_add_handle($mh, $request);
}

// Drive all transfers until they have completed. curl_multi_select() sleeps
// until a socket has activity, so the loop does not spin at 100% CPU.
do {
    $status = curl_multi_exec($mh, $active);
    if ($active && curl_multi_select($mh, 1.0) === -1) {
        // select() could not wait (seen with some libcurl builds); back off
        // briefly so we do not fall back into a busy loop.
        usleep(100000);
    }
} while ($active && $status === CURLM_OK);

// Report transfers that failed. In multi mode the per-transfer result is only
// available through curl_multi_info_read().
while (($info = curl_multi_info_read($mh)) !== false) {
    if ($info['result'] !== CURLE_OK) {
        trigger_error(
            'Download failed for '.curl_getinfo($info['handle'], CURLINFO_EFFECTIVE_URL)
                .': '.curl_strerror($info['result']),
            E_USER_WARNING
        );
    }
}

// Collect all data here and clean up
foreach ($requests as $key => $request) {

    //$returned[$key] = curl_multi_getcontent($request); // Use this if you're not downloading into file, also remove CURLOPT_FILE option and fstreams array
    curl_multi_remove_handle($mh, $request); // release the handle from the multi-handle first
    curl_close($request);                    // THIS MUST GO AFTER curl_multi_getcontent();
    fclose($fstreams[$key]);                 // flush and close the downloaded file
}

curl_multi_close($mh);
+0

謝謝,我現在就試試 –

+0

`curl_multi_close($mh);` – jaggedsoft

+1

很棒!非常感謝 `curl_multi_getcontent($request)` 這部分 ;) –