2010-07-02 76 views
1
<?php
// Collect every hyperlink on a page into two parallel arrays:
//   $linklabel[i] - visible text of the i-th <a> element
//   $link[i]      - value of its href attribute ('' when the attribute is absent)
$url = 'http://edition.cnn.com/?fbid=4OofUbASN5k';

// Fetch the raw markup through the fread_url() helper defined in this file.
$var = fread_url($url);

$linklabel = array();
$link = array();

// A failed fetch yields false/''; there is nothing to parse in that case.
if (is_string($var) && $var !== '') {
    // Strip <script> elements before parsing. The markup is deliberately NOT run
    // through html_entity_decode(): decoding first would turn escaped text such as
    // "&lt;a&gt;" into real tags and produce phantom links. The DOM parser decodes
    // entities itself.
    $search = array('@<script[^>]*?>.*?</script>@si');
    $stripped = preg_replace($search, "\n", $var);
    if ($stripped !== null) { // null signals a PCRE error; keep the unstripped markup then
        $var = $stripped;
    }

    // DOMDocument's constructor takes (version, encoding), not markup.
    $dom = new DOMDocument();

    // Real-world HTML is rarely valid; buffer libxml's parse warnings instead of
    // emitting them, then restore the previous error-handling mode.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($var);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($dom);

    // '//a' selects every anchor element anywhere in the document.
    foreach ($xpath->query('//a') as $element) {
        $label = $element->textContent;
        $href = $element->getAttribute('href');

        $linklabel[] = $label;
        $link[] = $href;

        // Print the current pair (escaped, since the output is HTML) rather than
        // the whole arrays, which would only print the literal word "Array".
        print htmlspecialchars($label, ENT_QUOTES, 'UTF-8')
            . ' => '
            . htmlspecialchars($href, ENT_QUOTES, 'UTF-8')
            . '<br>';
    }
}


/**
 * Download the contents of a URL and return them as a string.
 *
 * Uses the cURL extension when it is loaded (browser-like user agent, follows
 * redirects, stores cookies in 'cookie.txt' relative to the working directory).
 * Otherwise falls back to reading the URL with fopen().
 *
 * @param string $url Address to fetch.
 * @return string|false The response body. The cURL path returns false when the
 *                      transfer fails; the fopen() path returns '' when the URL
 *                      cannot be opened.
 */
function fread_url($url) {
    if (function_exists("curl_init")) {
        $user_agent = "Mozilla/4.0 (compatible; MSIE 5.01; " .
            "Windows NT 5.0)";

        // Create exactly one handle; a second curl_init() would leak the first.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');

        $html = curl_exec($ch);
        curl_close($ch);

        return $html;
    }

    // Fallback without cURL. Start from an empty string so that appending and
    // returning never touch an undefined variable.
    $html = '';
    $hfile = fopen($url, "r");
    if ($hfile) {
        while (!feof($hfile)) {
            $html .= fgets($hfile, 1024);
        }
        fclose($hfile); // release the stream once fully read
    }

    return $html;
}

我需要將鏈接和鏈接標籤分別存入兩個單獨的數組。我參考了幾個論壇的帖子寫出了上面的代碼,但運行時出現錯誤。我不明白代碼第14行使用的 find 函數爲什麼會報錯。標籤:PHP、cURL、DOM

+0

請修正您的代碼格式,並貼出錯誤信息。 – 2010-07-02 02:20:09

回答

0

代碼中有幾個問題,主要是調用了不存在的方法(DOMXPath 沒有 find())以及引用了不存在的屬性(innerText、href)。正確版本:

<?php
// Minimal example: extract the text and href of every <a> element from an
// HTML string into two parallel arrays.
$var = <<<EOD
<html> 
<a href="sdfgs">sdfd</a> 
</html> 
EOD;

// Initialise both result arrays so that markup without any anchors dumps two
// empty arrays instead of raising undefined-variable notices.
$linklabel = array();
$link = array();

$dom = new DOMDocument();

// Buffer libxml's warnings about imperfect HTML rather than silencing every
// possible error with the @ operator; restore the previous mode afterwards.
$previous = libxml_use_internal_errors(true);
$dom->loadHTML($var);
libxml_clear_errors();
libxml_use_internal_errors($previous);

$xpath = new DOMXPath($dom);

// '//a' selects every anchor element anywhere in the document.
foreach ($xpath->query('//a') as $element) {
    $linklabel[] = $element->textContent;       // visible link text
    $link[] = $element->getAttribute("href");   // '' when the attribute is absent
}
var_dump($linklabel);
var_dump($link);
+0

thx for ur help,.... – 2010-07-02 16:09:00

+0

它對我來說工作正常 – 2010-07-02 16:24:11