0
我想提取curl和DOMDocument的信息,我必須提取一個div層的所有鏈接。PHP CURL和DOMDocument錯誤
但沒有顯示任何東西,我不明白,因爲沒有捲曲,如果它的作品。
function media_uri_request($url, $method='', $vars='')
{
$ch = curl_init();
if ($method == 'post')
{
curl_setopt ($ch, CURLOPT_POST, 1);
curl_setopt ($ch, CURLOPT_POSTFIELDS, $vars);
}
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_HEADER, false);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt ($ch, CURLOPT_FAILONERROR, false);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt ($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7");
curl_setopt ($ch, CURLOPT_HTTPHEADER, array("REMOTE_ADDR: ".$_SERVER['REMOTE_ADDR'], "HTTP_X_FORWARDED_FOR: ".$_SERVER['REMOTE_ADDR']));
curl_setopt ($ch, CURLOPT_COOKIEJAR, 'tmp/cookie.txt');
curl_setopt ($ch, CURLOPT_COOKIEFILE, 'tmp/cookie.txt');
curl_setopt ($ch, CURLOPT_MAXREDIRS, 10);
curl_setopt ($ch, CURLOPT_TIMEOUT, 0);
curl_setopt ($ch, CURLOPT_CONNECTTIMEOUT, 0);
$buffer = curl_exec($ch);
curl_close ($ch);
if (isset($buffer) && filter_var($buffer, FILTER_SANITIZE_URL)) {
$urls = Array();
$dom = new DOMDocument();
@$dom->loadHTMLFile($buffer);
foreach($dom->getElementsByTagName('a') as $buffer) {
$urls[] = Array(
'name' => $buffer->nodeValue,
'href' => $buffer->getAttribute('href'),
'title' => $buffer->getAttribute('title'),
'rel' => $buffer->getAttribute('rel'),
'id' => $buffer->getAttribute('id'),
);
}
return $urls;
}
}
當前顯示我頁面上的所有鏈接,但我只需要一個div的id並獲取此鏈接。
<div id="something">
<a href="anylink">sometitle</a>
<a href="anylink">sometitle</a>
<a href="anylink">sometitle</a>
<a href="anylink">sometitle</a>
</div>
你能幫助我嗎?
謝謝,但我得到了一個錯誤**調用成員函數getElementsByTagName()在一個非對象在**我不知道爲什麼 –
這意味着在文檔中沒有id「東西」的元素。 –
我不知道,但沒有正確的工作,這將返回一個空白頁面和ID已存在於HTML中,當我只通過getElementsByTagName('A')即時通訊獲取頁面上的所有鏈接,**感謝您的時間** –