我寫了一段代碼,以解析作爲輸入給出的網站中的URL(我選擇網站:www.tunisie-web.org),並確定是否存在外部 我的PHP腳本:crawl.php的內容致命錯誤:無法重新聲明parse_external_url()
<?php
set_time_limit(10000);
include_once('../PHPCrawl_083/PHPCrawl_083/libs/PHPCrawler.class.php');
class MyCrawler extends PHPCrawler
{
function handleDocumentInfo(PHPCrawlerDocumentInfo $DocInfo)
{
// Just detect linebreak for output ("\n" in CLI-mode, otherwise "<br>").
if (PHP_SAPI == "cli") $lb = "\n";
else {
$lb = "<br/>";
function parse_external_url($url) {
echo "0"."<br/>";
// Abort if parameter URL is empty
if(empty($url)) {
echo "l'url est vide"."<br/>";
}
echo "1"."<br/>";
// Parse home URL and parameter URL
$link_url = parse_url($url);
$home_url = parse_url($_SERVER['HTTP_HOST']);
//$home_url = parse_url(home_url()); // Works for WordPress
// Decide on target
if($link_url['host'] == $home_url['host']) {
// Is an internal link
echo "<br/>";
echo "2"."<br/>";
}
else {
// Is an external link
// Print the URL and the HTTP-status-Code
echo "Page requested: ".$url." (".$url->http_status_code.")"."<br/>";
// Print the refering URL
echo "Referer-page: ".$url->referer_url."<br/>";
}
echo "3";
}
parse_external_url($DocInfo->url);
echo "<br/>";
flush();
}
}
}
$crawler = new MyCrawler();
$crawler->setURL("www.tunisie-web.org ");
$crawler->addReceiveContentType("#text/html#");
$crawler->addURLFilterRule("#\.(jpg|gif|png|pdf|jpeg|css|js)$# i");
$crawler->setWorkingDirectory("C:/Users/mayss/Documents/travailcrawl/");
$crawler->go();
?>
,但它告訴我下面
頁面的搜索結果要求: http://www.tunisie-web.org() Referer的頁面:和此錯誤 致命錯誤:無法在第23行的C:\ wamp \ www \ crawl \ crawl.php中重新聲明parse_external_url()(以前在C:\ wamp \ www \ crawl \ crawl.php:23中聲明)