我使用PHP腳本(使用捲曲)很慢,檢查是否:優化PHP捲曲基於鏈接檢查腳本 - 目前
- 在我的數據庫的鏈接都是正確的(即返回HTTP狀態200)
- 這些鏈接其實重定向和重定向到一個適當的/類似的網頁(使用頁面的內容)
這樣做的結果保存到一個日誌文件,並通過電子郵件發送給我作爲附件。
這一切都很好,而且工作起來很慢,但它很慢,因爲所有地獄和一半的時間都會超時,並且很快就會中止。值得注意的是,我有大約16,000個鏈接要檢查。
想知道如何更好地使這個運行更快,我做錯了什麼?下面
代碼:
function echoappend ($file,$tobewritten) {
fwrite($file,$tobewritten);
echo $tobewritten;
}
error_reporting(E_ALL);
ini_set('display_errors', '1');
$filename=date('YmdHis') . "linkcheck.htm";
echo $filename;
$file = fopen($filename,"w+");
try {
$conn = new PDO('mysql:host=localhost;dbname=databasename',$un,$pw);
$conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
echo '<b>connected to db</b><br /><br />';
$sitearray = array("medical.posterous","ebm.posterous","behavenet","guidance.nice","www.rch","emedicine","www.chw","www.rxlist","www.cks.nhs.uk");
foreach ($sitearray as $key => $value) {
$site=$value;
echoappend ($file, "<h1>" . $site . "</h1>");
$q="SELECT * FROM link WHERE url LIKE :site";
$stmt = $conn->prepare($q);
$stmt->execute(array(':site' => 'http://' . $site . '%'));
$result = $stmt->fetchAll();
$totallinks = 0;
$workinglinks = 0;
foreach($result as $row)
{
$ch = curl_init();
$originalurl = $row['url'];
curl_setopt($ch, CURLOPT_URL, $originalurl);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_NOBODY, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
$output = curl_exec($ch);
if ($output === FALSE) {
echo "cURL Error: " . curl_error($ch);
}
$urlinfo = curl_getinfo($ch);
if ($urlinfo['http_code'] == 200)
{
echoappend($file, $row['name'] . ": <b>working!</b><br />");
$workinglinks++;
}
else if ($urlinfo['http_code'] == 301 || 302)
{
$redirectch = curl_init();
curl_setopt($redirectch, CURLOPT_URL, $originalurl);
curl_setopt($redirectch, CURLOPT_HEADER, 1);
curl_setopt($redirectch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($redirectch, CURLOPT_NOBODY, false);
curl_setopt($redirectch, CURLOPT_FOLLOWLOCATION, true);
$redirectoutput = curl_exec($redirectch);
$doc = new DOMDocument();
@$doc->loadHTML($redirectoutput);
$nodes = $doc->getElementsByTagName('title');
$title = $nodes->item(0)->nodeValue;
echoappend ($file, $row['name'] . ": <b>redirect ... </b>" . $title . " ... ");
if (strpos(strtolower($title),strtolower($row['name']))===false) {
echoappend ($file, "FAIL<br />");
}
else {
$header = curl_getinfo($redirectch);
echoappend ($file, $header['url']);
echoappend ($file, "SUCCESS<br />");
}
curl_close($redirectch);
}
else
{
echoappend ($file, $row['name'] . ": <b>FAIL code</b>" . $urlinfo['http_code'] . "<br />");
}
curl_close($ch);
$totallinks++;
}
echoappend ($file, '<br />');
echoappend ($file, $site . ": " . $workinglinks . "/" . $totallinks . " links working. <br /><br />");
}
$conn = null;
echo '<br /><b>connection closed</b><br /><br />';
} catch(PDOException $e) {
echo 'ERROR: ' . $e->getMessage();
}
當您的腳本自行中止時,錯誤消息是什麼? – ariefbayu
嘗試'CURLOPT_CONNECTTIMEOUT'和'CURLOPT_TIMEOUT'並將其設置爲5(?)秒。 –
我在這些行上得到了「PHP致命錯誤:最大執行時間超過30秒」:「$ output = curl_exec($ ch);」和「$ redirectoutput = curl_exec($ redirectch);」 – Tomcat