我見過這個問題(this question),但它並不真正符合我的需求。那個問題的答案有兩種:第一種是從 meta description 標籤中提取,第二種是爲您已經擁有正文的文章生成摘錄。(標題:用 PHP 像 Readability 一樣抓取文章摘要)
我想要做的是實際取得文章的前幾句話,就像 Readability 那樣。最好的方法是什麼?解析 HTML 嗎?以下是我目前使用的程式碼,但它不是很可靠。
/**
 * Guess an excerpt for the page at $url from its <meta name="description"> tag.
 *
 * The page is downloaded with file_get_contents_curl() and parsed with
 * DOMDocument. If several description meta tags are present, the content of
 * the last one is returned.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return string|null Content of the description meta tag, or null when the
 *                     page could not be fetched or declares no description.
 */
function guessExcerpt($url) {
    $html = file_get_contents_curl($url);

    // A failed download yields a non-string/empty result; loadHTML() rejects
    // an empty source (ValueError on PHP 8), so stop before parsing.
    if (!is_string($html) || $html === '') {
        return null;
    }

    // Real-world markup is rarely valid. Buffer libxml's parse warnings
    // instead of silencing everything with "@", then restore the caller's
    // previous error-handling mode.
    $previousMode = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    // Start from null so a page without a description does not read an
    // undefined variable.
    $description = null;
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        // HTML attribute values such as "Description" are commonly written
        // with mixed case, so compare case-insensitively.
        if (strcasecmp($meta->getAttribute('name'), 'description') === 0) {
            $description = $meta->getAttribute('content');
        }
    }

    return $description;
}
/**
 * Download the response body of $url using cURL.
 *
 * Response headers are excluded from the output, redirects are followed,
 * and the whole transfer is limited to 5 seconds.
 *
 * @param string $url Address to request.
 *
 * @return string|false Whatever curl_exec() returns: the response body as a
 *                      string, or false when the transfer fails.
 */
function file_get_contents_curl($url) {
    $handle = curl_init();

    // Apply every transfer option in one call, in the same order as before.
    curl_setopt_array($handle, [
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 5,
        CURLOPT_URL            => $url,
        CURLOPT_FOLLOWLOCATION => 1,
    ]);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
你也可以看看[php-readability](http://code.fivefilters.org/php-readability/) – j0k 2013-06-17 21:24:31