用DOMDocument解析HTML代替正則表達式更安全。試試這個代碼:
<?php
function replaceInAnchors($html)
{
$dom = new DOMDocument();
// loadHtml() needs mb_convert_encoding() to work well with UTF-8 encoding
$dom->loadHtml(mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
$xpath = new DOMXPath($dom);
foreach($xpath->query('//text()[(ancestor::a)]') as $node)
{
$replaced = str_ireplace('_', ' ', $node->wholeText);
$newNode = $dom->createDocumentFragment();
$newNode->appendXML($replaced);
$node->parentNode->replaceChild($newNode, $node);
}
// get only the body tag with its contents, then trim the body tag itself to get only the original content
return mb_substr($dom->saveXML($xpath->query('//body')->item(0)), 6, -7, "UTF-8");
}
$html = '<li class="hook">
<a href="i_have_underscores">I_have_underscores</a>
</li>';
echo replaceInAnchors($html);
*(相關)* [最佳方法來解析HTML(http://stackoverflow.com/questions/3577641/best-methods-to-parse-html/3577662#3577662) – Gordon 2010-11-27 21:41:23