2
我目前正在尋找解決方案來動態過濾網站內容。通過「動態」我的意思是我會計算第一頁上的整個單詞中不良詞的百分比,即shit
,f**k
等。如果百分比不超過30%,說網站是允許的。如何讓它搜索第一頁上的每個單詞,並將它們與不良單詞列表進行匹配,然後除以單詞的總數,以便我能夠得到該百分比?理由不是製作內容過濾器,而是僅僅阻止網站,即使頁面中的一個單詞與不良單詞列表匹配。我已經得到了這個,但它是靜態的。如何使用PHP動態過濾網站內容
$filename = "filters.txt";
$fp = @fopen($filename, 'r');
if ($fp) {
$array = explode("\n", fread($fp, filesize($filename)));
foreach($array as $key => $val){
list($before,$after) = split("~",$val);
$input = preg_replace($before,$after,$input);
}
}
* filter.txt包含髒話
感謝名單Erisco列表!
試過這個,但它似乎並沒有工作。
function get_content($url)
{
$ch = curl_init();
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_HEADER, 0);
ob_start();
curl_exec ($ch);
curl_close ($ch);
$string = ob_get_contents();
ob_end_clean();
return $string;
}
/* $toLoad is from Browse.php */
$sourceOfWebpage = get_content($toLoad);
$textOfWebpage = strip_tags($sourceOfWebpage);
/* array: Obtained by your filter.txt file */
// Open the filters file and filter all of the results.
$filename = "filters.txt";
$badWords = @fopen($filename, 'r');
if ($badWords) {
$array = explode("\n", fread($fp, filesize($filename)));
foreach($array as $key => $val){
list($before,$after) = split("~",$val);
$input = preg_replace($before,$after,$input);
}
}
/* float: Some decimal value */
$allowedBadWordsPercent = 0.30;
$numberOfWords = str_word_count($textOfWebpage);
$numberOfBadWords = 0;
str_ireplace($badWords, '', $sourceOfWebpage, $numberOfBadWords);
if ($numberOfBadWords != 0) {
$badWordsPercent = $numberOfWords/$numberOfBadWords;
} else {
$badWordsPercent = 0;
}
if ($badWordsPercent > $allowedBadWordsPercent) {
echo 'This is a naughty webpage';
}