2012-08-16 64 views
0
<?php 

// Global accumulator: word => total number of occurrences across all files
// processed so far by countWordsfrequency().
$wordFrequencyArray = array();

/**
 * Count the words of one file and add the counts to the global
 * $wordFrequencyArray.
 *
 * A "word" is a maximal run of ASCII letters and digits; every other
 * character acts as a separator. Matching is case-sensitive, so "Stack" and
 * "stack" are counted separately.
 *
 * @param string $filename Path of the file to read.
 * @return void The result is accumulated in the global $wordFrequencyArray.
 */
function countWordsfrequency($filename) {
    global $wordFrequencyArray;

    $contentoffile = file_get_contents($filename);
    if ($contentoffile === false) {
        // Unreadable file: file_get_contents() has already raised a warning;
        // there is nothing to count.
        return;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by consecutive
    // separators (e.g. ", " or a newline after punctuation).
    $wordArray = preg_split('/[^a-zA-Z0-9]/', $contentoffile, -1, PREG_SPLIT_NO_EMPTY);

    foreach (array_count_values($wordArray) as $word => $count) {
        if (!isset($wordFrequencyArray[$word])) {
            $wordFrequencyArray[$word] = 0;
        }
        $wordFrequencyArray[$word] += $count;
    }
}


// Input files, given as paths relative to the current working directory.
$filenames = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt'];

// Fold each file's word counts into the shared global table.
foreach ($filenames as $filename) {
    countWordsfrequency($filename);
}

// Dump the combined word => count table.
print_r($wordFrequencyArray);

?> 

這是我的代碼，用於在多個文件中查找每個單詞並打印其頻率。現在我想做的是找出每個單詞出現在哪些文件中（即文件之間的交集）。例如，如果有一個單詞「stack」，我想打印出它出現在哪些文件中以及它的頻率——頻率我想我已經計算出來了。（問題標題：查找多個文件中單詞的交集／頻率）

最終結果應該同時給出單詞的頻率以及出現該單詞的文件。

我該如何處理？我應該在 countWords 函數本身的 for 循環中檢查它嗎？

回答

0

您需要保存更多信息。我會避免使用類，因爲看起來您並不需要太複雜的東西。

<?php 
// Global accumulator, keyed by word:
//   word => array('total' => occurrences across all files,
//                 'files' => array(filename => occurrences in that file))
$wordFrequencies = array();

/**
 * Count the words of one file and record, for each word, both its running
 * total and its count in this particular file in the global $wordFrequencies.
 *
 * A "word" is a maximal run of ASCII letters and digits; every other
 * character acts as a separator. Matching is case-sensitive.
 *
 * @param string $filename Path of the file to read; also used as the key
 *                         under each word's 'files' entry.
 * @return void The result is accumulated in the global $wordFrequencies.
 */
function countWordsFrequency($filename) {
    global $wordFrequencies;

    $contentoffile = file_get_contents($filename);
    if ($contentoffile === false) {
        // Unreadable file: file_get_contents() has already raised a warning;
        // there is nothing to count.
        return;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by consecutive
    // separators.
    $wordArray = preg_split('/[^a-zA-Z0-9]/', $contentoffile, -1, PREG_SPLIT_NO_EMPTY);

    foreach (array_count_values($wordArray) as $word => $count) {
        // Write straight into the global array: PHP arrays are copied on
        // assignment, so updating a local copy would silently lose the counts.
        if (!isset($wordFrequencies[$word])) {
            $wordFrequencies[$word] = array('total' => 0, 'files' => array());
        }

        // If this is the first occurrence of this word in the file, start a count.
        if (!isset($wordFrequencies[$word]['files'][$filename])) {
            $wordFrequencies[$word]['files'][$filename] = 0;
        }

        // Increment counts for both the total and the file.
        $wordFrequencies[$word]['total'] += $count;
        $wordFrequencies[$word]['files'][$filename] += $count;
    }
}

// Input files, given as paths relative to the current working directory.
$filenames = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt'];

// Fold each file's per-word totals and per-file counts into the global table.
foreach ($filenames as $filename) {
    countWordsFrequency($filename);
}

// Dump the combined table built in $wordFrequencies.
print_r($wordFrequencies);
?>