我任務是創造一個詞頻分析程序讀取從文本文件的內容,併產生下列示例輸出:C:鏈表詞頻 - 排序
SUMMARY:
27340 words
2572 unique words
WORD FREQUENCIES (TOP 10):
the 1644
and 872
to 729
a 632
it 595
she 553
i 545
of 514
said 462
you 411
我試圖創建一個程序實現這樣的產出。我對C編程非常陌生,儘管它在某種程度上有效,但可能存在很多效率問題/缺陷。這是我寫到目前爲止:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORD 32
#define MAX_TEXT_LENGTH 10000
// ===========================================
// STRUCTURE
//============================================
typedef struct word {
char *str; /* Stores the word */
int freq; /* Stores the frequency */
struct word *pNext; /* Pointer to the next word counter in the list */
} Word;
// ===========================================
// FUNCTION PROTOTYPES
//============================================
int getNextWord(FILE *fp, char *buf, int bufsize); /* Given function to get words */
void addWord(char *pWord); /* Adds a word to the list or updates exisiting word */
void show(Word *pWordcounter); /* Outputs a word and its count of occurrences */
Word* createWordCounter(char *word); /* Creates a new WordCounter structure */
// ===========================================
// GLOBAL VARIABLES
//============================================
Word *pStart = NULL; /* Pointer to first word counter in the list */
int totalcount = 0; /* Total amount of words */
int uniquecount = 0; /* Amount of unique words */
// ===========================================
// MAIN
//============================================
int main() {
/* File pointer */
FILE * fp;
/* Read text from here */
fp = fopen("./test.txt","r");
/* buf to hold the words */
char buf[MAX_WORD];
/* Size */
int size = MAX_TEXT_LENGTH;
/* Pointer to Word counter */
Word *pCounter = NULL;
/* Read all words from text file */
while (getNextWord(fp, buf, size)) {
/* Add the word to the list */
addWord(buf);
/* Increment the total words counter */
totalcount++;
}
/* Loop through list and figure out the number of unique words */
pCounter = pStart;
while(pCounter != NULL)
{
uniquecount++;
pCounter = pCounter->pNext;
}
/* Print Summary */
printf("\nSUMMARY:\n\n");
printf(" %d words\n", totalcount); /* Print total words */
printf(" %d unique words\n", uniquecount); /* Print unique words */
/* List the words and their counts */
pCounter = pStart;
while(pCounter != NULL)
{
show(pCounter);
pCounter = pCounter->pNext;
}
printf("\n");
/* Free the allocated memory*/
pCounter = pStart;
while(pCounter != NULL)
{
free(pCounter->str);
pStart = pCounter;
pCounter = pCounter->pNext;
free(pStart);
}
/* Close file */
fclose(fp);
return 0;
}
// ===========================================
// FUNCTIONS
//============================================
void show(Word *pWordcounter)
{
/* output the word and it's count */
printf("\n%-30s %5d", pWordcounter->str,pWordcounter->freq);
}
void addWord(char *word)
{
Word *pCounter = NULL;
Word *pLast = NULL;
if(pStart == NULL)
{
pStart = createWordCounter(word);
return;
}
/* If the word is in the list, increment its count */
pCounter = pStart;
while(pCounter != NULL)
{
if(strcmp(word, pCounter->str) == 0)
{
++pCounter->freq;
return;
}
pLast = pCounter;
pCounter = pCounter->pNext;
}
/* Word is not in the list, add it */
pLast->pNext = createWordCounter(word);
}
Word* createWordCounter(char *word)
{
Word *pCounter = NULL;
pCounter = (Word*)malloc(sizeof(Word));
pCounter->str = (char*)malloc(strlen(word)+1);
strcpy(pCounter->str, word);
pCounter->freq = 1;
pCounter->pNext = NULL;
return pCounter;
}
int getNextWord(FILE *fp, char *buf, int bufsize) {
char *p = buf;
char c;
//skip all non-word characters
do {
c = fgetc(fp);
if (c == EOF)
return 0;
} while (!isalpha(c));
//read word chars
do {
if (p - buf < bufsize - 1)
*p++ = tolower(c);
c = fgetc(fp);
} while (isalpha(c));
//finalize word
*p = '\0';
return 1;
}
它正確顯示摘要。單詞和獨特單詞的數量是完全正確的。然後列出文件中找到的每個唯一字,並顯示正確的出現次數。
我現在需要做的事情(以及我遇到的很多麻煩)是按降序排列我的鏈接列表的出現次數。最重要的是,它應該只顯示前10個單詞,而不是全部(這應該是可行的,一旦我有鏈接列表排序)。
我知道代碼本身現在效率很低,但我現在主要關心的是獲取正確的輸出。
如果任何人都可以幫助我排序算法,或至少指出我在正確的方向,將不勝感激。
謝謝。
好的,我會試試看。謝謝。 – user1300922 2012-08-06 04:29:06