計算每個單詞的出現次數

我試圖在函數 countWords 中計算每個單詞的出現次數。我相信我在函數中正確地寫出了 for 循環的開頭，但是接下來該如何比較數組中的單詞、對它們進行計數，然後刪除重複項呢？這是不是類似於斐波那契數列的做法，還是我理解錯了？另外，int n 的值爲 756，也就是數組中單詞的個數；wordArray 中的元素就是這些單詞。

#include <stdio.h> 
#include <string.h> 
#include <stdlib.h> 
#include <ctype.h> 

int *countWords(char **words, int n); 
/*
 * Program entry point (fragment: the listing is cut off after the calls to
 * copyCountArray(), so the rest of main() is not shown here).
 *
 * Reads the text file named by argv[1] twice: the first pass counts the
 * whitespace-separated tokens, the second pass stores a cleaned copy of each
 * token (leading/trailing non-letters removed, lower-cased) in a
 * heap-allocated array of heap-allocated strings. The array is then handed
 * to countWords() and compress().
 *
 * Every failure path prints a message and returns 0, as in the original.
 */
int main(int argc, char *argv[])
{
    char buffer[100]; //One token: at most 99 characters plus the terminating '\0'
    FILE *textFile;
    int numWords=0;
    int nextWord;
    int i, j, len, lastChar;
    char *wordPtr;
    char **wordArray;
    int *countArray;
    int *alphaCountArray;
    char **alphaWordArray;
    int *freqCountArray;
    char **freqWordArray;
    int choice=0;

    //Check to see if command line argument (file name)
    //was properly supplied. If not, terminate program
    if(argc < 2)
    {
        printf ("Must supply a file name as command line argument\n");
        return (0);
    }

    //Open the input file. Terminate program if open fails
    textFile=fopen(argv[1], "r");
    if(textFile == NULL)
    {
        printf("Error opening file. Program terminated.\n");
        return (0);
    }

    //Read file to count the number of tokens. The loop is driven by the
    //return value of fscanf(): testing feof() after the read misses a final
    //word that is not followed by whitespace. The "%99s" width keeps an
    //over-long token from overflowing buffer (it is split instead).
    while(fscanf(textFile, "%99s", buffer) == 1)
    {
        numWords++;
    }

    printf("The total number of words is: %d\n", numWords);

    //Create array to hold pointers to words. At least one slot is requested
    //so that an empty file is not mistaken for an allocation failure
    //(malloc(0) is allowed to return NULL).
    wordArray = malloc((numWords > 0 ? (size_t) numWords : 1) * sizeof *wordArray);
    if (wordArray == NULL)
    {
        printf("malloc of word Array failed. Terminating program.\n");
        return (0);
    }

    //Rewind file pointer and read file again to create wordArray.
    //nextWord only advances when a word is actually stored.
    rewind(textFile);
    nextWord = 0;
    while(nextWord < numWords && fscanf(textFile, "%99s", buffer) == 1)
    {
        len = (int) strlen(buffer);

        //Find the first letter (skips punctuation at beginning of word).
        //The bound on i stops the scan at the end of the token.
        i = 0;
        while(i < len && !isalpha((unsigned char) buffer[i]))
        {
            i++;
        }

        //Find the last letter (skips punctuation at end of word).
        //The bound on lastChar stops the scan before the start of the token.
        lastChar = len - 1;
        while(lastChar >= i && !isalpha((unsigned char) buffer[lastChar]))
        {
            lastChar--;
        }

        //A token without any letter (e.g. "123" or "--") is not a word
        if(lastChar < i)
        {
            continue;
        }

        //Need to malloc an array of chars to hold the word.
        //Then copy the lower-cased word from buffer into this array and
        //place the pointer into the next position of wordArray
        len = lastChar - i + 1;
        wordPtr = malloc((size_t) len + 1);
        if(wordPtr == NULL)
        {
            printf("malloc failure. Terminating program\n");
            return (0);
        }
        for(j = 0; j < len; j++)
        {
            wordPtr[j] = (char) tolower((unsigned char) buffer[i + j]);
        }
        wordPtr[len] = '\0';
        wordArray[nextWord] = wordPtr;
        nextWord++;
    }

    //Only the slots that were filled are valid from here on
    numWords = nextWord;

    //Call countWords() to create countArray and replace
    //duplicate words in wordArray with NULL
    countArray = countWords(wordArray, numWords);
    if(countArray == NULL)
    {
        printf("countWords() function returned NULL; Terminating program\n");
        return (0);
    }

    //Now call compress to remove NULL entries from wordArray
    compress(&wordArray, &countArray, &numWords);
    if(wordArray == NULL)
    {
        printf("compress() function failed; Terminating program.\n");
        return(0);
    }
    printf("Number of words in wordArray after eliminating duplicates and compressing is: %d\n", numWords);

    //Create copy of compressed countArray and wordArray and then sort them alphabetically
    alphaCountArray = copyCountArray(countArray, numWords);
    freqCountArray = copyCountArray(alphaCountArray, numWords);
/*
 * countWords -- count how often each distinct word occurs in wordArray.
 *
 * wordArray: array of n pointers to NUL-terminated, heap-allocated strings.
 *            Entries may be NULL; such entries are ignored.
 * n:         number of entries in wordArray.
 *
 * For the first occurrence of every word, the returned array holds the total
 * number of occurrences at the same index. Every later duplicate is free()d,
 * its slot in wordArray is set to NULL and its count is 0.
 *
 * Returns a heap-allocated array of n ints that the caller must free(), or
 * NULL if wordArray is NULL, n is negative, or the allocation fails.
 */
int *countWords(char **wordArray, int n)
{
    int *counts;
    int i;
    int j;

    if (wordArray == NULL || n < 0) {
        return NULL;
    }

    /* calloc zero-fills, so slots of duplicates/NULL entries already read 0.
       At least one element is requested so that n == 0 does not look like an
       allocation failure to the caller. */
    counts = calloc(n > 0 ? (size_t) n : 1, sizeof *counts);
    if (counts == NULL) {
        return NULL;
    }

    for (i = 0; i < n; i++) {
        /* NULL means this slot was already folded into an earlier word */
        if (wordArray[i] == NULL) {
            continue;
        }

        counts[i] = 1;

        /* only the words after i can still be duplicates of word i */
        for (j = i + 1; j < n; j++) {
            if (wordArray[j] != NULL && strcmp(wordArray[i], wordArray[j]) == 0) {
                counts[i]++;
                free(wordArray[j]);
                wordArray[j] = NULL;
            }
        }
    }

    return counts;
}

來源

2016-07-12 Nate

`while(!feof(textFile)) { numWords++; fscanf(textFile, "%s", buffer); }` 這種寫法是錯的。應該使用 `fscanf()` 的返回值來判斷何時退出循環。 – chux

假設你希望 countWords 的返回值是一個整數數組，其中存放每個不重複單詞的出現次數，那麼你需要一個雙重循環：外層循環遍歷整個數組，內層循環遍歷數組的其餘部分（即當前單詞之後的元素），尋找重複的單詞。

你可以做這樣的事情的僞代碼：

Allocate the return array countArray (n integers) 
Loop over all words (as you currently do in your `for i` loop) 
    If the word at `i` is not null // Check we haven't already deleted this word 
     // Found a new word 
     Set countArray[i] to 1 
     Loop through the rest of the words e.g. for (j = i + 1; j < n; j++) 
     If the word at j is not NULL and matches the word at i (using strcmp) 
      // Found a duplicate word 
      Increment countArray[i] (the original word's count) 
      // We don't want wordArray[j] anymore, so 
      Free wordArray[j] 
      Set wordArray[j] to NULL 
    Else 
     // A null indicates this was a duplicate, set the count to 0 for consistency. 
     Set countArray[i] to 0 
Return countArray

來源

2016-07-12 02:26:25

我要在這裏給你拋一個小小的曲線球。

你的代碼本身已經相當不錯，也很容易修復，只是還不完整；不過我沒有去修復它，而是決定從頭開始寫一個例子。

無需兩次讀取文件[第一次只是爲了獲得最大數量]。這可以通過動態數組和realloc來處理。

我認爲最主要的一點是：在創建單詞列表的過程中就確保其中沒有重複項，比在最後再刪除重複項要容易得多。

我做了以下幾個設計上的選擇。

我創建了一個「單詞控制」（word control）結構。你有好幾個單獨的數組，它們都用相同的下標來索引。這種情況簡直就是在「呼喚一個結構體」。也就是說，與其使用（比如說）5 個獨立的數組，不如使用單個結構體數組，其中每個結構體有 5 個成員。

單詞列表是由這些結構組成的鏈表。它也可以改用堆上的動態數組來實現，但對於這個特定的用途，鏈表實際上更容易維護。

每個結構都有[清理過的]單詞文本和出現次數（與單獨的wordArray和countArray相比）。

添加一個單詞時，會先掃描該列表以尋找已有的匹配項。如果找到了，就只遞增它的計數，而不是創建一個新的列表元素。這是消除重複的關鍵[即從一開始就不要創建重複項]。

無論如何，代碼如下：

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <ctype.h> 
#include <errno.h> 

// sysfault -- report a fatal error and terminate the program
// Takes printf-style arguments (format string first), writes the message to
// stderr so that it does not mix with the word counts on stdout, then exits
// with status 1. Wrapped in do/while(0) so that it behaves like a single
// statement, e.g. after an unbraced "if".
#define sysfault(...) \
    do { \
        fprintf(stderr, __VA_ARGS__); \
        exit(1); \
    } while (0)

// word control -- one list node per distinct word
struct word {
    struct word *next;      // following node, NULL at the end of the list
    char *str;              // text of the word
    int count;              // how many times the word has been seen
};

typedef struct word word_t;

// head of the word list: only its "next" member is used, pointing at the
// first real node (NULL while the list is empty)
word_t wordlist = { 0 };

// cleanword -- strip chaff and clean up word
// Copies every alphabetic character of src to dst, converted to lower case,
// and drops everything else (digits, punctuation, ...). dst is always
// NUL-terminated and ends up empty when src contains no letters.
// dst must have room for strlen(src) + 1 bytes; no bounds check is done here.
void
cleanword(char *dst,const char *src)
{
    unsigned char chr;

    // NOTE: using _two_ buffers is much easier than trying to clean one
    // buffer in-place
    for (; *src != '\0'; ++src) {
        // the <ctype.h> functions require a value representable as
        // unsigned char (or EOF); a plain char may be negative for
        // non-ASCII bytes, which would be undefined behavior
        chr = (unsigned char) *src;
        if (! isalpha(chr))
            continue;
        *dst++ = (char) tolower(chr);
    }

    *dst = '\0';
}

// addword -- add unique word to list and keep count of number of words
// str is a raw token; it is cleaned with cleanword() first. If the cleaned
// word is already on the list its count is incremented, otherwise a new node
// with count 1 is appended to the end of the list. A token that contains no
// letters cleans to an empty string and is ignored. Allocation failure is
// fatal (sysfault).
void
addword(const char *str)
{
    word_t *cur;
    word_t *prev;
    char *word;

    // the cleaned word is never longer than the raw token, so a buffer of
    // this size always suffices, whatever the length of str
    word = malloc(strlen(str) + 1);
    if (word == NULL) {
        sysfault("addword: malloc failure -- %s\n",strerror(errno));
    }

    // get the cleaned up word
    cleanword(word,str);

    // nothing but digits/punctuation -- not a word, do not count it
    if (word[0] == '\0') {
        free(word);
        return;
    }

    // find a match to a previous word [if it exists]
    // prev trails cur so that it ends up on the last node when there is no
    // match
    prev = NULL;
    for (cur = wordlist.next; cur != NULL; cur = cur->next) {
        if (strcmp(cur->str,word) == 0)
            break;
        prev = cur;
    }

    // found a match -- just increment the count (i.e. do _not_ create a
    // duplicate that has to be removed later)
    if (cur != NULL) {
        cur->count += 1;
        free(word);
        return;
    }

    // new unique word
    cur = malloc(sizeof *cur);
    if (cur == NULL) {
        sysfault("addword: malloc failure -- %s\n",strerror(errno));
    }

    cur->count = 1;
    cur->next = NULL;

    // the node takes ownership of the cleaned string (no second copy needed)
    cur->str = word;

    // add the new word to the end of the list
    if (prev != NULL)
        prev->next = cur;

    // add the first word
    else
        wordlist.next = cur;
}

// main -- count the occurrences of every word in a text file
// Usage: <program> <file>
// Reads the file line by line, splits each line on blanks, tabs and
// newlines, feeds every token to addword() and finally prints one
// "<word> <count>" line per distinct word in order of first appearance.
// Returns 0 on success; a missing argument, a file that cannot be opened or
// a read error is fatal (sysfault).
int
main(int argc,char **argv)
{
    FILE *xf;
    char buf[1000];
    char *cp;
    char *bp;
    word_t *cur;

    // without a file name argv[1] is NULL and must not be given to fopen()
    if (argc < 2) {
        sysfault("usage: %s <file>\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "wordcount");
    }

    --argc;
    ++argv;

    xf = fopen(*argv,"r");
    if (xf == NULL) {
        sysfault("main: unable to open '%s' -- %s\n",*argv,strerror(errno));
    }

    while (1) {
        // get next line
        cp = fgets(buf,sizeof(buf),xf);
        if (cp == NULL)
            break;

        // loop through all words on a line
        // strtok wants the buffer on the first call and NULL afterwards
        bp = buf;
        while (1) {
            cp = strtok(bp," \t\n");
            bp = NULL;

            if (cp == NULL)
                break;

            // add this word to the list [avoiding duplicates]
            addword(cp);
        }
    }

    // fgets returns NULL on a read error as well as on end of file
    if (ferror(xf)) {
        sysfault("main: error reading '%s' -- %s\n",*argv,strerror(errno));
    }

    fclose(xf);

    // print the words and their counts
    for (cur = wordlist.next; cur != NULL; cur = cur->next)
        printf("%s %d\n",cur->str,cur->count);

    return 0;
}

來源

2016-07-12 03:17:16

計算每個單詞的出現次數

回答

相關問題