2015-04-17 70 views
1

我是C編程新手,已經找到了這個程序。它需要一個文本並計算單詞的頻率。我遇到的問題是,當兩個或更多的單詞出現相同的次數時,這些單詞需要按字母順序排序,但我不知道如何。按字母順序排列字符串C

下面是代碼:

#include <stdio.h> 
#include <string.h> 
#include <ctype.h> 
#include <stdlib.h> 

#define MAXWORDS 10000 
#define MAXSTRING 100 

/* structure holding word frequency information */ 

typedef struct _word { 
    char s[MAXSTRING]; /* the word */ 
    int count;  /* number of times word occurs */ 
} word; 

void insert_word(word *words, int *n, char *s) { 
    int i; 

    for (i = 0; i < *n; i++) { 
     if (strcmp(s, words[i].s) == 0) { 
      /* found it? increment and return. */ 
      words[i].count++; 
      return; 
     } 
    }  
    strcpy(words[*n].s, s); 

    /* this word has occurred once up to now, so count = 1 */ 
    words[*n].count = 1; 

    /* one more word */ 
    (*n)++; 
} 

/* comparison function for quicksort. this lets quicksort sort words 
* by descending order of count, i.e., from most to least frequent 
*/ 
int wordcmp(word *a, word *b) { 
    if (a->count < b->count) return +1; 
    if (a->count > b->count) return -1; 
    return 0; 
} 

/* return 1 if c is alphabetic (a..z or A..Z), 0 otherwise */ 
int is_alpha(char c) { 
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) return 1; 
    return 0; 
} 

/* remove the i'th character from the string s */ 
void remove_char (char *s, int i) { 
    while (s[i]) { 
     i++; 
     s[i-1] = s[i]; 
    } 
    s[i] = 0; 
} 

/* remove non-alphabetic characters from the string s */ 
void remove_non_alpha(char *s) { 
    int i; 

    for (i = 0; s[i]; i++) { 
     if (!is_alpha (s[i])) 
      remove_char (s, i); 
    } 
} 

/* make all the letters in s lowercase */ 
void make_lowercase(char *s) { 
    int i; 

    for (i = 0; s[i]; i++) 
     s[i] = tolower(s[i]); 
} 

/* main program */ 
int main() { 
    word words[MAXWORDS]; 
    char s[1000]; 
    int i, n, m; 

    n = 0; 
    int a; 
    scanf("%d",&a); 

    /* read all the words in the file... */ 

    while (!feof(stdin)) { 
     scanf("%s", s); 

     if (is_alpha(s[0])) { 
      remove_non_alpha(s); 
      make_lowercase(s); 
      insert_word(words, &n, s); 
     } 
    } 

    qsort((void *)words, n, sizeof(word), 
      (int (*)(const void *, const void *))wordcmp); 

    /* if fewer than 20 words in total, just print up the the 
    * first n words 
    */ 
    if (n < a) 
     m = n; 
    else 
     m = a; 

    /* print the words with their frequencies */ 
    for (i = 0; i < m; i++) 
     printf("%s %d\n", words[i].s, words[i].count); 
} 
+0

當「兩個或更多的單詞出現相同的次數」時,你的輸出是什麼?想想看,這是什麼意思? –

回答

2

你應該細化對比功能:如果出現次數相等,返回字符串本身的比較:

/* comparison function for quicksort. this lets quicksort sort words 
* by descending order of count, i.e., from most to least frequent. 
* words with equal frequency will be sorted in ascending lexicographical 
* order. 
*/ 
int wordcmp (word *a, word *b) { 
    if (a->count < b->count) return +1; 
    if (a->count > b->count) return -1; 
    return strcmp(a->s, b->s); 
} 

另外請注意,您解析循環不正確:while (!feof(stdin))在文件結束時沒有正確停止,最後一個詞被解析兩次。你應該在邏輯更改爲:

while (scanf("%999s", s) == 1) { 
    ... 
} 

格式"%999s"防止造成緩衝區溢出過長的話。這樣一個長長的單詞將被無聲地分割,因此會略微偏移統計量,而不是調用未定義的行爲(潛在的崩潰)。

+0

注意:'strcmp'通常不是字母(它比較字節值)。雖然在這種情況下它*是*按字母順序排列(輸入是小寫的a..z)。 – jfs

+0

@ J.F。塞巴斯蒂安:沒錯! 'strcmp()'在'C'語言環境中執行字典式比較,即逐字節比較。如果OP想要更精細的排序方法,他可以使用'strcoll()'來代替,並通過正確定義和選擇來祈禱區域設置。即使只使用小寫字母a-z,西班牙語的適當整理與「strcmp」將產生的不同:「ll」和「ch」需要特殊處理。 – chqrlie