2016-12-22 48 views
-1

我的單詞「all」中有單詞「alter」,但「alt」不是單詞中的單詞。但是當我檢查「alt」時,它仍然返回true,因爲is_word是true,因爲「all」是一個單詞。我應該如何解決這個錯誤。區分詞組中的單詞

//Here's the code 
typedef struct node{ 
    bool is_word;  
    struct node *children[27]; 
} node; 

unsigned int wsize=0; 
node * root; 

bool check(const char* word) 
{ 
    // TODO 
    node *chrawler=root; 
    for(int i=0;i<strlen(word)-1;i++) 
    { 
     int t; 
     if(word[i]>=65&&word[i]<=90) 
     {   
      t=word[i]-'A'; 
     } 
     else if(isalpha(word[i])) 
      t=word[i]-'a'; 
     else 
      t=26; 

     if(chrawler->children[t]==NULL) 
      return false; 
     else 
      chrawler=chrawler->children[t]; 
    } 

    if(chrawler->is_word) 
     return true; 
    return false;  

} 

// Load function 
bool load(const char* dictionary) 
{ 
    // TODO 

    FILE *inptr=fopen(dictionary,"r"); 
    if(inptr==NULL) 
    { 
     return false; 
    } 

    node *new_node=malloc(sizeof(node)); 
    root=new_node; 

    char * word=malloc((LENGTH+1)*sizeof(char)); 
    int index=0; 
    for(int c=fgetc(inptr);c!=EOF;c=fgetc(inptr)) 
    { 
     char ch=c; 
     if(ch=='\n') 
     { 
      word[index]='\0'; 
      index=0; 
      node *chrawler=root; 
      for(int i=1;i<strlen(word);i++) 
      { 
        int t; 
        if(isalpha(word[i-1])) 
         t=word[i-1]-'a'; 
        else 
         t=26; 
        if(chrawler->children[t]==NULL) 
        { 
         node *new_node=malloc(sizeof(node)); 
         chrawler->children[t]=new_node; 

         chrawler=chrawler->children[t]; 
        } 
        else 
         chrawler=chrawler->children[t]; 
      } 
      chrawler->is_word=1; 
      wsize++; 

     } 
     else 
     { 
      word[index]=ch; 
      index++; 
     } 

    } 

    return true; 
} 
+0

有一些模糊點......第一招:爲什麼'strlen的(字) -1'?第二:你如何填充你的特里? – Fefux

+0

對於輸入trie我做了一個單獨的函數加載和我用strlen(單詞)-1,因爲我需要遍歷到第二個最後節點,然後使用它的指針檢查,如果最後一個節點包含該單詞。 –

+0

你可以發佈你的加載函數嗎? – Fefux

回答

0

您需要確保在新節點的所有指針都爲空,還有is_word值設置爲false。這也許最容易通過使用calloc()來分配空間來完成。創建一個函數來分配和錯誤檢查節點的分配使得它更容易。同樣,你有兩個代碼塊將字符映射到索引。你應該使用更加慷慨的功能 - 甚至是小功能。

對於一行數據而言,逐字符輸入並不是真的必要;最好使用fgets()來讀取行。

添加這些和其他雜物的變化(例如,本地陣列word代替的動態分配的數組 - 這是不被釋放;關閉完成後的文件;等等)給出了一個MCVE(Minimal, Complete, Verifiable Example)所示:

#include <ctype.h> 
#include <stdbool.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

enum { LENGTH = 256 }; 

// Here's the code 
typedef struct node 
{ 
    bool is_word; 
    struct node *children[27]; 
} node; 

unsigned int wsize = 0; 
node *root; 

static inline int map_char(unsigned char c) 
{ 
    int t; 
    if (isalpha(c)) 
     t = tolower(c) - 'a'; 
    else 
     t = 26; 
    return t; 
} 

static inline node *alloc_node(void) 
{ 
    node *new_node = calloc(1, sizeof(node)); 
    if (new_node == 0) 
    { 
     fprintf(stderr, "Memory allocation failed in %s\n", __func__); 
     exit(1); 
    } 
    return new_node; 
} 

static bool check(const char *word) 
{ 
    node *chrawler = root; 
    int len = strlen(word); 
    for (int i = 0; i < len; i++) 
    { 
     int t = map_char(word[i]); 
     if (chrawler->children[t] == NULL) 
      return false; 
     else 
      chrawler = chrawler->children[t]; 
    } 

    return chrawler->is_word; 
} 

// Load function 
static bool load(const char *dictionary) 
{ 
    FILE *inptr = fopen(dictionary, "r"); 
    if (inptr == NULL) 
    { 
     fprintf(stderr, "Failed to open file '%s' for reading\n", dictionary); 
     return false; 
    } 

    root = alloc_node(); 

    char word[LENGTH]; 
    while (fgets(word, sizeof(word), inptr) != 0) 
    { 
     word[strcspn(word, "\n")] = '\0'; 
     printf("[%s]\n", word); 
     node *chrawler = root; 
     int len = strlen(word); 
     for (int i = 0; i < len; i++) 
     { 
      int t = map_char(word[i]); 
      //printf("t = %d (%c)\n", t, word[i]); 
      if (chrawler->children[t] == NULL) 
       chrawler->children[t] = alloc_node(); 
      chrawler = chrawler->children[t]; 
     } 
     chrawler->is_word = 1; 
     wsize++; 
    } 
    printf("%d words read from %s\n", wsize, dictionary); 
    fclose(inptr); 

    return true; 
} 

int main(void) 
{ 
    const char *wordfile = "words.txt"; 
    if (load(wordfile)) 
    { 
     char line[4096]; 
     while (fgets(line, sizeof(line), stdin) != 0) 
     { 
      line[strcspn(line, "\n")] = '\0'; 
      if (check(line)) 
       printf("[%s] is a word\n", line); 
      else 
       printf("[%s] is unknown\n", line); 
     } 
    } 
    return 0; 
} 

還有其他的改變。例如, wsize變量應該變爲非全局變量;它不是在load()函數之外真正使用的。很容易論證根節點不應該是全局的; load()函數應該返回根節點,並且check()函數應該傳遞到根節點。一般來說,應儘可能避免使用全局變量,而且通常是可行的。

給出一個包含文件words.txt

abelone 
abyssinia 
archimedes 
brachiosaurus 
triceratops 
all 
alter 
asparagus 
watchamacallit 
a 
abracadabra 
abyss 
ant 

從程序的運行輸出是:

[abelone] 
[abyssinia] 
[archimedes] 
[brachiosaurus] 
[triceratops] 
[all] 
[alter] 
[asparagus] 
[watchamacallit] 
[a] 
[abracadabra] 
[abyss] 
[ant] 
13 words read from words.txt 
a 
[a] is a word 
ab 
[ab] is unknown 
al 
[al] is unknown 
all 
[all] is a word 
alt 
[alt] is unknown 
alte 
[alte] is unknown 
alter 
[alter] is a word 
triceratops 
[triceratops] is a word 
brachiosaurus 
[brachiosaurus] is a word 
abys 
[abys] is unknown 
abbey 
[abbey] is unknown 
abyss 
[abyss] is a word 
ant 
[ant] is a word 
a 
[a] is a word 
archimedes 
[archimedes] is a word 
+0

因爲所有的is_word都是1而alt在同一個節點中所以alt並沒有給出,因爲如果我們檢查is_word,它應該返回1? –

+0

Alt和All不在同一個節點中。一個在l節點,一個在AL的t節點。並且ALL有單詞標誌設置,而ALT沒有。 –