2017-03-26 85 views
-1

我是 C++ 新手。我正在嘗試實作倒排索引,但看不懂下面這段代碼的思路。我想要統計每個單詞出現的頻率。你能解釋這段代碼,並說明如何計算單詞的頻率嗎?請幫我解決這個問題。(相關:倒排索引、節點、addWord)

/**
 * One node of the character trie used by the inverted index.
 *
 * Each node owns up to MAX_NODES children through raw pointers and frees
 * them recursively in its destructor.
 *
 * NOTE(review): the class owns raw pointers but relies on the implicitly
 * generated copy constructor / copy assignment; copying a node would lead
 * to a double delete. Do not copy nodes.
 */
class node{
public:
    /** Builds an empty node: no children, not a word, zero occurrences. */
    node() {
        clear();
    }

    /**
     * Builds an empty node. The character argument is accepted so callers
     * can write `new node(c)`, but it is not stored.
     */
    node(char /*z*/) {
        clear();
    }

    /** Recursively destroys every child subtree. */
    ~node() {
        for (int x = 0; x < MAX_NODES; x++) {
            delete next[x]; // deleting a null pointer is a no-op
        }
    }

    /**
     * Resets the child table, the word flag and the occurrence counter.
     *
     * Children are NOT deleted here: this is called from the constructors,
     * where `next` still holds indeterminate values. Calling it on a node
     * that already has children would leak them.
     */
    void clear() {
        for (int x = 0; x < MAX_NODES; x++) {
            next[x] = 0;
        }
        isWord = false;
        // Previously left uninitialised, which made the later `count++`
        // in index::addWord read an indeterminate value.
        count = 0;
    }

    bool isWord;                        // true when a word ends at this node
    int count;                          // number of occurrences recorded for the word
    std::vector<std::string> files;     // file names associated with the word
    node* next[MAX_NODES];              // child pointers; 0 means "no child"
    std::map<std::string, int> counts;  // presumably per-file counts; not used by the visible code
};
class index { 
public: 
void add(std::string s, std::string fileName) { 
    std::transform(s.begin(), s.end(), s.begin(), tolower); 
    std::string h; 
    int freq=0; 
    for (std::string::iterator i = s.begin(); i != s.end(); i++) { 
     if (*i == 32) { 
      pushFileName(addWord(h), fileName); 
      h.clear(); 
      continue; 
     } 
     h.append(1, *i); 
    } 
    if (h.length()){ 
     pushFileName(addWord(h), fileName); 
    } 
} 
void findWord(std::string s, map<string, int> counts) { 
    std::vector<std::string> v = find(s); 
    if (!v.size()) { 
     std::cout <<"'"<< s + "' is not found!\n"; 
     return; 
    } 
    std::cout << "'" << s << "' is found in:\n"; 
    for (std::vector<std::string>::iterator i = v.begin(); i != v.end(); i++) { 
     std::cout << *i << "\n"; 

    } 
    std::cout << "frequency is : "; 

} 
private: 
void pushFileName(node* n, std::string fn) { 
    std::vector<std::string>::iterator i = std::find(n->files.begin(), n->files.end(), fn); 
    if (i == n->files.end()){ 
     n->files.push_back(fn); 
     n->count; 
    } 
} 

const std::vector<std::string>& find(std::string s) { 
    size_t idx; 
    std::transform(s.begin(), s.end(), s.begin(), tolower); 
    node* rt = &root; 
    for (std::string::iterator i = s.begin(); i != s.end(); i++) { 
     idx = _CHARS.find(*i); 
     if (idx < MAX_NODES) { 
      if (!rt->next[idx]){ 
       return std::vector<std::string>(); 
      } 
      rt = rt->next[idx]; 
     } 
    } 
    if (rt->isWord) return rt->files; 
    return std::vector<std::string>(); 
} 
node* addWord(std::string s) { 
    size_t idx; 
    node *rt = &root, *n; 
    for (std::string::iterator i = s.begin(); i != s.end(); i++) { 
     idx = _CHARS.find(*i); 
     if (idx < MAX_NODES) { 
      n = rt->next[idx]; 
      if (n){ 
       rt = n; 
       continue; 
      } 
      n = new node(*i); 
      rt->next[idx] = n; 
      rt = n; 
     } 
    } 
    rt->isWord = true; 
    rt->count++; 
    return rt; 
} 
node root; 
}; 

class index { 
public: 
/**
 * Splits `s` on spaces, lower-cases it and records every non-empty word
 * as occurring in `fileName`.
 *
 * @param s        text to index (taken by value; modified locally)
 * @param fileName name recorded for every word found in `s`
 */
void add(std::string s, std::string fileName) {
    for (std::string::iterator c = s.begin(); c != s.end(); ++c) {
        // tolower requires a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
    }
    std::string h;
    for (std::string::iterator i = s.begin(); i != s.end(); ++i) {
        if (*i == ' ') {
            // Skip empty tokens from leading or repeated spaces; adding
            // an empty word would register the file on the trie root.
            if (!h.empty()) {
                pushFileName(addWord(h), fileName);
            }
            h.clear();
            continue;
        }
        h.append(1, *i);
    }
    if (!h.empty()) {
        pushFileName(addWord(h), fileName);
    }
}
/**
 * Looks up `s` via find() and prints either a "not found" message or the
 * list of matching file names, one per line, followed by the
 * "frequency is : " label (no value is printed after the label).
 *
 * @param s          word to look up
 * @param mFilesFreq not used by this function
 */
void findWord(std::string s, map<string, int> mFilesFreq) {
    std::vector<std::string> matches = find(s);
    if (matches.empty()) {
        std::cout << "'" << s + "' is not found!\n";
        return;
    }

    std::cout << "'" << s << "' is found in:\n";
    for (std::vector<std::string>::size_type k = 0; k < matches.size(); ++k) {
        std::cout << matches[k] << "\n";
    }
    std::cout << "frequency is : ";
}

回答

1

如果你想統計對某個給定單詞調用 add 的次數,你可能需要用 `rt->count++;` 替換 `rt->isWord = true;`,並在你的 `struct node` 中用 `int count` 取代 `bool isWord`。

+0

我會使用 `map<string, int> counts`(而不是 `files` 和 `count`),其中 `string` 是文件名,`int` 是計數。要更新計數,請使用 `rt->counts[filename]++;` –