2015-12-27 99 views
0

我試圖顯示每個單詞在文件中出現的次數。我不允許使用第三方庫(C++ STL、Boost 等),這就是我遇到麻煩的原因。以下是我目前的進展。(問題標題:找不到單詞的頻率 C++)

我創建了一個數組列表來存儲文件中的單詞,並去掉了標點符號和數字。現在我需要打印所有單詞及其出現頻率,並按頻率排序,就像這樣:

Words:   Frequency: 
their   13 
how   10 
apple   9 
is    5 

arrayList.h

#include <iostream> 
#include <string> 
using namespace std; 

// Fixed-capacity list of strings stored in a heap-allocated array.
//
// The capacity is chosen at construction and never changes. The object owns
// the array through a raw pointer, so copying is disabled: a compiler-generated
// copy would leave two objects deleting the same array in their destructors.
class arrayList
{
public:
    // Returns true when the list holds no items.
    bool isEmpty() const;
    // Returns true when the number of stored items equals the capacity.
    bool isFull() const;
    // Returns the number of items currently stored.
    int listSize() const;
    // Writes every stored item to standard output.
    void print() const;

    // Inserts insertItem at index location.
    void insertAt(int location, const std::string& insertItem);
    // Removes the item at index location, closing the gap.
    void removeAt(int location);
    // Returns a copy of the item at index location.
    std::string retrieveAt(int location) const;
    // Returns true when item is present in the list.
    bool seqSearch(const std::string& item) const;
    // Appends insertItem after the last stored item.
    void insert(const std::string& insertItem);
    // Removes removeItem from the list.
    void remove(const std::string& removeItem);

    // Creates an empty list with room for the given number of items.
    arrayList(int);
    ~arrayList();

    // Non-copyable: see the ownership note at the top of the class.
    arrayList(const arrayList&) = delete;
    arrayList& operator=(const arrayList&) = delete;

private:
    std::string *list;  // owned array of maxSize elements
    int length;         // number of items currently in use
    int maxSize;        // capacity of the array
};

arrayList.cpp

#include "arrayList.h" 


// Reports whether the list currently stores no items.
bool arrayList::isEmpty() const
{
    return 0 == length;
}

// Reports whether the item count has reached the capacity.
bool arrayList::isFull() const
{
    return maxSize == length;
}

// Returns how many items are currently stored (not the capacity).
int arrayList::listSize() const
{
    const int count = length;
    return count;
}

// Writes the stored items to standard output in index order.
// No separator is written between items.
void arrayList::print() const
{
    int idx = 0;
    while (idx < length)
    {
        cout << list[idx];
        ++idx;
    }
}

void arrayList::insertAt(int location, const string& insertItem) 
{ 
    list[location] = insertItem; 
    length++; 
} 

// Removes the item at index location and shifts the following items one slot
// to the left.
//
// location must be in [0, length - 1]. An out-of-range index (which includes
// every index when the list is empty) is rejected with a message on standard
// error and leaves the list unchanged.
void arrayList::removeAt(int location)
{
    if (location < 0 || location >= length)
    {
        cerr << "removeAt: position " << location << " is out of range\n";
        return;
    }

    for (int i = location; i < length - 1; i++)
        list[i] = list[i + 1];

    length--;
}

string arrayList::retrieveAt(int location) const 
{ 
     return list[location]; 
} 


// Linear search: returns true as soon as an item equal to `item` is found
// among the stored items, false when none matches.
bool arrayList::seqSearch(const string& item) const
{
    for (int idx = 0; idx < length; ++idx)
    {
        if (list[idx] == item)
            return true;
    }
    return false;
}

void arrayList::insert(const string& insertItem) 
{ 
    list[length++] = insertItem; 
} 

void arrayList::remove(const string& removeItem) 
{ 
    int loc; 
    loc = seqSearch(removeItem); 

    removeAt(loc); 
} 

// Builds an empty list and allocates storage for `size` strings.
// Initializers are listed in member declaration order (list, length, maxSize).
arrayList::arrayList(int size)
    : list(new string[size]),
      length(0),
      maxSize(size)
{
}


// Releases the array allocated by the constructor.
arrayList::~arrayList()
{
    delete[] list;
}

Source.cpp

#include <iostream> 
#include <fstream> 
#include <string> 
#include "arrayList.h" 
#include <cctype> 

using namespace std; 


// Counts the whitespace-separated tokens remaining in `file`, then clears the
// stream state and seeks back to the beginning so the caller can read the
// file again.
int wordCount(ifstream &file)
{
    int total = 0;
    for (string token; file >> token; )
        ++total;

    // Reading to the end sets eof/fail; reset before repositioning.
    file.clear();
    file.seekg(0, ios::beg);
    return total;
}

// Returns `word` with these punctuation characters removed:
//   . , ; : ? - [ ] ( ) ! " ' _
// All other characters are kept in their original order.
//
// The characters are deleted rather than replaced by spaces, so that
// "apple," and "apple" compare equal when words are counted.
std::string removePunct(std::string word)
{
    static const std::string punctuation = ".,;:?-[]()!\"'_";

    std::string cleaned;
    cleaned.reserve(word.length());
    for (std::string::size_type i = 0; i < word.length(); ++i)
    {
        if (punctuation.find(word[i]) == std::string::npos)
            cleaned += word[i];
    }
    return cleaned;
}

// Returns a copy of `word` with every uppercase letter converted to lowercase
// according to the current C locale; other characters are unchanged.
std::string makelower (std::string word)
{
    for (std::string::size_type i = 0; i < word.length(); ++i)
    {
        // The <cctype> functions require a value representable as unsigned
        // char; passing a negative plain char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        // tolower returns its argument unchanged when it is not an uppercase
        // letter, so no separate isupper test is needed.
        word[i] = static_cast<char>(std::tolower(ch));
    }
    return word;
}

// Reads a file name from standard input, loads the file's words into an
// arrayList (lowercased, punctuation handled by removePunct, tokens starting
// with a digit replaced by " "), then waits for a key press.
// Returns 1 when the file cannot be opened, 0 otherwise.
int main() 
{ 
    string fileName; 
    ifstream file; 

    cout << "Please enter the file name: "; 
    getline(cin,fileName); 
    file.open(fileName); 
    if (!file)
    {
        cerr << "Could not open file: " << fileName << endl;
        system("pause");
        return 1;
    }

    // Each token may add up to two entries (the word and a " " separator).
    int listSize = wordCount(file); 
    arrayList list1(listSize*2); 

    string word, newWord; 
    while (file >> word) 
    { 
     // operator>> never yields an empty token, so word[0] always exists.
     // Only the first character decides whether the token is a number; the
     // cast keeps isdigit defined for characters with negative values.
     if (isdigit(static_cast<unsigned char>(word[0]))) 
     { 
      list1.insert(" "); 
     } 
     else 
     { 
      newWord = makelower(word); 
      list1.insert(removePunct(newWord)); 
      list1.insert(" "); 
     } 
    } 

    // NOTE: counting is still not implemented. The attempt below is kept for
    // reference; it cannot work as written because `counter` is never
    // initialised and the inner while loop's condition never changes, so it
    // would not terminate for any word that is in the list.
    /*int *counter = new int [listSize*2]; //I tried this but don't think its working 
    string item; 

    for (int i = 0; i < list1.listSize(); i++) 
    { 
     if(list1.retrieveAt(i) != " ") 
     { 
      string item = list1.retrieveAt(i); 
      while (list1.seqSearch(item)) 
       counter[i]++; 
     } 
    }*/ 

    system("pause"); 
    return 0; 
} 

如有任何幫助,不勝感激。

+0

「我創建了一個數組列表來存儲文件中的單詞,我去掉了標點符號和數字,現在我需要打印所有單詞的頻率並按照頻率排序」 – JrLinq

+0

先對您的列表排序,然後統計連續相同的單詞就很容易了。 – Jarod42

+0

既然您目前使用了 `std::string`、`std::ifstream` 和 `std::ostream`,那麼您究竟不允許使用 C++ 標準庫(不是 STL)的哪些部分? – Daniel

回答

0

你的代碼中有奇怪的事情發生。但我認爲這並不影響你真正的問題。

解決辦法是這樣的:

// Parallel-array word/frequency table: words[i] is a distinct word and
// freqs[i] is how many times it has been passed to insertWord().
//
// The table owns both arrays through raw pointers, so copying is disabled.
class freqList
{
public:
    // Creates an empty table with room for `size` distinct words. A
    // non-positive size yields capacity 0; the table grows on first insert.
    explicit freqList(int size)
        : words(nullptr), freqs(nullptr), length(0), capacity(size > 0 ? size : 0)
    {
        words = new std::string[capacity];
        freqs = new int[capacity]();  // value-initialised: every count starts at 0
    }

    ~freqList()
    {
        delete[] words;
        delete[] freqs;
    }

    freqList(const freqList&) = delete;
    freqList& operator=(const freqList&) = delete;

    // Returns the index of `item` in the word list, or -1 if it is absent.
    int seqSearch(const std::string& item) const
    {
        for (int i = 0; i < length; ++i)
        {
            if (words[i] == item)
                return i;
        }
        return -1;
    }

    // Records one occurrence of `word`: bumps its counter if it is already
    // known, otherwise appends it with a count of 1.
    void insertWord(std::string word)
    {
        const int idx = seqSearch(word);
        if (idx >= 0)
        {   // it already exists in the words list
            freqs[idx]++;
            return;
        }

        // new word, add to the end of the list
        if (length == capacity)
            grow();
        words[length] = word;
        freqs[length] = 1;
        length++;
    }

    // Number of distinct words stored.
    int listSize() const { return length; }

    // Word at index i, or an empty string when i is out of range.
    std::string wordAt(int i) const
    {
        return (i >= 0 && i < length) ? words[i] : std::string();
    }

    // Occurrence count at index i, or 0 when i is out of range.
    int freqAt(int i) const
    {
        return (i >= 0 && i < length) ? freqs[i] : 0;
    }

private:
    // Doubles the capacity (or makes it 1 when it was 0), preserving contents.
    void grow()
    {
        const int newCapacity = (capacity > 0) ? capacity * 2 : 1;
        std::string *newWords = new std::string[newCapacity];
        int *newFreqs = new int[newCapacity]();
        for (int i = 0; i < length; ++i)
        {
            newWords[i] = words[i];
            newFreqs[i] = freqs[i];
        }
        delete[] words;
        delete[] freqs;
        words = newWords;
        freqs = newFreqs;
        capacity = newCapacity;
    }

    std::string *words;  // distinct words, in first-seen order
    int *freqs;          // same size as words; freqs[i] counts words[i]
    int length;          // number of distinct words stored
    int capacity;        // allocated size of both arrays
};


int wordCount(ifstream &file) 
{ 
    string word;; 
    int count=0; 
    while (file >> word) 
    { 
     word = removePunct(word); 
     word = wordToLower(word); 
     if(isWord(word)) 
      count++; 
    } 
    file.clear(); 
    file.seekg(0, ios::beg); 
    return count; 
} 

// Decides whether a cleaned token should be counted as a word.
//
// Default constraint: the token contains at least one letter and no digits.
// Empty or whitespace-only tokens are therefore rejected, as is anything
// numeric. Adjust this function if your own constraints differ.
bool isWord(const std::string& word)
{
    bool hasLetter = false;
    for (std::string::size_type i = 0; i < word.length(); ++i)
    {
        // Cast first: <cctype> functions are undefined for negative values.
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        if (std::isdigit(ch))
            return false;
        if (std::isalpha(ch))
            hasLetter = true;
    }
    return hasLetter;
}

現在,主函數(main)應該是這樣的:

// Size the frequency list from a first pass over the file, then read the file
// again and record every token that qualifies as a word.
int listSize = wordCount(file); 
freqList freqs(listSize); 
string word; 
while (file >> word) 
{ 
    word = removePunct(word); 
    word = makelower(word);  // the lowercase helper defined in Source.cpp
    // ... apply any other constraints you have here
    if (isWord(word)) 
    { 
     freqs.insertWord(word);  // freqList's method is insertWord, not insert
    } 
} 

while 循環結束後,words 中保存了各個單詞,freqs 中保存了每個單詞對應的出現頻率。

+0

非常感謝@ seleciii44 – JrLinq

+0

歡迎:) – seleciii44