2014-05-14 22 views
0

我想統計文本中每一個單詞的出現次數,而不僅僅是我輸入的那個單詞。那麼我該如何把這段C++代碼中的「char word[50]」改爲「string word[50]」?整個程序都是關於TF/IDF的計算。(標題:C++單詞計數器)

#include <iostream> 
#include <stdio.h> 
#include <conio.h> 
#include <math.h> 
#include <string.h> 
#include <stdlib.h> 
#include <iomanip> 
#include <cmath> 

using namespace std; 

/**************************************************/ 

下面就是 char word[50] 所在的聲明:

int ch,match[3],cnt,i,cntt[3],matchh[3],occurence,flg[3]; 
float tf,idf; 
char word[50],line[50]; 
int nd=3,iterm; 
int doc[20]; 

/**************************************************/ 

void case1() 
{ 

FILE *fill[3]; 
fill[1]=fopen("doc1.txt","r"); 
fill[2]=fopen("doc2.txt","r"); 
fill[3]=fopen("doc3.txt","r"); 
match[i]=0; 
cnt=0; 

我需要改變這裏的話,當我做焦炭字[50]字符串字[50]這裏提供了一些錯誤

for(i=1;i<4;i++) 
{ 
while(!feof(fill[i])) 
{ 
fscanf(fill[i],"%s",line); 
// if(strcmp(line,word)==0) 

    if(strstr(line,word)!=0) 

    match[i]++; 
    cntt[i]++; 
} 

fclose(fill[i]); 

} 

cout<<"\n----------------Total # of Word-------------------\n"; 

for(i=1;i<4;i++) 

{ 

cout<<"\n Documant "<<i<<" = "<<cntt[i]; 

} 

cout<<"\n\n------------------Term Counts--------------------\n"; 


cout<<word<<" "; 
cout<<"("; 
for(i=1;i<4;i++) 
{ 

cout<<"Doc"<<i<<","<<match[i]<<" ; "; 

} 
cout<<")"; 
cout << "\n\n\n | Words | D1  | D2 | D3 | \n"; 
cout<<"----------------------------------------------------------\n"; 


cout<<"  "<<word; 

for(i=1;i<4;i++) 

{ 

tf=(float)match[i]/cntt[i]; //Term Frequency 

cout<<"  "<<tf; 

} 


cout<<"\n\n\n\n\n\n\n"; 
} 

/**************************************************/ 

void case2() 
{ 
FILE *fill[3]; 
fill[1]=fopen("doc1.txt","r"); 
fill[2]=fopen("doc2.txt","r"); 
fill[3]=fopen("doc3.txt","r"); 
match[i]=0; 
cnt=0; 

cout<<"\n Total Number Of Documants => 3"; 
for(i=1;i<4;i++) 
{ 
while(!feof(fill[i])) 
{ 
    fscanf(fill[i],"%s",line); 
    // if(strcmp(line,word)==0) 
    if(strstr(line,word)!=0) 
    { 
     match[i]++; 
     matchh[i]++; 
     flg[i]=1; 
    } 
    cnt++; 
    cntt[i]++; 
} 

fclose(fill[i]); 
} 

cout<<"\n----------------Total # of Word-------------------\n"; 

for(i=1;i<4;i++) 
{ 

cout<<"\n Documant "<<i<<" are = "<<cntt[i]; 

} 

cout<<"\n\n------------------Term Counts--------------------\n"; 

for(i=1;i<4;i++) 
{ 

cout<<"\n Documant "<<i<<" = "<<matchh[i]; 

} 

for(i=1;i<4;i++) 
{ 
if(matchh[i]>0) 
{ 

matchh[i]=1; 
flg[i]=flg[i]+matchh[i]; 
occurence=occurence+flg[i]; 

} 

else 
matchh[i]=0; 

} 


tf=(float)3/occurence; 
idf=log10(tf); 

cout<<"\n\n-----------------Inverse Document Frequency-------------------\n"; 


cout<<"\n IDF = "<<idf; 

for(i=1;i<4;i++) 

matchh[i]=cntt[i]=flg[i]=0; 

cout<<"\n"; 


} 
/**************************************************/ 



int main() 

{ 


do 

{ 
cout<<"\n************** Menu ****************\n"; 
cout<<"\n (1) Term Frequency"; 
cout<<"\n (2) Inverse Document Frequency"; 
cout<<"\n (3) Exit"; 
cout<<"\n************************************\n"; 
cout<<"\n Select from Menu => ";cin>>ch; 
switch(ch) 

{ 

這裏主要部分我不想要進入的話,我想列出每個字數。

case 1: 


      cout<<"\n Enter The Word => "; 
      cin>>word; 

      case1(); 
      break; 

    case 2: cout<<"\n Enter The Word => "; 
      cin>>word; 
      case2(); 
      break; 

    case 3: exit(0); 

} 


} 

while(ch!=3); 

getch(); 

} 
+1

把C風格的IO與C++風格的IO(如`std::cin`)混合使用是可以正常工作的,但最好只堅持使用其中一種。 – Rook

+0

首先感謝您的回覆。我用的是Code::Blocks,請問我應該修改哪一部分?是和std::cin有關的部分嗎?還是`if(strstr(line,word)!=0)`這一行,需要把「strstr」換掉? – seroberto666

+0

請看我的回答,其中給出了替代`strstr`的寫法。 – Rook

回答

1

使用std::string。在可能的情況下,簡單的舊式C字符串值得避免,因爲它們很容易被濫用。 C++字符串更安全,通常更簡單。

// Substring search with std::string: find() yields std::string::npos when
// the needle does not occur in the haystack.
std::string text{"some long line of text"};
std::string word{"line"};

const bool word_present = (text.find(word) != std::string::npos);
if (word_present)
{
    std::cout << "Found the word!\n";
}

如果你有多個單詞:

// Several search terms: report every one that occurs somewhere in `text`.
std::vector<std::string> words = { "one", "word", "is", "badgers" };

for (const std::string& candidate : words)
{
    const bool present = (text.find(candidate) != std::string::npos);
    if (present)
    {
        std::cout << "Found \"" << candidate << "\"!\n";
    }
}

如果你想從文件中加載這些單詞,完全可以只使用(相對)安全的C++ IO類和函數來完成。C風格的IO(比如`fscanf`)和C++風格的IO(比如`std::cin`)混用是可以正常工作的,但最好只堅持使用其中一種。

// Load one word per line from a file using C++ streams only.
std::ifstream wordfile("where/your/file/is");
std::string word;

// Test the stream returned by getline directly: it converts to true whenever
// a line was extracted. Checking .good() instead would drop the final line of
// a file that does not end with a newline, because reading that line sets
// eofbit even though the extraction succeeded.
while (std::getline(wordfile, word))
{
    words.push_back(word);
}
+0

這裏的`std::array`是不是本來想寫`std::vector`? – interjay

+0

@interjay是的,很好的發現。現在修好了,ta。 – Rook