2014-10-18 86 views
1

我希望有人能幫助我解決這個問題。我正在用C++創建一個HTML詞法分析器。根據老師的要求,我應該有3個文件:一個頭文件和2個.cpp文件,並且程序應該能夠讀取文件。我的問題是:爲什麼我的詞法分析器不能識別引號?這是我的文件try.txt:

<<<<<Hello there <H1 style=」BOLD」>header!!</H1> 
<< 
<< = 

這是我的頭文件

#ifndef tokens_h 
#define tokens_h 
#include <string> 
#include <iostream> 



     /*
      * Token kinds produced by the HTML lexer.
      *
      * The single-character tokens use the ASCII code of the character they
      * stand for. Every value is written out explicitly; these are exactly
      * the values the original mixed explicit/implicit declaration produced
      * (ID followed SLASH, OTHER followed QSTRING, END followed OTHER).
      */
     enum tokens {
         TEXT    = 0,
         LANGLE  = 60,  // '<'
         RANGLE  = 62,  // '>'
         SLASH   = 47,  // '/'
         ID      = 48,
         EQ      = 61,  // '='
         QSTRING = 34,  // '"'
         OTHER   = 35,
         END     = 36
     };

     /*
      * Scans one line of text and reports the tokens found in it.
      *
      * br  stream the line was read from
      * a   the line to scan
      */
     int getToken(std::istream *br, std::string a);

#endif 

這是我的main.cpp

#include <iostream> 
#include <fstream> 
#include <vector> 
#include "tokens.h" 


using namespace std; 

/*
 * Entry point: feeds the input to getToken() one line at a time.
 *
 * With no command-line argument the lines come from standard input; with
 * exactly one argument they come from the file of that name.
 *
 * Returns 0 on success, 1 when the argument count is wrong or the file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
    if (argc != 1 && argc != 2) {
        return 1;
    }

    std::ifstream infile;
    std::istream *br = &std::cin;

    if (argc == 2) {
        infile.open(argv[1]);
        if (!infile.is_open()) {
            std::cout << argv[1] << "Can't Be Opened" << std::endl;
            return 1;
        }
        br = &infile;
    }

    // Let the read itself drive the loop. Testing good()/eof() before the
    // read would hand getToken() one extra, stale line after the final
    // getline() has already failed.
    std::string line;
    while (std::getline(*br, line)) {
        getToken(br, line);
    }

    return 0;
}

這是我的tokens.cpp,我在其中打印結果

#include <iostream> 
#include <stdio.h> 
#include <string> 
#include <vector> 
#include <algorithm> 
#include <numeric> 
#include <map> 
#include <utility> 
#include "tokens.h" 



using namespace std; 

/*
 * Counts one occurrence of a token and prints its running total.
 *
 * ch is the numeric token code: 60 (LANGLE), 62 (RANGLE), 47 (SLASH),
 * 61 (EQ) or 34 (QSTRING). Any other code is ignored and prints nothing.
 *
 * The totals are function-local statics, so they accumulate across calls
 * for the lifetime of the program. Output goes to std::cout in the form
 * "NAME: count " (with a trailing space and no newline).
 */
void compar(int ch)
{
    static int langleCount = 0;
    static int rangleCount = 0;
    static int slashCount = 0;
    static int eqCount = 0;
    static int qstringCount = 0;

    switch (ch) {
    case 60:
        std::cout << "LANGLE: " << ++langleCount << " ";
        break;
    case 62:
        std::cout << "RANGLE: " << ++rangleCount << " ";
        break;
    case 47:
        std::cout << "SLASH: " << ++slashCount << " ";
        break;
    case 61:
        std::cout << "EQ: " << ++eqCount << " ";
        break;
    case 34:
        std::cout << "QSTRING: " << ++qstringCount << " ";
        break;
    default:
        break;  // not a token this function counts
    }
}
int getToken(istream *br, string a) 
{ 

    int b; 
    string d = "no"; 
    string f = "no"; 
    string r; 
    vector<char> st; 
    vector<string> trial; 
    vector<int> countr; 
    vector<int> countl; 
    vector<char> quotes; 
    string ans; 
    int x=0; 

    r = a; 
    cout << a[27]; 


    int found; 
      found = a.find('\"'); 
         cout << found<<"XXxxxxxX"; 


     for(int i = 0; i< a.length();i++){ //read entire string 
     if(a[i] == '<'){ 
      // cout << LANGLE << " "; 
      d="yes"; 
      x +=1; 
      countr.push_back(LANGLE); 
      //cout << count.size(); 
      //cout << x; 
      compar(LANGLE); 
      b =LANGLE; 

    // return LANGLE; 
     } 
     else if(a[i]== '>' && d == "yes"){ 
      f = "yes"; 
      b = RANGLE; //assing to the int variable the value from the enum header 
      compar(RANGLE); 

     } 
     else if(a[i]== '/' && d == "yes"){ 
      compar(SLASH); 

     } 
     else if(a[i] == '=' && d == "yes"){ 
      compar(EQ); 

     } 
     else if(a[found] == '\"' && d == "yes"){ 

      // for(int k =0;k < quotes.size();k++) 
      //cout << r[found] <<"XXX"; 
      compar(QSTRING); 

     } 

     } 
    return 0; 
} 

程序讀取 < > / = 沒有問題,但是當我嘗試用 cout << a[27]; 讀取引號 '\"' 時,我得到這個:? 如果我打印 cout << a; 我得到 <<<<<Hello there <H1 style=」BOLD」>header!!</H1> //這是我想讀取的內容

當我使用 found = a.find('\"'); 時,它返回 -1。我的問題是:爲什麼我的程序不能識別引號?是我讀取文件的方式有問題嗎?

在此先感謝

回答

4

您的文件包含的是彎引號(排版引號):

”

,而你的詞法分析器查找的是直引號:

"

這些都是不同的

+1

這就是你從互聯網上複製/粘貼代碼(或者在MS Word中編寫代碼)的後果 – sehe 2014-10-18 20:40:47

+0

我是把教授給我們的文本文件內容複製到我自己的文本文件裏的。哇,謝謝你的回答。這個問題差點讓我抓狂 – elctronyc 2014-10-18 20:45:55

相關問題