2013-10-05 26 views
5

編輯:我已經把詞法分析器(lexer)拿掉了,因為它與 Qi 整合得不夠乾淨,只會讓文法變得難以閱讀(見here)。Boost Spirit lex -> qi:讓「沒有文件說明」的 on_success 機制運作起來


on_success 沒有完善的文件說明,而我想把它接到我的解析器上。現有使用 on_success 的範例,都只是純粹建立在 Qi 之上的解析器,也就是沒有搭配 Lex。

這是我嘗試加入的寫法:

using namespace qi::labels; 
qi::on_success(event_entry_,std::cout << _val << _1); 

但它無法編譯。我很擔心問題出在 lex 上。有人可以告訴我哪裡做錯了嗎?另外,也請告訴我有哪些佔位符可用、它們的類型以及各自代表什麼(因為這些都沒有文件說明)。

完整文件如下:

#include <boost/spirit/include/phoenix_core.hpp> 
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp> 
#include <boost/spirit/include/lex_lexertl.hpp> 
#include <boost/spirit/include/qi.hpp> 
#include <boost/none.hpp> 
#include <boost/cstdint.hpp> 
#include <boost/fusion/include/adapt_struct.hpp> 
#include <string> 
#include <exception> 
#include <vector> 

namespace lex = boost::spirit::lex; 
namespace px = boost::phoenix; 
namespace qi = boost::spirit::qi; 
namespace ascii = boost::spirit::ascii; 


/// Lexer definition for the event-description DSL consumed by the grammar.
///
/// Declares one token per punctuation mark, keyword and value class and
/// registers them in the default lexer state. Whitespace is registered in a
/// separate "WS" state; main() skips it with qi::in_state("WS").
///
/// Changes from the listing as originally posted:
///  - the token members are declared in the same order as the constructor's
///    initializer list (members are always initialized in declaration order,
///    so the old mismatch produced -Wreorder warnings);
///  - the debug label "kw namesapce" is now spelled "kw namespace".
///
/// @tparam Lexer  concrete lexer type forwarded to lex::lexer<>.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Punctuation: the inner double quotes make the character a literal
        // in the lexer's pattern syntax.
        : left_curly("\"{\""),
          right_curly("\"}\""),
          left_paren("\"(\""),
          right_paren("\")\""),
          colon(":"),
          scolon(";"),
          // Keywords, written with the case-insensitive (?i:...) group.
          namespace_("(?i:namespace)"),
          event("(?i:event)"),
          optional("(?i:optional)"),
          required("(?i:required)"),
          repeated("(?i:repeated)"),
          t_int_4("(?i:int4)"),
          t_int_8("(?i:int8)"),
          t_string("(?i:string)"),
          // Value tokens.
          ordinal("\\d+"),
          identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // Token table for the default state. Some tokens carry a semantic
        // action that only prints a debug trace to std::cout.
        // NOTE(review): keywords and `ordinal` are listed before `identifier`,
        // presumably so they win over the broader \w+ pattern - confirm
        // against the lexer's tie-breaking rules before reordering.
        this->self
            =
              left_curly    [ std::cout << px::val("lpar") << std::endl]
            | right_curly   [ std::cout << px::val("rpar") << std::endl]
            | left_paren
            | right_paren
            | colon         [ std::cout << px::val("colon") << std::endl]
            | scolon
            | namespace_    [ std::cout << px::val("kw namespace") << std::endl]
            | event         [ std::cout << px::val("kw event") << std::endl]
            | optional      [ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl]
            | required      [ std::cout << px::val("required") << std::endl]
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal       [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier    [ std::cout << px::val("val identifier(") << _val << ")" << std::endl];

        // Whitespace lives in its own lexer state so it never reaches the
        // grammar as a regular token.
        this->self("WS") = lex::token_def<>("[ \\t\\n]+");
    }

    // Declaration order mirrors the initializer list above.
    // Tokens without a value (lex::omit):
    lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon;
    lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string;
    // Tokens that carry a value:
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<> identifier;
};

/// Multiplicity keyword that prefixes an entry inside an event body.
/// The grammar's event_qualifier rule assigns one of these per keyword token.
enum event_entry_qualifier
{
    ENTRY_OPTIONAL = 0,  ///< entry introduced by the `optional` keyword
    ENTRY_REQUIRED = 1,  ///< entry introduced by the `required` keyword
    ENTRY_REPEATED = 2   ///< entry introduced by the `repeated` keyword
};

/// Kind of value an event entry holds.
/// The first three are assigned by the atomic_type rule, RBL_EVENT by the
/// event_type rule.
enum entry_type
{
    RBL_INT4   = 0,  ///< `int4` keyword
    RBL_INT8   = 1,  ///< `int8` keyword
    RBL_STRING = 2,  ///< `string` keyword
    RBL_EVENT  = 3   ///< `event(<identifier>)` reference to another event
};

/// Ordinal/name pair written as `<ordinal>:<identifier>` in the input.
/// Attribute type of the grammar's oid_ rule.
struct oid
{
    boost::uint32_t ordinal;  ///< numeric part, taken from the `ordinal` token
    std::string     name;     ///< textual part, taken from the `identifier` token
};

// Adapt oid as a Fusion sequence so the parser can fill it member by member.
// The member list must stay in the same order as the struct definition.
BOOST_FUSION_ADAPT_STRUCT(
    oid,
    (boost::uint32_t, ordinal)
    (std::string, name)
)

/// Type of an event entry. Attribute type of the type_descriptor_ rule.
struct type_descriptor
{
    entry_type  type_id;           ///< atomic kind or RBL_EVENT
    std::string referenced_event;  ///< identifier inside `event(...)`; the rule supplies "" for atomic types
};

// Adapt type_descriptor as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    type_descriptor,
    (entry_type, type_id)
    (std::string, referenced_event)
)

/// One `<qualifier> <ordinal>:<name> <type>;` line of an event body.
/// Attribute type of the event_entry_ rule.
struct event_entry
{
    event_entry_qualifier qualifier;   ///< optional / required / repeated
    oid                   identifier;  ///< ordinal and name of the entry
    type_descriptor       descriptor;  ///< declared type of the entry
};

// Adapt event_entry as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    event_entry,
    (event_entry_qualifier, qualifier)
    (oid, identifier)
    (type_descriptor, descriptor)
)

/// A whole `event <ordinal>:<name> { ... }` declaration.
/// Attribute type of the grammar's start rule.
struct event_descriptor
{
    oid                      identifier;     ///< ordinal and name of the event
    std::vector<event_entry> event_entries;  ///< entries found between the braces, in input order
};

// Adapt event_descriptor as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

// Qi grammar over the token stream produced by `tokens`.
// Start rule: event_descriptor_, synthesizing an event_descriptor.
// Skipper:    qi::in_state_skipper<Lexer> (main() passes in_state("WS")).
//
// NOTE: the text accompanying this listing reports that it does not compile
// as posted; the qi::on_success line at the end of the constructor is the
// statement being asked about.
//
// Template parameters:
//   Iterator - token iterator type of the lexer.
//   Lexer    - lexer definition type, used only to name the skipper.
template <typename Iterator, typename Lexer> 
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> > 
{ 
    // tok: the token definitions (an instance of `tokens`) referenced by the rules.
    template <typename TokenDef> 
    grammar(TokenDef const& tok) 
     : grammar::base_type(event_descriptor_) 
    { 
     using qi::_val; 
     //start = event; 
     // event <oid> { <entry>* }
     event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly; 

     // <qualifier> <oid> <type> ;
     event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon; 

     // Each keyword token sets the matching event_entry_qualifier value.
     event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL] 
         | tok.required [ _val = ENTRY_REQUIRED] 
         | tok.repeated [ _val = ENTRY_REPEATED]; 

     // <ordinal> : <identifier>
     oid_ = tok.ordinal 
      >> tok.colon 
      >> tok.identifier; 

     // Either an atomic type (referenced_event is supplied as "" by qi::attr)
     // or event(<identifier>).
     type_descriptor_ 
      = ((atomic_type >> qi::attr("")) 
      | (event_type >> tok.left_paren >> tok.identifier >> tok.right_paren)); 

     atomic_type = tok.t_int_4   [ _val = RBL_INT4] 
       | tok.t_int_8    [ _val = RBL_INT8] 
       | tok.t_string   [ _val = RBL_STRING]; 

     event_type = tok.event   [_val = RBL_EVENT]; 

     // Success handler for event_entry_.
     // NOTE(review): per the discussion around this listing, _1 is a lexer
     // iterator, so streaming it into std::cout is the likely compile error;
     // streaming _val would also need an operator<< for event_entry, which
     // this listing does not define.
     using namespace qi::labels; 
     qi::on_success(event_entry_,std::cout << _val << _1); 
    } 

    // Declared but never assigned or used (see the commented-out line above).
    qi::rule<Iterator> start; 
    qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_; 
    qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_; 
    // NOTE(review): the next three rules and oid_ declare no skipper although
    // the rules that invoke them do - verify whitespace handling around them.
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier; 
    qi::rule<Iterator, entry_type()> atomic_type; 
    qi::rule<Iterator, entry_type()> event_type; 
    qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_; 
    qi::rule<Iterator, oid()> oid_; 


}; 

std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4; } "; 

int main() 
{ 
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type; 
    typedef lex::lexertl::actor_lexer<token_type> lexer_type; 
    typedef tokens<lexer_type>::iterator_type iterator_type; 

    tokens<lexer_type> token_lexer; 
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer); 

    std::string::iterator it = test.begin(); 
    iterator_type first = token_lexer.begin(it, test.end()); 
    iterator_type last = token_lexer.end(); 

    bool r; 

    r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]); 

    if(r) 
     ; 
    else 
    { 
     std::cout << "parsing failed" << std::endl; 
    } 
} 
+1

我可以提供部分答案。'_1'、'_2' 和 '_3' 都是迭代器('_1' 是規則的起始位置,'_3' 是結束位置,我一直沒弄清楚 '_2' 是什麼)。就我的情況而言,我沒有使用 lexer,而是直接迭代一個普通的 'std::string',所以迭代器很容易檢查和使用。我不知道 lex 迭代器是如何運作的。顯然,'_val' 是剛解析出來的值,你也可以修改它。(我所有的 AST 節點都衍生自一個包含原始碼位置資訊的基底類別,所以我的 'on_success' 函數只是把 '_1' 和 '_3' 的位置複製到 '_val' 中,以便進行錯誤報告。) – GManNickG

+2

您的錯誤可能是因為您試圖把一個迭代器輸出到 ostream,這很可能行不通。如果需要,我可以把這些評論整理成一個答案,並附上一個會被呼叫的通用 'success_handler' 函數;不過正如您所提到的,來自詞法分析器的迭代器該怎麼用,可能得由您自己弄清楚。 :) – GManNickG

+0

據我的測試顯示,on_error/on_success只是不想與你的樣品工作......嗯。 – sehe

回答

4

查看標頭檔之後,我認為這些佔位符的意思是:

_1 = Iterator position when the rule was tried. 
_2 = Iterator to the end of the input. 
_3 = Iterator position right after the rule has been successfully matched. 

(因為我不確定上面幾行是否容易理解,下面用你的輸入舉一個小例子)

        rule being tried 
         _________________________________ 
         ´         ` 
[EVENT][1][:][sihan][{][OPTIONAL][123][:][hassan][int4][;][OPTIONAL][321][:][hassan2][int4][;][}] 
          _1         _3         _2 

正如 GManNickG 在評論中提到的,這些是詞法分析器的迭代器,你無法透過它們輕鬆存取原始字串。conjure2 example 結合使用了詞法分析器與 on_error/on_success。為了做到這一點,它使用了一種特殊的 token:position_token。這種 token 永遠可以取得與自身對應的原始字串迭代器(使用 lex::omit 時,一般的 token 會丟失這項資訊)。position_token 有幾個有用的方法:matched() 回傳 iterator_range<OriginalIterator>,而 begin() 與 end() 回傳對應的迭代器。

在下面的程式碼中,我選擇建立一個 phoenix::function,它接受兩個詞法分析器迭代器(以 _1 和 _3 呼叫),並回傳涵蓋兩者之間範圍的字串(使用 std::string(begin_iter->begin(), end_iter->begin()))。

我發現的一個問題是:把空白放在另一個詞法分析器狀態,會導致 position_token 回傳的迭代器失效。我的解決辦法是把所有東西都放在同一個狀態,然後對空白 token 直接使用 lex::_pass = lex::pass_flags::pass_ignore。

最後一個(次要的)問題是:如果你想使用 std::cout << _val,就需要為你感興趣的類型定義 operator<<。

PS:我一律使用 BOOST_SPIRIT_USE_PHOENIX_V3,這要求所有 Spirit/Phoenix 的 include 都來自 boost/spirit/include/...。如果因為任何原因需要或想要使用 V2,就必須修改 phoenix::function 的部分。另外,我沒辦法用舊式的 for 迴圈寫法,所以如果你不能使用 C++11,就必須修改 event_descriptor 的 operator<< 定義。


#define BOOST_SPIRIT_USE_PHOENIX_V3 
// #define BOOST_SPIRIT_DEBUG 
#include <boost/spirit/include/qi.hpp> 
#include <boost/spirit/include/phoenix_core.hpp> 
#include <boost/spirit/include/phoenix_bind.hpp> //CHANGED 
#include <boost/spirit/include/lex_lexertl.hpp> 
#include <boost/spirit/include/lex_lexertl_position_token.hpp> //ADDED 
#include <boost/none.hpp> 
#include <boost/cstdint.hpp> 
#include <boost/fusion/include/adapt_struct.hpp> 
#include <exception> 
#include <iostream> 
#include <string> 
#include <vector> 

namespace lex = boost::spirit::lex; 
namespace px = boost::phoenix; 
namespace qi = boost::spirit::qi; 
namespace ascii = boost::spirit::ascii; 


/// Lexer definition for the event-description DSL.
///
/// All tokens, including whitespace, are registered in the default lexer
/// state. The whitespace token is dropped inside the lexer via pass_ignore,
/// so the grammar can be run with plain qi::parse and no skipper.
///
/// @tparam Lexer  concrete lexer type forwarded to lex::lexer<>.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // punctuation (inner double quotes make the character a literal)
        : left_curly  ("\"{\"")
        , right_curly ("\"}\"")
        , left_paren  ("\"(\"")
        , right_paren ("\")\"")
        , colon       (":")
        , scolon      (";")
        // keywords, matched case-insensitively through (?i:...)
        , namespace_  ("(?i:namespace)")
        , event       ("(?i:event)")
        , optional    ("(?i:optional)")
        , required    ("(?i:required)")
        , repeated    ("(?i:repeated)")
        , t_int_4     ("(?i:int4)")
        , t_int_8     ("(?i:int8)")
        , t_string    ("(?i:string)")
        // value tokens
        , ordinal     ("\\d+")
        , identifier  ("\\w+")
    {
        // Only needed by the debug-print semantic actions, which are disabled
        // in this listing (they had the form
        //   token [ std::cout << px::val("label") << _val << std::endl ]).
        using boost::spirit::lex::_val;

        this->self =
            left_curly  |
            right_curly |
            left_paren  |
            right_paren |
            colon       |
            scolon      |
            namespace_  |
            event       |
            optional    |
            required    |
            repeated    |
            t_int_4     |
            t_int_8     |
            t_string    |
            ordinal     |
            identifier  |
            // Whitespace shares the default state and is discarded here, so
            // it never reaches the parser.
            lex::token_def<>("[ \\t\\n]+") [lex::_pass = lex::pass_flags::pass_ignore];
    }

    // Declared in the same order as the initializer list above.
    // Tokens without a value:
    lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon;
    lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string;
    // Tokens that carry a value:
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<> identifier;
};

/// Multiplicity keyword that prefixes an entry inside an event body.
/// The grammar's event_qualifier rule assigns one of these per keyword token;
/// operator<< for event_entry prints the numeric value.
enum event_entry_qualifier
{
    ENTRY_OPTIONAL = 0,  ///< entry introduced by the `optional` keyword
    ENTRY_REQUIRED = 1,  ///< entry introduced by the `required` keyword
    ENTRY_REPEATED = 2   ///< entry introduced by the `repeated` keyword
};

/// Kind of value an event entry holds.
/// The first three are assigned by the atomic_type rule, RBL_EVENT by the
/// event_type rule; operator<< for type_descriptor prints the numeric value.
enum entry_type
{
    RBL_INT4   = 0,  ///< `int4` keyword
    RBL_INT8   = 1,  ///< `int8` keyword
    RBL_STRING = 2,  ///< `string` keyword
    RBL_EVENT  = 3   ///< `event(<identifier>)` reference to another event
};

/// Ordinal/name pair written as `<ordinal>:<identifier>` in the input.
/// Attribute type of the grammar's oid_ rule.
struct oid
{
    boost::uint32_t ordinal;  ///< numeric part, taken from the `ordinal` token
    std::string     name;     ///< textual part, taken from the `identifier` token
};

// Adapt oid as a Fusion sequence so the parser can fill it member by member.
// The member list must stay in the same order as the struct definition.
BOOST_FUSION_ADAPT_STRUCT(
    oid,
    (boost::uint32_t, ordinal)
    (std::string, name)
)

/// Streams an oid as `<ordinal>-<name>`.
/// @return the stream that was written to, so calls can be chained.
std::ostream& operator<<(std::ostream& os, const oid& val) //ADDED
{
    os << val.ordinal;
    os << "-";
    os << val.name;
    return os;
}

/// Type of an event entry. Attribute type of the type_descriptor_ rule.
struct type_descriptor
{
    entry_type  type_id;           ///< atomic kind or RBL_EVENT
    std::string referenced_event;  ///< identifier inside `event(...)`; the rule supplies "" for atomic types
};

// Adapt type_descriptor as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    type_descriptor,
    (entry_type, type_id)
    (std::string, referenced_event)
)

/// Streams a type_descriptor as `<type_id>-<referenced_event>`.
/// type_id is an unscoped enum, so it is printed as its integer value.
/// @return the stream that was written to, so calls can be chained.
std::ostream& operator<<(std::ostream& os, const type_descriptor& val) //ADDED
{
    os << val.type_id;
    os << "-";
    os << val.referenced_event;
    return os;
}

/// One `<qualifier> <ordinal>:<name> <type>;` line of an event body.
/// Attribute type of the event_entry_ rule.
struct event_entry
{
    event_entry_qualifier qualifier;   ///< optional / required / repeated
    oid                   identifier;  ///< ordinal and name of the entry
    type_descriptor       descriptor;  ///< declared type of the entry
};

// Adapt event_entry as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    event_entry,
    (event_entry_qualifier, qualifier)
    (oid, identifier)
    (type_descriptor, descriptor)
)

/// Streams an event_entry as `<qualifier>-<identifier>-<descriptor>`.
/// The qualifier is an unscoped enum and is printed as its integer value;
/// the other two parts use their own operator<< overloads.
/// @return the stream that was written to, so calls can be chained.
std::ostream& operator<<(std::ostream& os, const event_entry& val) //ADDED
{
    os << val.qualifier;
    os << "-";
    os << val.identifier;
    os << "-";
    os << val.descriptor;
    return os;
}

/// A whole `event <ordinal>:<name> { ... }` declaration.
/// Attribute type of the grammar's start rule.
struct event_descriptor
{
    oid                      identifier;     ///< ordinal and name of the event
    std::vector<event_entry> event_entries;  ///< entries found between the braces, in input order
};

// Adapt event_descriptor as a Fusion sequence (member order matches the struct).
BOOST_FUSION_ADAPT_STRUCT(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

std::ostream& operator<<(std::ostream& os, const event_descriptor& val) //ADDED 
{ 
    os << val.identifier << "["; 
    for(const auto& entry: val.event_entries) //C++11 
     os << entry; 
    os << "]"; 
    return os; 
} 

/// Function object that rebuilds the source text lying between two tokens.
///
/// Both arguments are iterators (or pointers) to objects exposing a begin()
/// member that returns an iterator into the original character sequence.
/// The result runs from the start of the first token up to, but not
/// including, the start of the second one.
///
/// Both arguments are dereferenced, so neither may be a past-the-end iterator.
struct build_string_impl  //ADDED
{
    // Nested result<> template: reports std::string as the return type for
    // any (Iter1, Iter2) call signature.
    template <typename Sig>
    struct result;

    template <typename This, typename Iter1, typename Iter2>
    struct result<This(Iter1, Iter2)>
    {
        typedef std::string type;
    };

    /// @param begin  iterator to the first token of the range.
    /// @param end    iterator to the token that follows the range.
    /// @return the characters in [begin->begin(), end->begin()).
    template <typename Iter1, typename Iter2>
    std::string operator()(Iter1 begin, Iter2 end) const
    {
        std::string covered;
        covered.assign(begin->begin(), end->begin());
        return covered;
    }
};

// Lazy Phoenix wrapper around build_string_impl, so that
// build_string(_1, _3) can be used inside the grammar's on_success handler.
px::function<build_string_impl> build_string; 

/// Qi grammar over the token stream produced by `tokens`.
///
/// Start rule: event_descriptor_, synthesizing an event_descriptor.
/// No skipper is used; the lexer already discards whitespace.
///
/// @tparam Iterator  token iterator type of the lexer.
/// @tparam Lexer     lexer definition type; not used inside this grammar.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator, event_descriptor()>
{
    /// @param tok  token definitions (an instance of `tokens`) that the
    ///             rules refer to.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
        : grammar::base_type(event_descriptor_)
    {
        using qi::_val;
        //start = event;

        // ---- leaf rules: a keyword token mapped onto an enum value ----
        event_qualifier
            = tok.optional [_val = ENTRY_OPTIONAL]
            | tok.required [_val = ENTRY_REQUIRED]
            | tok.repeated [_val = ENTRY_REPEATED]
            ;

        atomic_type
            = tok.t_int_4  [_val = RBL_INT4]
            | tok.t_int_8  [_val = RBL_INT8]
            | tok.t_string [_val = RBL_STRING]
            ;

        event_type = tok.event [_val = RBL_EVENT];

        // ---- composite rules ----
        // <ordinal> : <identifier>
        oid_ = tok.ordinal >> tok.colon >> tok.identifier;

        // An atomic type (referenced_event supplied as "" by qi::attr),
        // or event(<identifier>).
        type_descriptor_
            = (atomic_type >> qi::attr(""))
            | (event_type >> tok.left_paren >> tok.identifier >> tok.right_paren)
            ;

        // <qualifier> <oid> <type> ;
        event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;

        // event <oid> { <entry>* }
        event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *event_entry_ >> tok.right_curly;

        // After every successfully parsed entry, print the parsed value and
        // the source text it came from. Per the accompanying answer, _1 is
        // the iterator position where the rule was tried and _3 the position
        // right after the match. Installed after event_entry_ is defined,
        // as in the original listing.
        using namespace qi::labels;
        qi::on_success(event_entry_, std::cout << _val << " " << build_string(_1, _3) << std::endl); //CHANGED
        // BOOST_SPIRIT_DEBUG_NODES((event_descriptor_)(event_entry_)(event_qualifier)(oid_)(type_descriptor_)(atomic_type)(event_type));
    }

    // Declared but never assigned or used (see the commented-out line above).
    qi::rule<Iterator> start;

    qi::rule<Iterator, event_descriptor()>      event_descriptor_;
    qi::rule<Iterator, event_entry()>           event_entry_;
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
    qi::rule<Iterator, entry_type()>            atomic_type;
    qi::rule<Iterator, entry_type()>            event_type;
    qi::rule<Iterator, type_descriptor()>       type_descriptor_;
    qi::rule<Iterator, oid()>                   oid_;
};

std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 321:hassan2 int4; } "; 

int main() 
{ 
    typedef lex::lexertl::position_token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type; //CHANGED 
    typedef lex::lexertl::actor_lexer<token_type> lexer_type; 
    typedef tokens<lexer_type>::iterator_type iterator_type; 

    tokens<lexer_type> token_lexer; 
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer); 

    std::string::iterator it = test.begin(); 
    iterator_type first = token_lexer.begin(it, test.end()); 
    iterator_type last = token_lexer.end(); 

    bool r; 

    r = qi::parse(first, last, grammar); //CHANGED 

    if(r) 
     ; 
    else 
    { 
     std::cout << "parsing failed" << std::endl; 
    } 
} 
+0

謝謝你如此深思熟慮且解釋詳盡的答案。它也順帶解決了我之前的一個小困擾:不得不使用 'qi::in_state' 作為 skipper。我確實想改用上面這種方式來跳過空白。我想我現在終於可以開始寫我的 DSL 了。唯一還缺的,是把上面 'qi::grammar' 裡的 'event_qualifier'、'atomic_type' 和 'event_type' 規則移到 lex 中。 –

+0

最優秀的東西。我在那裏「全部」都一樣,除了我因丟失的鏈接「position_token」而丟失了。 +1(@HassanSyed我重新排列了聲明以匹配初始化順序,'-Wextra'是你的朋友) – sehe

+0

當我把詞法分析器中 'optional' token 的語義動作註解掉時,會出現奇怪的錯誤:記憶體配置失敗。在 clang++ 和 gcc 中都會出現。如果我取消 optional 和 required 的註解,並把輸入改成同時包含這兩者,這個錯誤就不會出現 :D –