2016-01-25 64 views
2

我想用Boost :: Spirit編寫一個語言解析器。我讀了教程和嘗試下面的代碼來解析的語法功能:高清MYFUNC(ARG1 TYPE1,ARG2,類型2 ...)return_type:用Boost :: Spirit解析時發生Segfault

AST:

namespace ast { 

enum Type { BOOL, INT32, FLOAT32 }; 

using Identifier = std::string; 

using TypedIdentifier = std::tuple<Identifier, Type>; 

using ArgList = std::vector<TypedIdentifier>; 

using FunctionDef = std::tuple<Identifier, ArgList, Type>; 
} 

分析器:

namespace parser { 

struct Identifier 
    : qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> { 
    Identifier() : Identifier::base_type(start) { 
    start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]"); 
    } 
    qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start; 
}; 

struct Type : qi::symbols<char, ast::Type> { 
    Type() { 
    add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL); 
    } 
}; 

struct TypedIdentifier 
    : qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> { 
    TypedIdentifier() : TypedIdentifier::base_type(start) { 
    start = Identifier() >> Type(); 
    } 
    qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start; 
}; 

struct FunctionDef 
    : qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> { 
    FunctionDef() : FunctionDef::base_type(start) { 
    start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >> 
      Type() >> ":"; 
    } 
    qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start; 
}; 
} 

然後,當我嘗試解析一個簡單的代碼時,我得到一個段錯誤。段錯誤發生在試圖解析一個函數定義時,但我調試了一下,並且在嘗試解析一個類型化的標識符時段錯誤發生了。

int main() { 
    string foo("foo int32"); 
    auto begin = foo.begin(); 
    auto end = foo.end(); 
    ast::TypedIdentifier id; 
    bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(), 
           ascii::space, id); 
    cout << "Parse " << (result ? "successful " : "failed ") << endl; 
    return 0; 
} 

我測試了標識符和類型解析器,它們自行工作正常。 我也嘗試過定義全局語法,而不是實例化新的語法,但是我也得到了段錯誤。 我在這裏做錯了什麼?

回答

1

鏈接的答案確實表明出了什麼問題(在文法規則中提到臨時對象)。

我建議你不需要爲每一個生產創建grammar<>實例。相反,它的方式更有效(優雅)將它們分組爲規則成語法:

Live On Coliru

#define BOOST_SPIRIT_DEBUG 
#include <iostream> 
#include <boost/spirit/include/qi.hpp> 
#include <boost/fusion/include/std_tuple.hpp> 
#include <boost/fusion/include/io.hpp> 
#include <boost/optional/optional_io.hpp> 

namespace qi = boost::spirit::qi; 
namespace ascii = boost::spirit::ascii; 

namespace ast { 

    enum Type { BOOL, INT32, FLOAT32 }; 

    using Identifier  = std::string; 
    using TypedIdentifier = std::tuple<Identifier, Type>; 
    using ArgList   = std::vector<TypedIdentifier>; 
    using FunctionDef  = std::tuple<Identifier, ArgList, Type>; 

    std::ostream& operator<<(std::ostream& os, Type v) { 
     switch (v) { 
      case BOOL: return os << "BOOL"; 
      case INT32: return os << "INT32"; 
      case FLOAT32: return os << "FLOAT32"; 
     } 
     return os << "?"; 
    } 

    template <typename... Ts> std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) { 
     return os << boost::fusion::as_vector(v); 
    } 

    template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) { 
     os << "{"; 
     for (auto& el : v) os << el << " "; 
     return os << "}"; 
    } 
} 

namespace parser { 

    template <typename Iterator> 
    struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> { 
     MyGrammarImpl() : MyGrammarImpl::base_type(functionDef) 
     { 
      identifier  = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]"); 
      typedIdentifier = identifier >> type; 
      functionDef  = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":"; 
      type   = type_; 

      BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef)) 
     } 
     private: 
     qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier; 
     qi::rule<Iterator, ast::FunctionDef(),  ascii::space_type> functionDef; 
     qi::rule<Iterator, ast::Type(),   ascii::space_type> type; 

     // lexemes 
     qi::rule<Iterator, ast::Identifier()> identifier; 

     struct Type : qi::symbols<char, ast::Type> { 
      Type() { 
       add("int32", ast::INT32) 
        ("float32", ast::FLOAT32) 
        ("bool", ast::BOOL) 
        ; 
      } 
     }; 

     Type type_; 
    }; 

    using MyGrammar = MyGrammarImpl<std::string::const_iterator>; 
} 

int main() { 
    std::string const foo("def bar(foo int32) bool:"); 

    auto begin = foo.begin(); 
    auto end = foo.end(); 

    ast::FunctionDef def; 

    bool result = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def); 

    std::cout << "Parse " << (result ? "successful " : "failed ") << std::endl; 
    if (result) 
     std::cout << def << "\n"; 
} 

打印:

Parse successful 
(bar {(foo INT32) } BOOL) 

隨着調試信息:

<functionDef> 
<try>def bar(foo int32) </try> 
<identifier> 
    <try>bar(foo int32) bool</try> 
    <success>(foo int32) bool:</success> 
    <attributes>[[b, a, r]]</attributes> 
</identifier> 
<typedIdentifier> 
    <try>foo int32) bool:</try> 
    <identifier> 
    <try>foo int32) bool:</try> 
    <success> int32) bool:</success> 
    <attributes>[[f, o, o]]</attributes> 
    </identifier> 
    <type> 
    <try> int32) bool:</try> 
    <success>) bool:</success> 
    <attributes>[INT32]</attributes> 
    </type> 
    <success>) bool:</success> 
    <attributes>[[[f, o, o], INT32]]</attributes> 
</typedIdentifier> 
<type> 
    <try> bool:</try> 
    <success>:</success> 
    <attributes>[BOOL]</attributes> 
</type> 
<success></success> 
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes> 
</functionDef> 

¹Internal Boost::Spirit code segfaults when parsing a composite grammar

+0

非常感謝!我對您的更改有幾個問題: - 爲什麼需要類型規則?我認爲type_足以解析。 - 爲什麼沒有標識符規則的ascii :: space_type? - 現在我將結果存儲在stl容器中,我甚至不知道它們實際包含的內容。有沒有辦法使用自定義結構/類作爲屬性?什麼是解析功能AST對象的慣用方法? – ElefEnt

+0

我添加了調試的類型規則。識別的必須是一個詞位。剩下的可能是最好的問題,所以我們知道你的意思,並得到應有的重視。 – sehe

+0

我在看一些例子,我發現了一個使用臨時變量uint_來定義語法的地方:http://www.boost.org/doc/libs/1_60_0/libs/spirit/example/qi/compiler_tutorial/calc2 .cpp 所以爲了保持一致,我們應該爲該段錯誤提交一個錯誤? :) – ElefEnt

相關問題