2014-06-14 15 views
1

我試圖複製this example以實現類似於運算符優先級規則的C++(我從一個子集開始,但最終打算添加其他子集)。用boost精神實現運算符優先級

盡我所能,我無法獲得解析單個二進制操作的語法。它會解析文字(44,3.42,「stackoverflow」)就好,但會失敗像3 + 4

我確實看過this questionthis one試圖讓我的解決方案工作,但得到了同樣的結果。

(在試圖讓事情變得很短,我會在這裏,完整的代碼here後只有相關位)

的AST相關的數據結構:

enum class BinaryOperator 
{ 
    ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL, 
}; 

typedef boost::variant<double, int, std::string> Litteral; 

struct Identifier { std::string name; }; 

typedef boost::variant< 
    Litteral, 
    Identifier, 
    boost::recursive_wrapper<UnaryOperation>, 
    boost::recursive_wrapper<BinaryOperation>, 
    boost::recursive_wrapper<FunctionCall> 
> Expression; 

struct BinaryOperation 
{ 
    Expression rhs, lhs; 
    BinaryOperator op; 

    BinaryOperation() {} 
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {} 
}; 

語法:

template<typename Iterator, typename Skipper> 
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()> 
{ 
    BoltGrammar() : BoltGrammar::base_type(start, "start") 
    { 
     equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL); 
     equal %= (lowerGreater >> equalOp >> lowerGreater); 
     equal.name("equal"); 

     lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL); 
     lowerGreater %= (shift >> lowerGreaterOp >> shift); 
     lowerGreater.name("lower or greater"); 

     shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT); 
     shift %= (addSub >> shiftOp >> addSub); 
     shift.name("shift"); 

     addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT); 
     addSub %= (multDivMod >> addSubOp >> multDivMod); 
     addSub.name("add or sub"); 

     multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO); 
     multDivMod %= (value >> multDivModOp >> value); 
     multDivMod.name("mult, div, or mod"); 

     value %= identifier | litteral | ('(' > expression > ')'); 
     value.name("value"); 

     start %= qi::eps >> *(value >> qi::lit(';')); 
     start.name("start"); 

     expression %= identifier | litteral | equal; 
     expression.name("expression"); 

     identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")]; 
     identifier.name("identifier"); 

     litteral %= qi::double_ | qi::int_ | quotedString; 
     litteral.name("litteral"); 

     quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"']; 
     quotedString.name("quoted string"); 

     namespace phx = boost::phoenix; 
     using namespace qi::labels; 
     qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl); 
} 

qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp; 
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod; 
qi::rule<Iterator, Skipper, Expression()> value; 

qi::rule<Iterator, Skipper, Program()> start; 
qi::rule<Iterator, Skipper, Expression()> expression; 
qi::rule<Iterator, Skipper, Identifier()> identifier; 
qi::rule<Iterator, Skipper, Litteral()> litteral; 
qi::rule<Iterator, Skipper, std::string()> quotedString; 
}; 

回答

4

主要問題(確實)似乎是在那second answer you linked to中解決。

讓我解決了一些要點:

  1. 的主要問題是爲化合物:

    • 你開始的規則是

      start  %= qi::eps >> *(value >> qi::lit(';')); 
      

      這意味着它預計value S:

      value  %= identifier | literal | ('(' > expression > ')'); 
      

      但是,由於此解析只有標識符和文字或帶圓括號的子表達式,3+4二進制操作將永遠不會被解析。

    • 表達式規則,允許再次identifierliteral第一(冗餘/迷惑):

      expression %= identifier | literal | equal; 
      

    我想你想要的東西更像

    expression = '(' >> expression >> ')' | equal | value; 
    value  = identifier | literal; 
    
    // and then 
    start  = qi::eps >> -expression % ';'; 
    
  2. BinaryOperation對於操作員預先設置的情況,生產僅允許發送;這打破規則是嵌套的運算符優先級的方式:一個multDivOp永遠不會被接受作爲匹配,除非它跟一個addSubOp

    addSub %= (multDivMod >> addSubOp >> multDivMod); 
    multDivMod %= (value >> multDivModOp >> value); 
    

    如圖所示的連接回答這個問題才能最好地固定:

    addSub  = multDivMod >> -(addSubOp  >> multDivMod); 
    multDivMod = value  >> -(multDivModOp >> value); 
    

    在那裏你可以使用語義動作來構建AST節點「動態」:

    addSub  = multDivMod >> -(addSubOp  >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
    multDivMod = value  >> -(multDivModOp >> value)  [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
    

    這節拍「乏味的」東方電氣arative approach hand-down(這導致了很多回溯,參見例如Boost spirit poor perfomance with Alternative parser

  3. 字面規則將解析一個整數作爲雙,因爲它不是嚴格:

    literal %= qi::double_ | qi::int_ | quotedString; 
    

    就可以解決這個問題,如:

    qi::real_parser<double, qi::strict_real_policies<double> > strict_double; 
    
    literal  = quotedString | strict_double | qi::int_; 
    
  4. FunctionCall要適應functionName作爲Identifier(不是std::string

    BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args)) 
    
  5. Expressionoperator<<可以(應該)是一個boost::static_visitor,讓你

    • 消除魔法型開關數字開關
    • 的完整性
    • GET編譯器檢查可以利用重載決議對變異切換成員類型

    使用C++ 11,代碼仍然可以在一個函數中:

    std::ostream& operator<<(std::ostream& os, const Expression& expr) 
    { 
        os << "Expression "; 
        struct v : boost::static_visitor<> { 
         v(std::ostream& os) : os(os) {} 
         std::ostream& os; 
    
         void operator()(Literal   const& e) const { os << "(literal: " << e       << ")"; } 
         void operator()(Identifier  const& e) const { os << "(identifier: " << e.name      << ")"; } 
         void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; } 
         void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; } 
         void operator()(FunctionCall const& e) const { 
          os << "(function call: " << e.functionName << "("; 
          if (e.args.size() > 0) os << e.args.front(); 
          for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; } 
          os << ")"; 
         } 
        }; 
        boost::apply_visitor(v(os), expr); 
        return os; 
    } 
    
  6. 可以使用BOOST_SPIRIT_DEBUG_NODES宏命名規則

    BOOST_SPIRIT_DEBUG_NODES(
        (start)(expression)(identifier)(literal)(quotedString) 
        (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value) 
    ) 
    
  7. 您應該包括spirit/include/目錄,然後繼電器spirit/home/phoenix/include/而不是直接包括它們。

這裏是一個完全工作的樣本,也改善了可讀性的語法規則Live On Coliru

//#define BOOST_SPIRIT_DEBUG 
#define BOOST_SPIRIT_USE_PHOENIX_V3 

#include <boost/fusion/include/adapt_struct.hpp> 
#include <boost/fusion/include/io.hpp> 
#include <boost/spirit/include/phoenix.hpp> 
#include <boost/spirit/include/qi.hpp> 
#include <boost/variant.hpp> 
#include <iostream> 
#include <string> 
#include <vector> 

namespace qi = boost::spirit::qi; 
namespace phx = boost::phoenix; 
namespace ascii = boost::spirit::ascii; 

enum class UnaryOperator 
{ 
    NOT, 
    PLUS, 
    MINUS, 
}; 

std::ostream& operator<<(std::ostream& os, const UnaryOperator op) 
{ 
    switch (op) 
    { 
     case UnaryOperator::NOT: return os << "!"; 
     case UnaryOperator::PLUS: return os << "+"; 
     case UnaryOperator::MINUS: return os << "-"; 
    } 

    assert(false); 
} 

enum class BinaryOperator 
{ 
    ADD,  SUBTRACT,  MULTIPLY, DIVIDE, 
    MODULO, 
    LEFT_SHIFT, RIGHT_SHIFT, 
    EQUAL,  NOT_EQUAL, 
    LOWER,  LOWER_EQUAL, 
    GREATER, GREATER_EQUAL, 
}; 

std::ostream& operator<<(std::ostream& os, const BinaryOperator op) 
{ 
    switch (op) 
    { 
     case BinaryOperator::ADD:   return os << "+"; 
     case BinaryOperator::SUBTRACT:  return os << "-"; 
     case BinaryOperator::MULTIPLY:  return os << "*"; 
     case BinaryOperator::DIVIDE:  return os << "/"; 
     case BinaryOperator::MODULO:  return os << "%"; 
     case BinaryOperator::LEFT_SHIFT: return os << "<<"; 
     case BinaryOperator::RIGHT_SHIFT: return os << ">>"; 
     case BinaryOperator::EQUAL:   return os << "=="; 
     case BinaryOperator::NOT_EQUAL:  return os << "!="; 
     case BinaryOperator::LOWER:   return os << "<"; 
     case BinaryOperator::LOWER_EQUAL: return os << "<="; 
     case BinaryOperator::GREATER:  return os << ">"; 
     case BinaryOperator::GREATER_EQUAL: return os << ">="; 
    } 

    assert(false); 
} 

typedef boost::variant< 
    double, 
    int, 
    std::string 
> Literal; 

struct Identifier 
{ 
    std::string name; 
}; 
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name)) 

struct UnaryOperation; 
struct BinaryOperation; 
struct FunctionCall; 

typedef boost::variant< 
    Literal, 
    Identifier, 
    boost::recursive_wrapper<UnaryOperation>, 
    boost::recursive_wrapper<BinaryOperation>, 
    boost::recursive_wrapper<FunctionCall> 
> Expression; 

struct UnaryOperation 
{ 
    Expression rhs; 
    UnaryOperator op; 
}; 
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op)) 

struct BinaryOperation 
{ 
    Expression rhs; 
    BinaryOperator op; 
    Expression lhs; 

    BinaryOperation() {} 
    BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {} 
}; 
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs)) 

struct FunctionCall 
{ 
    Identifier functionName; 
    std::vector<Expression> args; 
}; 
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args)) 

struct Program 
{ 
    std::vector<Expression> statements; 
}; 
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements)) 

std::ostream& operator<<(std::ostream& os, const Expression& expr) 
{ 
    os << "Expression "; 
    struct v : boost::static_visitor<> { 
     v(std::ostream& os) : os(os) {} 
     std::ostream& os; 

     void operator()(Literal   const& e) const { os << "(literal: " << e       << ")"; } 
     void operator()(Identifier  const& e) const { os << "(identifier: " << e.name      << ")"; } 
     void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; } 
     void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; } 
     void operator()(FunctionCall const& e) const { 
      os << "(function call: " << e.functionName << "("; 
      if (e.args.size() > 0) os << e.args.front(); 
      for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; } 
      os << ")"; 
     } 
    }; 
    boost::apply_visitor(v(os), expr); 
    return os; 
} 

std::ostream& operator<<(std::ostream& os, const Program& prog) 
{ 
    os << "Program" << std::endl << "{" << std::endl; 
    for (const Expression& expr : prog.statements) 
    { 
     std::cout << "\t" << expr << std::endl; 
    } 
    os << "}" << std::endl; 

    return os; 
} 

template<typename Iterator, typename Skipper> 
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()> 
{ 
    BoltGrammar() : BoltGrammar::base_type(start, "start") 
    { 
     using namespace qi::labels; 

     equalOp.add 
      ("==", BinaryOperator::EQUAL) 
      ("!=", BinaryOperator::NOT_EQUAL); 
     lowerGreaterOp.add 
      ("<", BinaryOperator::LOWER) 
      ("<=", BinaryOperator::LOWER_EQUAL) 
      (">", BinaryOperator::GREATER) 
      (">=", BinaryOperator::GREATER_EQUAL); 
     shiftOp.add 
      ("<<", BinaryOperator::LEFT_SHIFT) 
      (">>", BinaryOperator::RIGHT_SHIFT); 
     addSubOp.add 
      ("+", BinaryOperator::ADD) 
      ("-", BinaryOperator::SUBTRACT); 
     multDivModOp.add 
      ("*", BinaryOperator::MULTIPLY) 
      ("/", BinaryOperator::DIVIDE) 
      ("%", BinaryOperator::MODULO); 

     equal  = lowerGreater [ _val=_1 ] >> -(equalOp  >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
     lowerGreater = shift  [ _val=_1 ] >> -(lowerGreaterOp >> shift)  [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
     shift  = addSub  [ _val=_1 ] >> -(shiftOp  >> addSub)  [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
     addSub  = multDivMod [ _val=_1 ] >> -(addSubOp  >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
     multDivMod = value  [ _val=_1 ] >> -(multDivModOp >> value)  [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 

     addSub  = multDivMod >> -(addSubOp  >> multDivMod); 
     multDivMod = value  >> -(multDivModOp >> value); 

     addSub  = multDivMod >> -(addSubOp  >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 
     multDivMod = value  >> -(multDivModOp >> value)  [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ]; 

     start  = qi::eps >> -expression % ';'; 
     expression = '(' >> expression >> ')' | equal | value; 
     value  = identifier | literal; 
     identifier = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")]; 

     qi::real_parser<double, qi::strict_real_policies<double> > strict_double; 

     literal  = quotedString | strict_double | qi::int_; 
     quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"']; 

     qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl); 

     BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString) 
       (equal)(lowerGreater)(shift)(addSub)(multDivMod)(value) 
       ) 
    } 

    qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp; 
    qi::rule<Iterator, Skipper, Expression()> equal, lowerGreater, shift, addSub, multDivMod; 
    qi::rule<Iterator, Skipper, Expression()> value; 

    qi::rule<Iterator, Skipper, Program()>  start; 
    qi::rule<Iterator, Skipper, Expression()> expression; 
    qi::rule<Iterator, Skipper, Identifier()> identifier; 
    qi::rule<Iterator, Skipper, Literal()>  literal; 
    qi::rule<Iterator, Skipper, std::string()> quotedString; 
}; 

typedef std::string::iterator Iterator; 
typedef boost::spirit::ascii::space_type Skipper; 

int main() 
{ 
    BoltGrammar<Iterator, Skipper> grammar; 

    std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;"); 

    Program prog; 
    Iterator iter = str.begin(), last = str.end(); 
    bool r = phrase_parse(iter, last, grammar, ascii::space, prog); 

    if (r && iter == last) 
    { 
     std::cout << "Parsing succeeded: " << prog << std::endl; 
    } 
    else 
    { 
     std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl; 
    } 

    return 0; 
} 

打印:

Parsing succeeded: Program 
{ 
    Expression (literal: 3) 
    Expression (literal: 4.2) 
    Expression (literal: lounge <c++>) 
    Expression (binary op: (Expression (literal: 3) + Expression (literal: 4))) 
}