2013-10-05 43 views
2

我正在嘗試用 Boost Spirit 編寫一個 C++ 字符串字面量解析器。問題:C++ Boost Spirit 的 attr_cast 不會調用預期的 transform_attribute

這是我到目前爲止有:

#include <boost/config/warning_disable.hpp> 
#include <boost/spirit/include/qi.hpp> 
#include <boost/spirit/include/phoenix.hpp> 
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp> 
#include <boost/spirit/repository/include/qi_confix.hpp> 
#include <boost/spirit/include/phoenix_fusion.hpp> 
#include <boost/spirit/include/phoenix_stl.hpp> 

using namespace boost::spirit; 

#include <boost/fusion/include/adapt_struct.hpp> 

//////////////////////////////// 
// extra facilities 
// Function object that forwards to an unqualified get_line() call on its
// argument. It is wrapped in boost::phoenix::function further down so the
// line number can be queried lazily from inside semantic actions.
struct get_line_f
{
    // Result-type protocol: every call signature reports size_t.
    template <typename>
    struct result
    {
        typedef size_t type;
    };

    // Returns whatever get_line() yields for `position`, converted to size_t.
    template <typename PosIterator>
    size_t operator()(PosIterator const& position) const
    {
        size_t const line = get_line(position);
        return line;
    }
};

// Explicit specialization of Spirit's transform_attribute trait for the
// pair (uint16_t, std::string) in the Qi domain.
// NOTE(review): the Spirit documentation describes the template arguments as
// <Exposed, Transformed, Domain>. As written, uint16_t is the exposed type and
// std::string the transformed one, while the grammar in this file wraps a
// uint16_t parser with attr_cast inside a std::string rule -- the argument
// order looks reversed; confirm against the attr_cast documentation.
namespace boost { namespace spirit { namespace traits 
{ 
    template <> 
    struct transform_attribute<uint16_t, std::string, qi::domain> 
    { 
     // The declared transformed type is a reference, but pre() below returns
     // a std::string by value.
     typedef std::string& type; 
     // Ignores its argument and returns the fixed text "pre16".
     static std::string pre(uint16_t& d) { return "pre16"; } 
     // Overwrites attr with the fixed marker "unicode16"; val is not written.
     static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; } 
     // Does nothing.
     static void fail(uint16_t&) {} 
    }; 
}}} 

// Explicit specialization of Spirit's transform_attribute trait for the
// pair (uint32_t, std::string) in the Qi domain.
// NOTE(review): the Spirit documentation describes the template arguments as
// <Exposed, Transformed, Domain>. As written, uint32_t is the exposed type and
// std::string the transformed one, while the grammar in this file wraps a
// uint32_t parser with attr_cast inside a std::string rule -- the argument
// order looks reversed; confirm against the attr_cast documentation.
namespace boost { namespace spirit { namespace traits 
{ 
    template <> 
    struct transform_attribute<uint32_t, std::string, qi::domain> 
    { 
     // The declared transformed type is a reference, but pre() below returns
     // a std::string by value.
     typedef std::string& type; 
     // Ignores its argument and returns the fixed text "pre32".
     static std::string pre(uint32_t& d) { return "pre32"; } 
     // Overwrites attr with the fixed marker "unicode32"; val is not written.
     static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; } 
     // Does nothing.
     static void fail(uint32_t&) {} 
    }; 
}}} 

// 
//////////////////////////////// 

// First and last line numbers of a parsed range. Both fields start out as
// size_t(-1), i.e. the largest representable size_t value.
struct RangePosition
{
    size_t beginLine;
    size_t endLine;

    RangePosition()
        : beginLine(static_cast<size_t>(-1))
        , endLine(static_cast<size_t>(-1))
    {
    }
};

// Parsed string literal: `value` and `source` are two text fields filled in by
// the grammar's semantic actions; the line range comes from RangePosition.
struct String : public RangePosition
{
    std::string value;
    std::string source;

    // Base and both strings are default-constructed (empty strings, line
    // numbers as set by RangePosition's constructor).
    String() {}
};

// Adapt String as a Boost.Fusion sequence. Element order, as listed here:
// 0 = value, 1 = source, 2 = beginLine, 3 = endLine (the last two are the
// members inherited from RangePosition).
BOOST_FUSION_ADAPT_STRUCT(String, 
          (std::string, value) 
          (std::string, source) 
          (size_t,  beginLine) 
          (size_t,  endLine) 
         ) 

// Qi grammar for one or more adjacent double-quoted string literals.
// The start rule synthesizes a String and uses qi::space_type as skipper;
// all other rules are declared without a skipper.
template <typename Iterator> 
struct source_string : qi::grammar<Iterator, String(), qi::space_type> 
{ 
    // Symbol table mapping two-character backslash escapes to the single
    // character they stand for.
    struct escape_symbols : qi::symbols<char, char> 
    { 
     escape_symbols() 
     { 
      add 
       ("\\\'" , '\'') 
       ("\\\"" , '\"') 
       ("\\\?" , '\?') 
       ("\\\\" , '\\') 
       ("\\0"  , '\0') 
       ("\\a"  , '\a') 
       ("\\b"  , '\b') 
       ("\\f"  , '\f') 
       ("\\n"  , '\n') 
       ("\\r"  , '\r') 
       ("\\t"  , '\t') 
       ("\\v"  , '\v') 
      ; 
     } 

    } escape_symbol; 

    // Wires up all rules; `start` is registered as the grammar's entry rule.
    source_string() : source_string::base_type(start) 
    { 
     using qi::raw; 
     using qi::_val; 
     using qi::_1; 
     using qi::space; 
     using qi::omit; 
     using qi::no_case; 
     using qi::attr_cast; 
     using qi::print; 

     namespace phx = boost::phoenix; 
     using phx::at_c; 
     using phx::begin; 
     using phx::end; 
     using phx::construct; 
     using phx::ref; 

     // A single escape taken from the symbol table above.
     escape %= escape_symbol; 

     // One character of string content, tried in this order:
     //   "\x" (either case) followed by 1-2 hex digits,
     //   a backslash followed by 1-3 octal digits,
     //   a symbol-table escape,
     //   any printable character other than '"' and '\'.
     character %= (no_case["\\x"] >> hex12) 
        | ("\\" >> oct123) 
        | escape 
        | (print - (lit('"') | '\\')); 

     // "\u" + exactly 4 hex digits, or "\U" + exactly 8 hex digits. Both
     // numeric parsers are wrapped in attr_cast without explicit template
     // arguments.
     unicode %= ("\\u" >> attr_cast(hex4)) 
        | ("\\U" >> attr_cast(hex8)); 

     // One quote-delimited section; unicode escapes are tried before
     // ordinary characters.
     string_section %= '"' >> *(unicode | character) >> '"'; 

     // One or more sections separated by optional whitespace, which is
     // matched explicitly here and dropped from the attribute via omit[].
     string %= string_section % omit[*space]; 

     // raw[] exposes the matched iterator range to the outer action.
     // Inner action: store the `string` rule's attribute in field 0 (value).
     // Outer action: copy the matched source text into field 1 (source) and
     // record get_line_ of the range's begin/end in fields 2 and 3
     // (beginLine, endLine).
     start = raw[ 
         string[at_c<0>(_val) = _1] 
        ] 
        [ 
         at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)), 
         at_c<2>(_val) = get_line_(begin(_1)), 
         at_c<3>(_val) = get_line_(end(_1)) 
        ] 
     ; 
    } 

    // Lazy (Phoenix) wrapper around get_line_f for use in semantic actions.
    boost::phoenix::function<get_line_f> get_line_; 
    qi::rule<Iterator, String(), qi::space_type> start; 
    qi::rule<Iterator, std::string()> escape; 
    // NOTE(review): hex12 and oct123 produce a plain char; whether values
    // above CHAR_MAX (e.g. \xff, \377) parse presumably depends on the
    // signedness of char on the target -- confirm.
    qi::uint_parser<char, 16, 1, 2> hex12; 
    // Fixed-width hex parsers: exactly 4 digits -> uint16_t, exactly 8 -> uint32_t.
    qi::uint_parser<uint16_t, 16, 4, 4> hex4; 
    qi::uint_parser<uint32_t, 16, 8, 8> hex8; 
    qi::uint_parser<char, 8, 1, 3> oct123; 
    qi::rule<Iterator, std::string()> character; 
    qi::rule<Iterator, std::string()> unicode; 
    qi::rule<Iterator, std::string()> string_section; 
    qi::rule<Iterator, std::string()> string; 
}; 

和我的測試代碼是

// Test inputs, written as C++ literals (so each backslash and quote of the
// parsed text is escaped once more here). The loop below feeds every entry
// to the grammar in turn.
std::string str[] = 
{ 
    "\"\\u1234\\U12345678\"", 

    "\"te\"\"st\"", 
    "\"te\" \"st\"", 
    "\"te\" \n \"st\"", 
    "\"\"", 
    "\"\\\"\"", 
    "\"test\"", 
    "\"test\" something", 
    "\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"", 
    "\"\\x61cd\\X3012\\x7z\"", 
    "\"\\141cd\\06012\\78\\778\"", 
    "\"te", 
    "\"te\nst\"", 
    "\"test\\\"", 
    "\"te\\st\"", 
    // 
}; 

typedef line_pos_iterator<std::string::const_iterator> Iterator; 

std::ostringstream result; 

for (size_t i = 0; i < sizeof(str)/sizeof(str[0]); ++i) 
{ 
    source_string<Iterator> g; 
    Iterator iter(str[i].begin()); 
    Iterator end(str[i].end()); 

    String string; 
    bool r = phrase_parse(iter, end, g, qi::space, string); 
    if (r) 
     result << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n"; 
    else 
     result << "Parsing failed\n"; 
} 

有人可以幫我解釋一下,爲什麼在這條規則中:

 // Excerpt of the `unicode` rule from the grammar: both alternatives call
 // attr_cast without explicit template arguments.
 unicode %= ("\\u" >> attr_cast(hex4)) 
        | ("\\U" >> attr_cast(hex8)); 

attr_cast不調用我已經定義的transform_attribute?

// Repeated excerpt: transform_attribute specialization for
// (uint16_t, std::string) in the Qi domain.
// NOTE(review): Spirit documents the arguments as <Exposed, Transformed,
// Domain>; the order used here may be reversed for the intended use -- confirm.
namespace boost { namespace spirit { namespace traits 
{ 
    template <> 
    struct transform_attribute<uint16_t, std::string, qi::domain> 
    { 
     // Declared as a reference, while pre() returns by value.
     typedef std::string& type; 
     // Returns the fixed text "pre16"; the argument is ignored.
     static std::string pre(uint16_t& d) { return "pre16"; } 
     // Sets attr to "unicode16"; val is not written.
     static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; } 
     // Does nothing.
     static void fail(uint16_t&) {} 
    }; 
}}} 

// Repeated excerpt: transform_attribute specialization for
// (uint32_t, std::string) in the Qi domain.
// NOTE(review): Spirit documents the arguments as <Exposed, Transformed,
// Domain>; the order used here may be reversed for the intended use -- confirm.
namespace boost { namespace spirit { namespace traits 
{ 
    template <> 
    struct transform_attribute<uint32_t, std::string, qi::domain> 
    { 
     // Declared as a reference, while pre() returns by value.
     typedef std::string& type; 
     // Returns the fixed text "pre32"; the argument is ignored.
     static std::string pre(uint32_t& d) { return "pre32"; } 
     // Sets attr to "unicode32"; val is not written.
     static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; } 
     // Does nothing.
     static void fail(uint32_t&) {} 
    }; 
}}} 

回答

1

使內建原語類型表現「奇怪」,看起來像一個VeryBadIdea™。

假設你只是想解碼,我建議使用語義動作這種更簡單的方法,例如:

  • https://github.com/sehe/spirit-v2-json/blob/master/JSON.cpp#L102

    char_ = +(
         ~encoding::char_(L"\"\\")) [ qi::_val += qi::_1 ] | 
          qi::lit(L"\x5C") >> (     // \ (reverse solidus) 
          qi::lit(L"\x22") [ qi::_val += L'"' ] | // " quotation mark U+0022 
          qi::lit(L"\x5C") [ qi::_val += L'\\' ] | // \ reverse solidus U+005C 
          qi::lit(L"\x2F") [ qi::_val += L'/' ] | /// solidus   U+002F 
          qi::lit(L"\x62") [ qi::_val += L'\b' ] | // b backspace  U+0008 
          qi::lit(L"\x66") [ qi::_val += L'\f' ] | // f form feed  U+000C 
          qi::lit(L"\x6E") [ qi::_val += L'\n' ] | // n line feed  U+000A 
          qi::lit(L"\x72") [ qi::_val += L'\r' ] | // r carriage return U+000D 
          qi::lit(L"\x74") [ qi::_val += L'\t' ] | // t tab    U+0009 
          qi::lit(L"\x75")       // uXXXX    U+XXXX 
           >> _4HEXDIG [ qi::_val += qi::_1 ] 
    

    這似乎很容易適應你的使用情況。

現在,如果你堅持這樣做,首先要把類型包裝起來(這樣你就不會「重新定義」Spirit 的基本類型);其次,要定製 container insertion 特質(traits),因爲std::string(或者說std::vector<char>?)是一個容器類型。

我不會推薦這個。我喜歡讓事情保持「簡單」,並把邏輯放在一個地方。很顯然,在使用像Spirit這樣的解析器生成器時,這麼說有點「有趣」,因爲有這麼多事情似乎在幕後「神奇地」發生。但是,這就是抽象的本質。我不認爲我想在這裏把解碼unicode escapes「抽象」掉:我覺得它們屬於問題域,而不是工具。

+0

謝謝,你可以擴展你的例子,展示它將如何解析 \Unnnnnnnn 嗎?另外,如果我想解析到 string 而不是 wstring,該怎麼做? – gsf

+0

@gsf 哦,我顯然鏈接了一個舊的分支,我後來很快就不再使用wstring了 :) 我最近的分支中有由[semantic action](https://github.com/sehe/spirit-v2-json/blob/nowide/JSON.cpp#L42)調用的[helper functions](https://github.com/sehe/spirit-v2-json/blob/nowide/JSON.cpp#L48)。[Karma generator](https://github.com/sehe/spirit-v2-json/blob/nowide/JSON.cpp#L183)也使用了相同的方法 – sehe

+0

這是我最先嘗試的做法。出於某種原因,它不僅添加了函數的返回值,還添加了原始的解析結果。我不得不把原始結果忽略掉,但是當有一個以上的字符時,它們開始互相覆蓋。你確定這實際上可行嗎? – gsf

相關問題