2012-06-15 78 views
1

是否有任何一種XML解析器封裝庫,允許在配置時或運行時切換實際的XML解析器引擎,而不是強迫我在libxml2、expat或Xalan-C++之間進行選擇?(XML解析器封裝)

回答

1

我前陣子寫過類似的東西:

// Tag types: each one names an XML library. In this file they are never
// defined, only used as template arguments to select the matching
// platform_manager / parse_file specialization.
struct xerces; 
struct msxml; 
struct rapid; 
struct tiny; 
struct pugixml; 

// Per-library setup/teardown object. Only declared here; every tag gets its
// own explicit specialization further down.
template <typename T> struct platform_manager; 
// Parses the file named `f` with the library selected by T and returns a
// duration read from `qpc`. Only declared here; every tag gets its own
// explicit specialization further down.
template <typename T> double parse_file(std::string const& f, QueryPerfCounter& qpc); 

template<class T> 
void demo(std::string const& f, size_t N = 10) { 
    platform_manager<T> pm; 
    QueryPerfCounter qpc; 
    std::vector<double> timing_data; 
    timing_data.reserve(N); 
    std::generate_n(std::back_inserter(timing_data), N, std::tr1::bind(&parse_file<typename T>, f, qpc)); 
    adobe::Statistics<double> s(timing_data.begin(), timing_data.end()); 
    std::cout << "Iteration count: " << s.count() << " Mean time: " << s.mean() << "s. Variance: " << s.variance() << "s.\n"; 
} 
/***************************************************************/ 
/// Setup/teardown object for the MSXML benchmark: calls CoInitialize on
/// construction and CoUninitialize on destruction.
template <> 
struct platform_manager<msxml> { 
    /// @throws std::runtime_error if CoInitialize reports failure.
    platform_manager() { 
        // The message names the call that actually failed (CoInitialize).
        if (FAILED(CoInitialize(NULL))) 
            throw std::runtime_error("CoInitialize failed"); 
    } 

    ~platform_manager() { 
        CoUninitialize(); 
    } 
}; 

/// Loads the file named `f` into an MSXML DOMDocument60 and times the load.
///
/// @param f    path of the XML file to load
/// @param qpc  timer started before and stopped after the load() call
/// @return     the load duration in seconds, or 0 if the DOM document object
///             could not be created
template<> 
double parse_file<msxml>(std::string const& f, QueryPerfCounter& qpc) { 
    CComPtr<IXMLDOMDocument> pXMLDom; 
    const HRESULT hr = CoCreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pXMLDom)); 
    if (FAILED(hr) || !pXMLDom) { 
        // Without this check a failed creation would dereference a null pointer below.
        std::cout << "Failed to create MSXML DOMDocument60 instance" << std::endl; 
        return 0; 
    } 

    VARIANT_BOOL varStatus = VARIANT_FALSE; 
    qpc.Start(); 
    const HRESULT loadResult = pXMLDom->load(CComVariant(f.c_str()), &varStatus); 
    qpc.Stop(); 

    // load() signals an unsuccessful load with S_FALSE / VARIANT_FALSE, which
    // FAILED() does not detect, so both values are checked explicitly.
    // Reporting happens after Stop() so console I/O is not part of the timing.
    if (loadResult != S_OK || varStatus != VARIANT_TRUE) 
        std::cout << "Parsing failed" << std::endl; 

    return qpc.Duration(QueryPerfCounter::seconds); 
} 

/***************************************************************/ 
#include <xercesc/parsers/XercesDOMParser.hpp> 
#include <xercesc/dom/DOM.hpp> 
#include <xercesc/sax/HandlerBase.hpp> 
#include <xercesc/util/XMLString.hpp> 
#include <xercesc/util/PlatformUtils.hpp> 

#ifdef XERCES_CPP_NAMESPACE_USE 
XERCES_CPP_NAMESPACE_USE 
#endif 

/// Setup/teardown object for the Xerces-C++ benchmark: calls
/// XMLPlatformUtils::Initialize() on construction and
/// XMLPlatformUtils::Terminate() on destruction.
template <> 
struct platform_manager<xerces> { 
    /// Logs an XMLException raised by Initialize() to std::cout. Because this
    /// is a constructor function-try-block, the exception is rethrown
    /// automatically when the handler ends.
    platform_manager() try { 
        XMLPlatformUtils::Initialize(); 
    } catch (const XMLException& toCatch) { 
        char* message = XMLString::transcode(toCatch.getMessage()); 
        // Print the transcoded text itself. Transcoding it a second time
        // allocated an XMLCh buffer that was never released and was streamed
        // as a pointer instead of readable text.
        std::cout << "Failed to init: " << message << std::endl; 
        XMLString::release(&message); 
    } 

    ~platform_manager() { 
        XMLPlatformUtils::Terminate(); 
    } 
}; 

/// Parses the file named `f` with a XercesDOMParser and times the parse.
///
/// The parser is configured with Val_Always validation, namespace processing
/// enabled, and a default HandlerBase as its error handler.
///
/// @param f    path of the XML file to parse
/// @param qpc  timer started before and stopped after parse()
/// @return     the parse duration in seconds, or 0 if parse() threw
template<> 
double parse_file<xerces>(std::string const& f, QueryPerfCounter& qpc) { 
    double duration = 0; 
    std::tr1::shared_ptr<XercesDOMParser> parser(new XercesDOMParser()); 
    parser->setValidationScheme(XercesDOMParser::Val_Always); 
    parser->setDoNamespaces(true); // optional 

    // The parser only stores a raw pointer to the handler, so the handler
    // object is kept alive here for the whole function.
    std::tr1::shared_ptr<ErrorHandler> errHandler(new HandlerBase()); 
    parser->setErrorHandler(errHandler.get()); 

    try { 
        qpc.Start(); 
        parser->parse(f.c_str()); 
        qpc.Stop(); 
        duration = qpc.Duration(QueryPerfCounter::seconds); 
    } 
    catch (const XMLException& toCatch) { 
        char* message = XMLString::transcode(toCatch.getMessage()); 
        std::cout << "Exception message is: \n" 
                  << message << "\n"; 
        XMLString::release(&message); 
    } 
    catch (const DOMException& toCatch) { 
        char* message = XMLString::transcode(toCatch.msg); 
        std::cout << "Exception message is: \n" 
                  << message << "\n"; 
        XMLString::release(&message); 
    } 
    catch (const SAXException& toCatch) { 
        // HandlerBase reports fatal parse errors by throwing a SAX exception.
        // Without this handler they fell through to catch (...) and the
        // parser's diagnostic text was lost.
        char* message = XMLString::transcode(toCatch.getMessage()); 
        std::cout << "Exception message is: \n" 
                  << message << "\n"; 
        XMLString::release(&message); 
    } 
    catch (...) { 
        std::cout << "Unexpected Exception \n" ; 
    } 
    return duration; 
} 

/***************************************************************/ 
#include "rapidxml.hpp" 
#include <vector> 
#include <fstream> 
#include <iterator> 

// Intentionally empty: this specialization performs no setup or teardown for
// the rapidxml benchmark. It exists so that demo<rapid>() can instantiate
// platform_manager<T> like it does for every other tag.
template <> 
struct platform_manager<rapid> {}; 

/// Divisors for reporting a byte count in bytes, kibibytes or mebibytes.
enum size_hint { B = 1, KB = 1024, MB = 1024 * 1024 }; 

/// Returns the size of the file behind `f`, expressed in units of `factor`.
///
/// The stream is positioned at the end to read the size and then rewound to
/// the beginning.
///
/// @param f       input stream to measure
/// @param factor  unit divisor (default MB)
/// @return        size / factor, or 0 when the stream cannot report a position
///                (for example because the file failed to open)
double file_size(std::ifstream& f, size_hint factor = MB) { 
    f.seekg(0, std::ios::end); 
    // Keep the signed offset: tellg() yields -1 on failure, and converting
    // that straight to size_t would report an enormous bogus size.
    const std::streamoff length = f.tellg(); 
    f.seekg(0, std::ios::beg); 
    if (length < 0) 
        return 0.0; 
    return static_cast<double>(length) / factor; 
} 

/// Reads the file named `f` into memory and parses it with rapidxml
/// (parse_non_destructive), timing both phases.
///
/// @param f    path of the XML file to parse
/// @param qpc  timer used once for the buffer load and once for the parse
/// @return     load time plus parse time in seconds; phases that did not
///             complete (open failure, exception) contribute nothing
template<> 
double parse_file<rapid>(std::string const& f, QueryPerfCounter& qpc) { 
    double duration = 0; 
    rapidxml::xml_document<> doc; 
    try { 
        qpc.Start(); 
        // Binary mode keeps the number of bytes read equal to the size that
        // tellg() reports; text mode may translate line endings and read fewer.
        std::ifstream myfile(f.c_str(), std::ios::in | std::ios::binary); 
        if (!myfile) { 
            qpc.Stop(); 
            std::cout << "Cannot open file: " << f << std::endl; 
            return duration; 
        } 
        myfile.seekg(0, std::ios::end); 
        const std::streamoff length = myfile.tellg(); 
        myfile.seekg(0, std::ios::beg); 
        if (length < 0) { 
            qpc.Stop(); 
            std::cout << "Cannot determine size of file: " << f << std::endl; 
            return duration; 
        } 
        // One extra zero-initialized element serves as the terminating '\0'.
        // This avoids a push_back that could reallocate and copy the whole
        // buffer inside the timed region, and keeps &buffer[0] valid even
        // for an empty file.
        std::vector<char> buffer(static_cast<size_t>(length) + 1, '\0'); 
        if (length > 0) 
            myfile.read(&buffer[0], static_cast<std::streamsize>(length)); 
        qpc.Stop(); 
        duration += qpc.Duration(QueryPerfCounter::seconds); 

        qpc.Start(); 
        doc.parse<rapidxml::parse_non_destructive>(&buffer[0]); 
        qpc.Stop(); 
        duration += qpc.Duration(QueryPerfCounter::seconds); 
    } catch (rapidxml::parse_error const& e) { 
        std::cout << e.what() << std::endl; 
    } catch (std::exception const& e) { 
        std::cout << e.what() << std::endl; 
    } 
    return duration; 
} 
/***************************************************************/ 
// Intentionally empty: this specialization performs no setup or teardown for
// the tinyxml2 benchmark. It exists so that demo<tiny>() can instantiate
// platform_manager<T> like it does for every other tag.
template <> 
struct platform_manager<tiny> {}; 

/// Loads the file named `f` with tinyxml2 and times the load.
///
/// @param f    path of the XML file to load
/// @param qpc  timer started before and stopped after LoadFile()
/// @return     the load duration in seconds
template<> 
double parse_file<tiny>(std::string const& f, QueryPerfCounter& qpc) { 
    tinyxml2::XMLDocument doc; 
    qpc.Start(); 
    doc.LoadFile(f.c_str()); 
    qpc.Stop(); 
    // Report errors only after the timer has stopped, as the pugixml variant
    // does, so console I/O never counts as parse time. The Error() guard
    // keeps a successful run silent.
    if (doc.Error()) 
        doc.PrintError(); 
    return qpc.Duration(QueryPerfCounter::seconds); 
} 
/***************************************************************/ 
// Scope marker for one benchmark run: writes a BEGIN banner to std::cout when
// constructed and the matching END banner when destroyed.
struct banner_printer { 
    // Remembers the library name and input file name, then prints the BEGIN banner.
    banner_printer(std::string const& libname, std::string const& input) 
        : library_name_(libname) 
        , input_file_(input) 
    { 
        std::cout << "/*+------------------- BEGIN test for " 
                  << library_name_ 
                  << " with file: " 
                  << input_file_ 
                  << " -------------------+*/" 
                  << std::endl; 
    } 

    // Prints the END banner with the same two labels.
    ~banner_printer() 
    { 
        std::cout << "/*+------------------- END test for " 
                  << library_name_ 
                  << " with file: " 
                  << input_file_ 
                  << " -------------------+*/" 
                  << std::endl; 
    } 

private: 
    std::string library_name_; 
    std::string input_file_; 
}; 
/***************************************************************/ 
#include "pugixml.hpp" 

// Intentionally empty: this specialization performs no setup or teardown for
// the pugixml benchmark. It exists so that demo<pugixml>() can instantiate
// platform_manager<T> like it does for every other tag.
template <> 
struct platform_manager<pugixml> {}; 

/// Loads the file named `f` with pugixml and times the load.
///
/// On failure the parser's error description and error offset are written to
/// std::cout after the timer has been stopped.
///
/// @param f    path of the XML file to load
/// @param qpc  timer started before and stopped after load_file()
/// @return     the load duration in seconds
template<> 
double parse_file<pugixml>(std::string const& f, QueryPerfCounter& qpc) { 
    pugi::xml_document doc; 
    qpc.Start(); 
    const pugi::xml_parse_result result = doc.load_file(f.c_str()); 
    qpc.Stop(); 
    if (!result) { 
        // The report is limited to what the parse result provides. Looking up
        // the attribute of a "node" element has nothing to do with the
        // benchmark inputs, and the offset only needs to be printed once.
        std::cout << "XML [" << f << "] parsed with errors\n"; 
        std::cout << "Error description: " << result.description() << "\n"; 
        std::cout << "Error offset: " << result.offset << "\n\n"; 
    } 
    return qpc.Duration(QueryPerfCounter::seconds); 
} 
/***************************************************************/ 

// Entry point: reads the list of input files from the catalog, then for every
// file prints its size and runs the benchmark once per XML library, each run
// wrapped in BEGIN/END banners.
int main() { 
    std::vector<std::string> catalog = parse_catalog("D:/Work/xml_parsers/perfcompare/benchmark/catalog.txt"); 

    typedef std::vector<std::string>::const_iterator catalog_iterator; 
    for (catalog_iterator entry = catalog.begin(); entry != catalog.end(); ++entry) { 
        std::string const& path = *entry; 

        { 
            // Scoped so the stream is closed before any parser opens the file.
            std::ifstream sizing_stream(path); 
            std::cout << "Input file name: " << path << " size: " << file_size(sizing_stream) << "MB\n\n"; 
        } 
        { 
            banner_printer banner("xerces", path); 
            demo<xerces>(path); 
        } 
        { 
            banner_printer banner("rapid", path); 
            demo<rapid>(path); 
        } 
        { 
            banner_printer banner("tiny", path); 
            demo<tiny>(path); 
        } 
        { 
            banner_printer banner("pugi", path); 
            demo<pugixml>(path); 
        } 
        { 
            banner_printer banner("MSXML6", path); 
            demo<msxml>(path); 
        } 
    } 

    //expat_demo(argc, argv); 
    return 0; 
} 

這也許能幫你起步,也許不能。我省略了頭文件和其他一些瑣碎的細節。我試圖讓各個庫的接口保持簡單而且完全一致,這意味着某些庫需要額外的輔助函數。

+0

非常有幫助的代碼。謝謝。這個回答應該得到幾十個贊才對。 – BSalita