2013-10-06 419 views
11

Stack Overflow 上已經有一些問題在問爲什麼 basic_fstream<uint8_t> 不起作用。答案是:char_traits 僅針對 char 和 wchar_t(C++11 中另加 char16_t 和 char32_t)進行了特化,您應該堅持使用 basic_fstream<char> 來讀取二進制數據,並在需要時進行轉換。問題:爲什麼把這個特化的 char_traits<uint8_t> 和 codecvt<uint8_t> 用於 basic_ifstream 模板時會拋出 std::bad_cast?

好吧,它不夠好! :)

在我能找到的答案中,沒有一個說明如何特化 char_traits<uint8_t> 並在 basic_fstream 模板中使用它,甚至沒有說明這是否可行。所以我想我會嘗試自己實現它。

在 Windows 7 64bit 上使用 Visual Studio Express 2013 RC,以及在 Kubuntu GNU/Linux 13.04 64bit 上使用 g++-4.7 時,以下代碼編譯沒有錯誤,但是它會在運行時拋出 std::bad_cast 異常。我手邊沒有搭配 libc++ 的 clang++,因此無法測試這個組合。

#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <cwchar>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <locale>

// Compatibility shims for compilers without C++11 constexpr/noexcept.
// The check is keyed on the compiler version rather than on the target OS:
// Visual C++ releases before 2015 (_MSC_VER < 1900) lack both keywords, while
// other Windows toolchains and newer Visual C++ releases support them and must
// not be downgraded to the dynamic exception specification `throw()`.
#if defined(_MSC_VER) && _MSC_VER < 1900
    #define constexpr
    #define NOEXCEPT throw()
#else
    #define NOEXCEPT noexcept
#endif

// Element type read from the file by the stream in main().
// Switching this to char selects the standard library's own char_traits and
// codecvt facet instead of the specializations defined below.
using byte_type = std::uint8_t;

namespace std
{
    /// Character traits that let std::uint8_t act as the character type of the
    /// iostream templates (for example std::basic_ifstream<std::uint8_t>).
    ///
    /// int_type is int, and eof() is the same value as
    /// std::char_traits<char>::eof(), so the end-of-file marker differs from
    /// every value to_int_type() produces for a byte.
    ///
    /// The bulk operations forward to the <cstring> byte routines. Requests with
    /// a count of zero return early so those routines are never invoked with an
    /// empty (and possibly null) range.
    template <> struct char_traits<std::uint8_t>
    {
        using char_type = std::uint8_t;
        using int_type = int;
        using off_type = std::streamoff;
        using pos_type = std::streampos;
        using state_type = std::mbstate_t;

        /// Assigns `value2` to `value1`.
        static void assign(char_type& value1, const char_type& value2) NOEXCEPT
        {
            value1 = value2;
        }

        /// Fills `count` elements starting at `ptr` with `value`; returns `ptr`.
        static char_type* assign(char_type* ptr, std::size_t count, char_type value)
        {
            if (count != 0)
            {
                std::memset(ptr, value, count);
            }
            return ptr;
        }

        /// Returns true when both characters are equal.
        static constexpr bool eq(const char_type& value1, const char_type& value2) NOEXCEPT
        {
            return value1 == value2;
        }

        /// Returns true when `value1` orders before `value2`.
        static constexpr bool lt(const char_type value1, const char_type value2) NOEXCEPT
        {
            return value1 < value2;
        }

        /// Returns the number of elements before the first value-initialized
        /// (zero) element of the sequence starting at `ptr`.
        static std::size_t length(const char_type* ptr)
        {
            const char_type* cursor = ptr;
            while (!eq(*cursor, char_type()))
            {
                ++cursor;
            }
            return static_cast<std::size_t>(cursor - ptr);
        }

        /// Compares the first `count` elements of both sequences.
        /// Returns a negative, zero or positive value, as std::memcmp does.
        static int compare(const char_type* ptr1, const char_type* ptr2, std::size_t count)
        {
            if (count == 0)
            {
                return 0;
            }
            return std::memcmp(ptr1, ptr2, count);
        }

        /// Returns a pointer to the first occurrence of `value` within the
        /// first `count` elements of `ptr`, or a null pointer if there is none.
        static const char_type* find(const char_type* ptr, std::size_t count, const char_type& value)
        {
            if (count == 0)
            {
                return nullptr;
            }
            return static_cast<const char_type*>(std::memchr(ptr, value, count));
        }

        /// Copies `count` elements from `src` to `dest`; the ranges may overlap.
        /// Returns `dest`.
        static char_type* move(char_type* dest, const char_type* src, std::size_t count)
        {
            if (count != 0)
            {
                std::memmove(dest, src, count);
            }
            return dest;
        }

        /// Copies `count` elements from `src` to `dest`; the ranges must not
        /// overlap (std::memcpy is used). Returns `dest`.
        static char_type* copy(char_type* dest, const char_type* src, std::size_t count)
        {
            if (count != 0)
            {
                std::memcpy(dest, src, count);
            }
            return dest;
        }

        /// Converts an int_type value back to a character.
        static constexpr char_type to_char_type(const int_type& value) NOEXCEPT
        {
            return static_cast<char_type>(value);
        }

        /// Converts a character to its int_type representation.
        static constexpr int_type to_int_type(const char_type& value) NOEXCEPT
        {
            return static_cast<int_type>(value);
        }

        /// Returns true when both int_type values are equal.
        static constexpr bool eq_int_type(const int_type& value1, const int_type& value2) NOEXCEPT
        {
            return value1 == value2;
        }

        /// Returns the end-of-file marker (same value as char_traits<char>::eof()).
        static constexpr int_type eof() NOEXCEPT
        {
            return static_cast<int_type>(std::char_traits<char>::eof());
        }

        /// Returns `value` unchanged unless it equals eof(), in which case 0 is
        /// returned so that the result never compares equal to eof().
        static constexpr int_type not_eof(const int_type& value) NOEXCEPT
        {
            return (value == eof()) ? 0 : value;
        }
    };

    /// Non-converting codecvt facet between std::uint8_t (internal type) and
    /// char (external type).
    ///
    /// Every conversion entry point reports codecvt_base::noconv and
    /// always_noconv() returns true. Defining this specialization does not add
    /// the facet to any locale: a stream only finds it in a locale that was
    /// built with it, e.g.
    ///   std::locale(loc, new std::codecvt<std::uint8_t, char, std::mbstate_t>)
    template<> class codecvt< std::uint8_t, char, std::mbstate_t > : public locale::facet, public codecvt_base
    {
    public:
        using internal_type = std::uint8_t;
        using external_type = char;
        using state_type = std::mbstate_t;
        // Member names used by the standard codecvt interface.
        using intern_type = internal_type;
        using extern_type = external_type;

        static std::locale::id id;

        /// `refs` is forwarded to locale::facet unchanged.
        explicit codecvt(std::size_t refs = 0)
            : locale::facet(refs)
        {}

        // Public interface: each function forwards to the matching do_* virtual.

        std::codecvt_base::result out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const
        {
            return do_out(state, from, from_end, from_next, to, to_end, to_next);
        }

        std::codecvt_base::result in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const
        {
            return do_in(state, from, from_end, from_next, to, to_end, to_next);
        }

        std::codecvt_base::result unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const
        {
            return do_unshift(state, to, to_end, to_next);
        }

        int length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const
        {
            return do_length(state, from, from_end, max);
        }

        int max_length() const NOEXCEPT
        {
            return do_max_length();
        }

        int encoding() const NOEXCEPT
        {
            return do_encoding();
        }

        bool always_noconv() const NOEXCEPT
        {
            return do_always_noconv();
        }

    protected:
        virtual ~codecvt() {}

        /// Performs no conversion: both "next" pointers are set to the start of
        /// their ranges and noconv is returned.
        virtual std::codecvt_base::result do_out(state_type& /*state*/, const internal_type* from, const internal_type* /*from_end*/, const internal_type*& from_next, external_type* to, external_type* /*to_end*/, external_type*& to_next) const
        {
            from_next = from;
            to_next = to;
            return std::codecvt_base::noconv;
        }

        /// Performs no conversion: both "next" pointers are set to the start of
        /// their ranges and noconv is returned.
        virtual std::codecvt_base::result do_in(state_type& /*state*/, const external_type* from, const external_type* /*from_end*/, const external_type*& from_next, internal_type* to, internal_type* /*to_end*/, internal_type*& to_next) const
        {
            from_next = from;
            to_next = to;
            return std::codecvt_base::noconv;
        }

        /// Writes nothing: `to_next` is set to `to` and noconv is returned.
        virtual std::codecvt_base::result do_unshift(state_type& /*state*/, external_type* to, external_type* /*to_end*/, external_type*& to_next) const
        {
            to_next = to;
            return std::codecvt_base::noconv;
        }

        /// Returns the smaller of `max` and the number of external characters
        /// in [from, from_end).
        virtual int do_length(state_type& /*state*/, const external_type* from, const external_type* from_end, std::size_t max) const
        {
            const std::size_t available = static_cast<std::size_t>(from_end - from);
            return static_cast<int>(std::min<std::size_t>(max, available));
        }

        /// Returns 1.
        virtual int do_max_length() const NOEXCEPT
        {
            return 1;
        }

        /// Returns 1.
        virtual int do_encoding() const NOEXCEPT
        {
            return 1;
        }

        /// Returns true: this facet never converts.
        virtual bool do_always_noconv() const NOEXCEPT
        {
            return true;
        }
    }; // class codecvt

    // Storage for the facet's locale id.
    locale::id codecvt< std::uint8_t, char, std::mbstate_t >::id;
} // namespace std


int main(int argc, char *argv []) 
{ 
    if (argc < 2) 
    { 
     std::cerr << argv[0] << " {file to read}" << std::endl; 
     return EXIT_FAILURE; 
    } 

    using stream_type = std::basic_ifstream< byte_type, std::char_traits<byte_type> >; 

    stream_type stream(argv[1], std::ifstream::in | std::ifstream::binary); 
    if (stream.is_open() == false) 
    { 
     std::cerr << "file not found" << std::endl; 
     return EXIT_FAILURE; 
    } 
    stream.exceptions(std::ifstream::badbit); 

    static const auto read_size = 4; 
    stream_type::char_type buffer[read_size]; 

    stream.read(buffer, read_size); 

    std::cout << "Got:" << stream.gcount() << std::endl; 

    return EXIT_SUCCESS; 
} 

在 GNU/Linux 上用 g++ 編譯並運行:

$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random 
terminate called after throwing an instance of 'std::bad_cast' 
    what(): std::bad_cast 
Aborted (core dumped) 

而在 Visual Studio Express 2013 RC 上:

First-chance exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978. 
Unhandled exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978. 

將 byte_type 改爲 char 會給出預期輸出:

$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random 
Got:4 

爲什麼這會拋出 std::bad_cast,我該如何解決它?

+0

我之前也遇到了和你相同的問題,這篇帖子以及 @Cubbi 的回答非常有用!只是一個附註:在我看來,即使沒有 std::char_traits<uint8_t> 的特化,也就是說只靠 std::codecvt 的特化和 imbue 技巧,整個程序也能運行。我錯了嗎? –

回答

5

我能夠在我的gcc(AIX上的4.7.2)上重現bad_cast。

你之所以得到它,是因爲 GCC 庫的實現者對 basic_filebuf::xsgetn(由 basic_istream::read 調用)做了優化:如果流的語言環境是非轉換的(也就是說,你並不是要把 UTF-8 或 GB18030 文件讀入 UTF-32 字符串之類),就直接調用普通的 C fread 從文件讀取,這絕對是正確的做法。爲了確定語言環境是否爲非轉換,它會檢查流所 imbue 的語言環境中 codecvt facet 的 always_noconv……而這個 facet 並不存在。

你可以通過執行以下代碼來重現這個異常:

std::cout << std::use_facet< 
        std::codecvt<std::uint8_t, char, std::mbstate_t> 
      >(stream.getloc()).always_noconv() << '\n'; 

我沒有 Visual Studio 可用,無法查看它在那裏爲什麼能工作(難道他們只是在 basic_fstream::read() 中對每個字符調用 basic_filebuf::sgetc()?),但無論如何,要使用 basic_filestream,都需要爲內部類型和外部類型的組合(在這種情況下爲 uint8_t 和 char)提供一個 codecvt facet。

編輯:就快好了,最後缺少的部分是這一行

stream.imbue(std::locale(stream.getloc(), 
      new std::codecvt<uint8_t, char, std::mbstate_t>)); 

放在 stream.read 之前的任何位置即可;或者,在構建 basic_ifstream 之前 imbue 全局語言環境:std::locale::global(std::locale(std::locale(), new std::codecvt<uint8_t, char, std::mbstate_t>));

+0

感謝您的建議。我已經更新了代碼以包含一個特化的 codecvt,但它現在在 g++ 和 Visual Studio 中都拋出 std::bad_cast。 –

+0

@DrTwox在添加imbue後爲我工作(參見編輯) – Cubbi

+0

謝謝!要是我能不止一次地點贊就好了! –

相關問題