2014-01-30 49 views
0

考慮:如何在 C++ 中將 UTF-16 字符串轉換爲 UTF-8

STDMETHODIMP CFileSystemAPI::setRRConfig(BSTR config_str, VARIANT* ret) 
{ 
mReportReaderFactory.reset(new sbis::report_reader::ReportReaderFactory()); 

USES_CONVERSION; 
std::string configuration_str = W2A(config_str); 

但我得到的 config_str 是 UTF-16 編碼的字符串。我該如何在這段代碼中將它轉換爲 UTF-8?

回答

-2
/**
 * Appends the UTF-8 encoding of a single Unicode code point to a byte buffer.
 *
 * Only well-formed UTF-8 (RFC 3629) is produced: one to four bytes for
 * U+0000..U+10FFFF, excluding the surrogate range U+D800..U+DFFF. Any other
 * input (surrogate, value above U+10FFFF, or a negative value where wchar_t
 * is signed) is invalid and nothing is written.
 *
 * NOTE: this encodes one code point, not one UTF-16 code unit. A UTF-16
 * surrogate pair must be combined into its code point by the caller before
 * calling this function; a lone surrogate is rejected.
 *
 * @param buffer         Destination buffer. Up to 4 bytes are written starting
 *                       at buffer[*offset]; the caller must guarantee the space.
 *                       No terminating NUL is written.
 * @param offset         In/out write position; advanced by the number of bytes
 *                       written (0 when the input is invalid).
 * @param ucs_character  The code point to encode.
 */
void encode_unicode_character(char* buffer, int* offset, wchar_t ucs_character)
{
    // Widen through an unsigned type so that a negative wchar_t (on platforms
    // where wchar_t is signed) becomes a huge value and is rejected below
    // instead of being mistaken for ASCII.
    const unsigned long code_point = static_cast<unsigned long>(ucs_character);

    // Surrogates are UTF-16 artifacts and must never appear in UTF-8.
    const bool is_surrogate = (code_point >= 0xD800UL && code_point <= 0xDFFFUL);
    // RFC 3629 caps UTF-8 at U+10FFFF; 5- and 6-byte forms are not valid.
    const bool out_of_range = (code_point > 0x10FFFFUL);
    if (is_surrogate || out_of_range)
    {
        // Invalid char; don't encode anything.
        return;
    }

    int& pos = *offset;

    if (code_point <= 0x7FUL)
    {
        // Plain single-byte ASCII.
        buffer[pos++] = static_cast<char>(code_point);
    }
    else if (code_point <= 0x7FFUL)
    {
        // Two bytes: 110xxxxx 10xxxxxx.
        buffer[pos++] = static_cast<char>(0xC0UL | (code_point >> 6));
        buffer[pos++] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
    }
    else if (code_point <= 0xFFFFUL)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        buffer[pos++] = static_cast<char>(0xE0UL | (code_point >> 12));
        buffer[pos++] = static_cast<char>(0x80UL | ((code_point >> 6) & 0x3FUL));
        buffer[pos++] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
    }
    else
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        buffer[pos++] = static_cast<char>(0xF0UL | (code_point >> 18));
        buffer[pos++] = static_cast<char>(0x80UL | ((code_point >> 12) & 0x3FUL));
        buffer[pos++] = static_cast<char>(0x80UL | ((code_point >> 6) & 0x3FUL));
        buffer[pos++] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
    }
}

關於 UCS,你需要了解的一切都在 ISO 10646:2012 中。

+2

UCS不在問題中。而UCS不是UTF-16。您的代碼是否適用於UTF-16? – rubenvb

+0

@rubenvb,它肯定可以工作,你應該試一試。 – kvv

+0

我沒有說utf-16是ucs,但它是utf-8的一部分。 – kvv

1

你可以做這樣的事情

/**
 * Converts a UTF-16 wide string to a UTF-8 encoded std::string using the
 * Win32 WideCharToMultiByte API.
 *
 * The conversion is driven by the explicit length of the input, so embedded
 * NUL characters are preserved rather than truncating the result at the
 * first NUL.
 *
 * @param wstr  The wide string to convert.
 * @return      The UTF-8 encoding of wstr; an empty string for empty input.
 * @throws std::runtime_error if the input is too long for the API's int
 *         length parameter, or if WideCharToMultiByte reports failure on
 *         either the sizing call or the conversion call.
 */
std::string WstrToUtf8Str(const std::wstring& wstr)
{
    std::string retStr;
    if (wstr.empty())
    {
        return retStr;
    }

    // WideCharToMultiByte takes its lengths as int. Largest positive int,
    // computed without needing an extra header.
    const std::wstring::size_type maxApiLength = static_cast<unsigned int>(-1) >> 1;

    int sizeRequired = 0;
    int bytesConverted = 0;

    if (wstr.size() <= maxApiLength)
    {
        const int wideLength = static_cast<int>(wstr.size());

        // First call: ask only for the number of bytes needed. With an
        // explicit length the count does not include a terminating NUL.
        sizeRequired = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                                           NULL, 0, NULL, NULL);
        if (sizeRequired > 0)
        {
            // Second call: convert straight into the result's storage.
            retStr.resize(sizeRequired);
            bytesConverted = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                                                 &retStr[0], sizeRequired, NULL, NULL);
        }
    }

    if (sizeRequired <= 0 || bytesConverted <= 0)
    {
        // The wide text itself cannot be streamed into a narrow stream (it
        // would print a pointer value), so report its length instead.
        std::stringstream err;
        err << __FUNCTION__
            << ": failed to convert wstring of " << wstr.size()
            << " UTF-16 code units to UTF-8";
        throw std::runtime_error(err.str());
    }

    retStr.resize(bytesConverted);
    return retStr;
}

你可以把你的 BSTR 作爲 std::wstring 傳給這個函數。