我在Linux中測試了代碼。
#include <iostream>
#include <map>
#include <string>
/// @brief Returns the number of bytes occupied by the UTF-8 sequence that
///        starts at @p offset in @p text.
///
/// The expected length is taken from the lead byte (1-4 bytes). Only
/// continuation bytes (10xxxxxx) that are actually present are consumed, so a
/// truncated or malformed sequence never runs past the end of the string or
/// swallows the lead byte of the following character. Stray continuation
/// bytes and invalid lead bytes count as one-byte characters.
///
/// @param text   Byte string interpreted as UTF-8.
/// @param offset Index of the lead byte; must be less than text.size().
/// @return Length in bytes of the sequence, always at least 1.
static std::string::size_type utf8_sequence_length(const std::string& text,
                                                   std::string::size_type offset) {
    // Work on unsigned bytes: plain `char` may be signed or unsigned.
    const unsigned char lead = static_cast<unsigned char>(text[offset]);

    std::string::size_type expected = 1;
    if ((lead & 0xE0) == 0xC0) {
        expected = 2;
    } else if ((lead & 0xF0) == 0xE0) {
        expected = 3;
    } else if ((lead & 0xF8) == 0xF0) {
        expected = 4;
    }

    std::string::size_type length = 1;
    while (length < expected && offset + length < text.size() &&
           (static_cast<unsigned char>(text[offset + length]) & 0xC0) == 0x80) {
        ++length;
    }
    return length;
}

/// @brief Extracts a substring measured in UTF-8 characters rather than bytes.
///
/// Characters are indexed from 0. Indexes in [pos, pos + len) that fall
/// outside the string (including negative ones) contribute nothing, so an
/// out-of-range request yields a shorter or empty result instead of an error.
///
/// @param str UTF-8 encoded input.
/// @param pos Index of the first character to return.
/// @param len Number of characters to return; values <= 0 give an empty string.
/// @return The selected characters as a UTF-8 encoded string.
std::string str_get_substr(std::string str, int pos, int len) {
    if (len <= 0) {
        return std::string();
    }

    // Half-open character range [first, last); widened so pos + len cannot overflow.
    const long long first = pos > 0 ? pos : 0;
    const long long last = static_cast<long long>(pos) + len;
    if (last <= first) {
        return std::string();
    }

    const std::string::size_type size = str.size();
    std::string::size_type begin_byte = size;  // stays at size if `first` is never reached
    std::string::size_type end_byte = size;    // stays at size if `last` is past the end

    long long index = 0;
    std::string::size_type offset = 0;
    while (offset < size) {
        if (index == first) {
            begin_byte = offset;
        }
        if (index == last) {
            end_byte = offset;
            break;
        }
        offset += utf8_sequence_length(str, offset);
        ++index;
    }

    return str.substr(begin_byte, end_byte - begin_byte);
}
/// Demo entry point: prints the first character of a sample non-ASCII string
/// by asking str_get_substr for one character starting at index 0.
int main (int argc, char * argv[]) {
    const std::string sample = "äbcdefg";
    const std::string first_char = str_get_substr(sample, 0, 1);
    std::cout << first_char << std::endl;
    return 0;
}
除非你有一個庫能替你處理,否則最好的選擇可能是自己編寫一個函數來讀取UTF-8字符串的第一個字符:讀取其後續字節,並將它們合成爲單個32位整數——即Unicode代碼點——這就是你的第一個字符。例如,請參閱此答案:http://stackoverflow.com/questions/2948308/how-do-i-read-utf-8-characters-via-a-pointer/2953960#2953960 – MicroVirus
對單個字符進行大小寫轉換(以及其他操作)是一個壞主意。這會破壞組合字符序列,並且對具有一對多映射的字符(例如 'ß' => 'SS')會得到錯誤結果。 –
完全同意Mihai。然而,在這種情況下,我需要模仿維基百科的內部行爲進行分析,所以這是一條可以採取的途徑。 – Adrian