我想對包含泰文字符的 UTF-8 字符串進行 Base64 編碼。我正在使用瀏覽器內置的 btoa
函數。它對 ASCII 文本有效,但泰文字符會導致它拋出異常 INVALID_CHARACTER_ERR: DOM Exception 5。
JavaScript Base64 編碼 UTF-8 字符串在 WebKit/Safari 中失敗
下面是一個會失敗的示例(看起來像「n」的那個字符是泰文字符):
// Failing sample from the question: reported to throw INVALID_CHARACTER_ERR (DOM Exception 5)
// because the second character is the Thai letter 'ก', not ASCII.
btoa('aก')
我需要怎麼做才能對非 ASCII 字符串進行 Base64 編碼?
我想對包含泰文字符的 UTF-8 字符串進行 Base64 編碼。我正在使用瀏覽器內置的 btoa
函數。它對 ASCII 文本有效,但泰文字符會導致它拋出異常 INVALID_CHARACTER_ERR: DOM Exception 5。
JavaScript Base64 編碼 UTF-8 字符串在 WebKit/Safari 中失敗
下面是一個會失敗的示例(看起來像「n」的那個字符是泰文字符):
// Failing sample from the question: reported to throw INVALID_CHARACTER_ERR (DOM Exception 5)
// because the second character is the Thai letter 'ก', not ASCII.
btoa('aก')
我需要怎麼做才能對非 ASCII 字符串進行 Base64 編碼?
不幸的是,btoa/atob 沒有在任何標準中規定,而 Firefox 和 WebKit 中的實現在遇到多字節字符時都會失敗;因此,即使現在將它們標準化,這些內置函數也無法支持多字節字符(因爲輸入和輸出字符串必然會隨之改變)。
看來你唯一的選擇是自己編寫 Base64 編碼和解碼程序:
var Base64 = {
encode: function(s) {
return btoa(unescape(encodeURIComponent(s)));
},
decode: function(s) {
return decodeURIComponent(escape(atob(s)));
}
};
嗯,我要在這裏指出,escape 已被棄用:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/escape 請分享一個不使用 escape 的解決方案。 – TechJS 2015-09-07 05:55:43
我知道這個問題已經很舊了,但我最近也在找 UTF-8 轉 Base64 的編碼器。我在 http://www.webtoolkit.info/javascript-base64.html 找到了一個方便的小腳本,並在 http://jsbase64.codeplex.com/ 找到了它的性能改進版本。
下面是腳本:
var B64 = {
alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/=',
lookup: null,
ie: /MSIE /.test(navigator.userAgent),
ieo: /MSIE [67]/.test(navigator.userAgent),
encode: function (s) {
var buffer = B64.toUtf8(s),
position = -1,
len = buffer.length,
nan0, nan1, nan2, enc = [, , , ];
if (B64.ie) {
var result = [];
while (++position < len) {
nan0 = buffer[position];
nan1 = buffer[++position];
enc[0] = nan0 >> 2;
enc[1] = ((nan0 & 3) << 4) | (nan1 >> 4);
if (isNaN(nan1))
enc[2] = enc[3] = 64;
else {
nan2 = buffer[++position];
enc[2] = ((nan1 & 15) << 2) | (nan2 >> 6);
enc[3] = (isNaN(nan2)) ? 64 : nan2 & 63;
}
result.push(B64.alphabet.charAt(enc[0]), B64.alphabet.charAt(enc[1]), B64.alphabet.charAt(enc[2]), B64.alphabet.charAt(enc[3]));
}
return result.join('');
} else {
var result = '';
while (++position < len) {
nan0 = buffer[position];
nan1 = buffer[++position];
enc[0] = nan0 >> 2;
enc[1] = ((nan0 & 3) << 4) | (nan1 >> 4);
if (isNaN(nan1))
enc[2] = enc[3] = 64;
else {
nan2 = buffer[++position];
enc[2] = ((nan1 & 15) << 2) | (nan2 >> 6);
enc[3] = (isNaN(nan2)) ? 64 : nan2 & 63;
}
result += B64.alphabet[enc[0]] + B64.alphabet[enc[1]] + B64.alphabet[enc[2]] + B64.alphabet[enc[3]];
}
return result;
}
},
decode: function (s) {
if (s.length % 4)
throw new Error("InvalidCharacterError: 'B64.decode' failed: The string to be decoded is not correctly encoded.");
var buffer = B64.fromUtf8(s),
position = 0,
len = buffer.length;
if (B64.ieo) {
var result = [];
while (position < len) {
if (buffer[position] < 128)
result.push(String.fromCharCode(buffer[position++]));
else if (buffer[position] > 191 && buffer[position] < 224)
result.push(String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63)));
else
result.push(String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63)));
}
return result.join('');
} else {
var result = '';
while (position < len) {
if (buffer[position] < 128)
result += String.fromCharCode(buffer[position++]);
else if (buffer[position] > 191 && buffer[position] < 224)
result += String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63));
else
result += String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63));
}
return result;
}
},
toUtf8: function (s) {
var position = -1,
len = s.length,
chr, buffer = [];
if (/^[\x00-\x7f]*$/.test(s)) while (++position < len)
buffer.push(s.charCodeAt(position));
else while (++position < len) {
chr = s.charCodeAt(position);
if (chr < 128)
buffer.push(chr);
else if (chr < 2048)
buffer.push((chr >> 6) | 192, (chr & 63) | 128);
else
buffer.push((chr >> 12) | 224, ((chr >> 6) & 63) | 128, (chr & 63) | 128);
}
return buffer;
},
fromUtf8: function (s) {
var position = -1,
len, buffer = [],
enc = [, , , ];
if (!B64.lookup) {
len = B64.alphabet.length;
B64.lookup = {};
while (++position < len)
B64.lookup[B64.alphabet.charAt(position)] = position;
position = -1;
}
len = s.length;
while (++position < len) {
enc[0] = B64.lookup[s.charAt(position)];
enc[1] = B64.lookup[s.charAt(++position)];
buffer.push((enc[0] << 2) | (enc[1] >> 4));
enc[2] = B64.lookup[s.charAt(++position)];
if (enc[2] == 64)
break;
buffer.push(((enc[1] & 15) << 4) | (enc[2] >> 2));
enc[3] = B64.lookup[s.charAt(++position)];
if (enc[3] == 64)
break;
buffer.push(((enc[2] & 3) << 6) | enc[3]);
}
return buffer;
}
};
免責聲明:我沒有專門用泰文字符測試過這段腳本,但我認爲它應該可行。
Sav的
或者更具體地說,如果要複製/粘貼完整的解決方案:https://developer.mozilla.org/en/DOM/window.btoa#Unicode_Strings (如果你真的好奇它是如何工作的以及爲什麼,Monsur Hossain對其機制有一個很好的深入解釋,在 http://monsur.hossa.in/2012/07/20/utf-8-in-javascript.html) – ecmanaut 2012-07-20 04:04:31