0
我有一個應用程序檢測字符集編碼,當我測試一個Shift_JIS文件作爲輸入時,它返回一個字符集編碼類型爲EUC_JP。爲什麼Shift_JIS字符集編碼文件返回EUC_JP類型?
我已經使用了{「EUC_JP」,「Shift_JIS」,「UTF-8」},並作爲Charset實例一個接一個地傳遞給下面的方法來檢查字符集編碼。
下面是我的解碼
private Charset detectCharset(File file, Charset charset)
{
try
{
BufferedInputStream input = new BufferedInputStream(new FileInputStream(file));
CharsetDecoder decoder = charset.newDecoder();
decoder.reset();
byte[] buffer = new byte[BUFFER_SIZE];
boolean identified = false;
while ((input.read(buffer) != -1) && (!identified))
{
identified = identify(buffer, decoder);
}
input.close();
if (!identified)
{
charset = null;
}
return charset;
}
catch (Exception e)
{
return null;
}
}
private boolean identify(byte[] bytes, CharsetDecoder decoder)
{
boolean isIdentifies = true;
try
{
decoder.decode(ByteBuffer.wrap(bytes));
}
catch (CharacterCodingException e)
{
isIdentifies = false;
}
return isIdentifies;
}