根據所使用的 ID3v2 版本,文本字符串前面可能有、也可能沒有一個用來指明字符串實際編碼的字節。不要使用 IsTextUnicode() 來猜測編碼(尤其是因爲它可能報告錯誤的結果)。
在 v2.3 及之前的 ID3v2 版本中,沒有編碼字節,文本要麼是 ISO-8859-1,要麼是 UCS-2,並且 UCS-2 字符串始終以 BOM 開頭,因此您可以知道字節序。例如:
// Prior to Delphi 2009 - String is Ansi.
//
// Decodes an ID3v2 (up to v2.3) text field that carries no encoding byte.
//   currp    - pointer to the first byte of the raw text data
//   datasize - number of bytes available at currp
// If the data starts with a UTF-16 byte order mark it is treated as UCS-2
// (byte-swapped first when the mark says big-endian); otherwise it is decoded
// as ISO-8859-1 (Windows code page 28591). The decoded text is converted to
// the Ansi String type and returned with trailing whitespace/control
// characters removed. Returns '' for a nil pointer or non-positive size.
function Id3v2_string(currp: Pointer; datasize: Integer): String;
var
  W: WideString;
  I: Integer;
  Ch: WideChar;
begin
  Result := '';
  // Nothing to decode. This also keeps a negative size away from
  // MultiByteToWideChar, which interprets -1 as "input is null-terminated".
  if (currp = nil) or (datasize <= 0) then
    Exit;

  if (datasize >= SizeOf(Word)) and
     ((PWord(currp)^ = $FEFF) or (PWord(currp)^ = $FFFE)) then
  begin
    // UCS-2 with BOM: copy the raw UTF-16 code units that follow the BOM
    // straight into the WideString. They must NOT pass through an Ansi
    // conversion here, otherwise big-endian data is destroyed before it can
    // be byte-swapped and non-Ansi characters are lost.
    SetString(W, PWideChar(PAnsiChar(currp) + SizeOf(Word)),
      (datasize - SizeOf(Word)) div SizeOf(WideChar));
    if PWord(currp)^ = $FFFE then
    begin
      // BOM read back byte-reversed => data is big-endian, convert to LE
      for I := 1 to Length(W) do
      begin
        Ch := W[I];
        W[I] := WideChar(((Word(Ch) and $FF) shl 8) or (Word(Ch) shr 8));
      end;
    end;
  end
  else
  begin
    // ISO-8859-1: first call asks for the required length, second converts
    I := MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, nil, 0);
    if I > 0 then
    begin
      SetLength(W, I);
      MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, PWideChar(W), I);
    end;
  end;
  Result := TrimRight(W);
end;
。
// Delphi 2009+ - String is Unicode.
//
// Decodes an ID3v2 (up to v2.3) text field that carries no encoding byte.
//   currp    - pointer to the first byte of the raw text data
//   datasize - number of bytes available at currp
// Data starting with a UTF-16 byte order mark is decoded as UCS-2 in the
// indicated byte order (the mark itself is skipped); anything else is decoded
// as ISO-8859-1 (code page 28591). Trailing whitespace/control characters are
// trimmed, matching the other Id3v2_string variants. Returns '' for a nil
// pointer or non-positive size.
function Id3v2_string(currp: Pointer; datasize: Integer): String;

  // Copies Size bytes from P into a byte array and decodes them with Enc.
  function Convert(Enc: TEncoding; P: Pointer; Size: Integer): String;
  var
    Buf: TBytes;
  begin
    Result := '';
    if Size <= 0 then
      Exit;
    SetLength(Buf, Size);
    Move(P^, Buf[0], Size);
    Result := Enc.GetString(Buf);
  end;

var
  Enc: TEncoding;
begin
  Result := '';
  if (currp = nil) or (datasize <= 0) then
    Exit;

  if (datasize >= SizeOf(Word)) and
     ((PWord(currp)^ = $FEFF) or (PWord(currp)^ = $FFFE)) then
  begin
    // UCS-2 with BOM. The TEncoding class properties used here are not
    // freed, unlike the instance obtained from GetEncoding below.
    if PWord(currp)^ = $FFFE then
      Enc := TEncoding.BigEndianUnicode  // BOM read back byte-reversed => BE
    else
      Enc := TEncoding.Unicode;          // LE
    // PByte arithmetic avoids depending on {$POINTERMATH ON} for PWord.
    Result := Convert(Enc, PByte(currp) + SizeOf(Word), datasize - SizeOf(Word));
  end
  else
  begin
    // ISO-8859-1: GetEncoding hands back an instance that is released here.
    Enc := TEncoding.GetEncoding(28591);
    try
      Result := Convert(Enc, currp, datasize);
    finally
      Enc.Free;
    end;
  end;
  Result := TrimRight(Result);
end;
ID3 v2.4 將 UCS-2 換成了 UTF-16,並增加了對 UTF-8 和不帶 BOM 的 UTF-16BE 的支持,例如:
// Prior to Delphi 2009 - String is Ansi.
//
// Decodes an ID3v2.4 text field according to its encoding byte.
//   currp    - pointer to the first byte of the raw text data
//   datasize - number of bytes available at currp
//   Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM, $02 UTF-16BE without BOM,
//              $03 UTF-8
// The text is decoded into a WideString, converted to the Ansi String type
// and returned with trailing whitespace/control characters removed. Returns
// '' for a nil pointer, a non-positive size, or an Encoding value not listed
// above.
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;
var
  W: WideString;

  // Swaps the two bytes of every UTF-16 code unit in W (BE <-> LE).
  procedure SwapByteOrder;
  var
    I: Integer;
    Ch: WideChar;
  begin
    for I := 1 to Length(W) do
    begin
      Ch := W[I];
      W[I] := WideChar(((Word(Ch) and $FF) shl 8) or (Word(Ch) shr 8));
    end;
  end;

  // Decodes the whole buffer from the given Windows code page into W.
  procedure FromCodePage(CodePage: Cardinal);
  var
    Len: Integer;
  begin
    // First call asks for the required length, second one converts.
    Len := MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, nil, 0);
    if Len > 0 then
    begin
      SetLength(W, Len);
      MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, PWideChar(W), Len);
    end;
  end;

begin
  Result := '';
  // Nothing to decode. This also keeps a negative size away from
  // MultiByteToWideChar, which interprets -1 as "input is null-terminated".
  if (currp = nil) or (datasize <= 0) then
    Exit;

  case Encoding of
    $00:
      // ISO-8859-1
      FromCodePage(28591);
    $01:
      // UTF-16 with BOM. Only touch the BOM when the buffer is large enough
      // to hold one; a shorter buffer yields an empty string instead of an
      // out-of-bounds read and a negative length.
      if datasize >= SizeOf(Word) then
      begin
        SetString(W, PWideChar(PAnsiChar(currp) + SizeOf(Word)),
          (datasize - SizeOf(Word)) div SizeOf(WideChar));
        if PWord(currp)^ = $FFFE then
          SwapByteOrder; // BOM read back byte-reversed => BE, convert to LE
      end;
    $02:
      begin
        // UTF-16BE without BOM, convert to LE
        SetString(W, PWideChar(currp), datasize div SizeOf(WideChar));
        SwapByteOrder;
      end;
    $03:
      // UTF-8
      FromCodePage(65001);
  end;
  Result := TrimRight(W);
end;
。
// Delphi 2009+ - String is Unicode.
//
// Decodes an ID3v2.4 text field according to its encoding byte.
//   currp    - pointer to the first byte of the raw text data
//   datasize - number of bytes available at currp
//   Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM, $02 UTF-16BE without BOM,
//              $03 UTF-8
// Returns the decoded text with trailing whitespace/control characters
// removed. Returns '' for a nil pointer, a non-positive size, or an Encoding
// value not listed above.
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;

  // Copies Size bytes from P into a byte array and decodes them with Enc.
  function Convert(Enc: TEncoding; P: Pointer; Size: Integer): String;
  var
    Buf: TBytes;
  begin
    Result := '';
    if Size <= 0 then
      Exit;
    SetLength(Buf, Size);
    Move(P^, Buf[0], Size);
    Result := Enc.GetString(Buf);
  end;

var
  Enc: TEncoding;
begin
  Result := '';
  if (currp = nil) or (datasize <= 0) then
    Exit;

  case Encoding of
    $00:
      begin
        // ISO-8859-1: GetEncoding hands back an instance that is released
        // here; the TEncoding class properties used below are not freed.
        Enc := TEncoding.GetEncoding(28591);
        try
          Result := Convert(Enc, currp, datasize);
        finally
          Enc.Free;
        end;
      end;
    $01:
      // UTF-16 with BOM. Only read the BOM when the buffer can hold one;
      // a shorter buffer yields an empty string instead of an out-of-bounds
      // read and a negative size.
      if datasize >= SizeOf(Word) then
      begin
        if PWord(currp)^ = $FFFE then
          Enc := TEncoding.BigEndianUnicode  // BOM read back byte-reversed => BE
        else
          Enc := TEncoding.Unicode;          // LE
        // PByte arithmetic avoids depending on {$POINTERMATH ON} for PWord.
        Result := Convert(Enc, PByte(currp) + SizeOf(Word), datasize - SizeOf(Word));
      end;
    $02:
      // UTF-16BE without BOM
      Result := Convert(TEncoding.BigEndianUnicode, currp, datasize);
    $03:
      // UTF-8
      Result := Convert(TEncoding.UTF8, currp, datasize);
  end;
  Result := TrimRight(Result);
end;
明白了。謝謝! – Glenn1234 2012-03-03 02:28:10