Unicode字符串中的奇怪字符

我在實現 MP3 ID3 v2 標籤解析時遇到了一個小問題。除了這個問題之外，大部分功能都已經可以正常工作，而這個問題可能與 ID3 本身無關。無論如何，我使用下面的代碼來讀取包含文本的標籤幀數據。

我碰到的情況是（我猜？）某些字符串中含有Unicode字符。我試着用下面的方式進行轉換，它確實可以工作。但是，在某些字符串之前我得到了一個 $3 值，在字符串之後又得到了 $3 $3。我能否用下面的代碼把這些值解析掉，還是必須自己處理？這些文件是由iTunes編碼的，如果這一信息有幫助的話。

function Id3v2_string(currp: pointer; datasize: integer): string;
{ Returns the text stored in an ID3v2 frame payload.
  currp    - start of the raw text bytes
  datasize - number of bytes available at currp
  The encoding is guessed with IsTextUnicode: when the buffer looks like
  UTF-16 it is converted as a null-terminated wide string, otherwise the
  bytes are copied into the result unchanged. }
const
    IS_TEXT_UNICODE_UNICODE_MASK = $0F;
var
    flags: integer;
begin
    // IsTextUnicode takes the set of tests to run through this in/out variable.
    flags := IS_TEXT_UNICODE_UNICODE_MASK;
    if not IsTextUnicode(currp, datasize, @flags) then
    begin
        // Not recognised as Unicode: take the bytes verbatim.
        SetLength(Result, datasize);
        move(currp^, Result[1], datasize);
    end
    else
        // Recognised as Unicode: convert up to the first wide null terminator.
        Result := WideCharToString(currp);
end;

注意，我真的沒有興趣，因爲所有我希望做一個媒體庫，這是編輯ID3標籤，而不是播放文件 - 實施已經完成，除了少數像這樣的小問題一。

來源

2012-03-02 Glenn1234

根據所使用的 ID3 v2 版本，文本字符串可能會、也可能不會以一個字節作爲前綴來告訴您字符串的實際編碼。不要使用 IsTextUnicode() 來猜測編碼（尤其是因爲它可能報告錯誤的結果）。

在 ID3 v2 直到 v2.3 的版本中，沒有編碼字節；文本要麼是 ISO-8859-1，要麼是 UCS-2，而 UCS-2 字符串始終以 BOM 開頭，因此可以得知字節序。例如：

// prior to Delphi 2009 - String is Ansi 
function Id3v2_string(currp: Pointer; datasize: Integer): String;
{ Decodes the text payload of an ID3v2 frame when no encoding byte is supplied.
  currp    - start of the raw text bytes
  datasize - number of bytes available at currp
  A leading byte-order mark selects UCS-2 (either byte order); anything else
  is converted from code page 28591 (ISO-8859-1). Trailing whitespace and
  control characters (including null terminators) are trimmed. Returns ''
  for a nil pointer or a non-positive size. }
var
    W: WideString;
    I: Integer;
    Bom: Word;
begin
    Result := '';
    // A negative size would make MultiByteToWideChar scan for a terminator.
    if (currp = nil) or (datasize <= 0) then Exit;

    W := '';
    Bom := 0;
    if datasize >= SizeOf(Word) then Bom := PWord(currp)^;

    if (Bom = $FEFF) or (Bom = $FFFE) then begin
        // UCS-2 with BOM. Copy the raw 16-bit units as they are; converting to
        // an Ansi string here would corrupt the data before the byte swap below.
        SetString(W, PWideChar(PAnsiChar(currp) + SizeOf(Word)),
            (datasize - SizeOf(Word)) div SizeOf(WideChar));
        if Bom = $FFFE then begin
            // BOM read back swapped: data is BE, convert every unit to LE
            for I := 1 to Length(W) do
                W[I] := WideChar(((Word(W[I]) and $FF) shl 8) or (Word(W[I]) shr 8));
        end;
    end else begin
        // ISO-8859-1: first call measures, second call converts
        I := MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, nil, 0);
        if I > 0 then begin
            SetLength(W, I);
            MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, PWideChar(W), I);
        end;
    end;
    Result := TrimRight(W);
end;

。

// Delphi 2009+ - String is Unicode 
function Id3v2_string(currp: Pointer; datasize: Integer): String;
{ Decodes the text payload of an ID3v2 frame when no encoding byte is supplied.
  currp    - start of the raw text bytes
  datasize - number of bytes available at currp
  A leading byte-order mark selects UCS-2 (either byte order); anything else
  is decoded as code page 28591 (ISO-8859-1). Trailing whitespace and control
  characters (including null terminators) are trimmed. Returns '' for a nil
  pointer or a non-positive size. }
var
    Enc: TEncoding;
    Bom: Word;

    // Copies Size bytes starting at P and decodes them with Enc.
    function Convert(P: Pointer; Size: Integer): String;
    var
        Buf: TBytes;
    begin
        Result := '';
        if Size <= 0 then Exit;
        SetLength(Buf, Size);
        Move(P^, Buf[0], Size);
        Result := Enc.GetString(Buf);
    end;

begin
    Result := '';
    if (currp = nil) or (datasize <= 0) then Exit;

    Bom := 0;
    if datasize >= SizeOf(Word) then Bom := PWord(currp)^;

    if (Bom = $FEFF) or (Bom = $FFFE) then begin
        // UCS-2 with BOM; a BOM that reads back byte-swapped means BE data.
        // These TEncoding properties are shared instances and must not be freed.
        if Bom = $FFFE then
            Enc := TEncoding.BigEndianUnicode
        else
            Enc := TEncoding.Unicode;
        // PByte arithmetic is used to skip the BOM because arithmetic on a
        // PWord needs {$POINTERMATH ON}.
        Result := Convert(PByte(currp) + SizeOf(Word), datasize - SizeOf(Word));
    end else begin
        // ISO-8859-1; GetEncoding returns a new instance that the caller owns
        Enc := TEncoding.GetEncoding(28591);
        try
            Result := Convert(currp, datasize);
        finally
            Enc.Free;
        end;
    end;
    Result := TrimRight(Result);
end;

ID3 v2.4 將 UCS-2 換成了 UTF-16，並增加了對 UTF-8 以及不帶 BOM 的 UTF-16BE 的支持，例如：

// prior to Delphi 2009 - String is Ansi 
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;
{ Decodes the text payload of an ID3v2 frame using an explicit encoding byte.
  currp    - start of the raw text bytes
  datasize - number of bytes available at currp
  Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM, $02 UTF-16BE without BOM,
             $03 UTF-8; any other value yields ''
  Trailing whitespace and control characters (including null terminators)
  are trimmed. Returns '' for a nil pointer or a non-positive size. }
var
    W: WideString;

    // Swaps the two bytes of every 16-bit unit in W (BE <-> LE).
    procedure SwapByteOrder;
    var
        I: Integer;
    begin
        for I := 1 to Length(W) do
            W[I] := WideChar(((Word(W[I]) and $FF) shl 8) or (Word(W[I]) shr 8));
    end;

    // Converts the whole buffer from the given code page into W.
    procedure FromCodePage(CodePage: Cardinal);
    var
        Len: Integer;
    begin
        // First call measures, second call converts.
        Len := MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, nil, 0);
        if Len > 0 then begin
            SetLength(W, Len);
            MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, PWideChar(W), Len);
        end;
    end;

begin
    Result := '';
    if (currp = nil) or (datasize <= 0) then Exit;

    W := '';
    case Encoding of
        $00: begin
            // ISO-8859-1
            FromCodePage(28591);
        end;
        $01: begin
            // UTF-16 with BOM. The first two bytes are always skipped; a buffer
            // too short to hold them yields an empty string.
            if datasize >= SizeOf(Word) then begin
                SetString(W, PWideChar(PAnsiChar(currp) + SizeOf(Word)),
                    (datasize - SizeOf(Word)) div SizeOf(WideChar));
                // BOM read back swapped: data is BE, convert to LE
                if PWord(currp)^ = $FFFE then SwapByteOrder;
            end;
        end;
        $02: begin
            // UTF-16BE without BOM, convert to LE
            SetString(W, PWideChar(currp), datasize div SizeOf(WideChar));
            SwapByteOrder;
        end;
        $03: begin
            // UTF-8
            FromCodePage(65001);
        end;
    end;
    Result := TrimRight(W);
end;

。

// Delphi 2009+ - String is Unicode 
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;
{ Decodes the text payload of an ID3v2 frame using an explicit encoding byte.
  currp    - start of the raw text bytes
  datasize - number of bytes available at currp
  Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM, $02 UTF-16BE without BOM,
             $03 UTF-8; any other value yields ''
  Trailing whitespace and control characters (including null terminators)
  are trimmed. Returns '' for a nil pointer or a non-positive size. }
var
    Enc: TEncoding;

    // Copies Size bytes starting at P and decodes them with Enc.
    function Convert(P: Pointer; Size: Integer): String;
    var
        Buf: TBytes;
    begin
        Result := '';
        if Size <= 0 then Exit;
        SetLength(Buf, Size);
        Move(P^, Buf[0], Size);
        Result := Enc.GetString(Buf);
    end;

begin
    Result := '';
    if (currp = nil) or (datasize <= 0) then Exit;

    case Encoding of
        $00: begin
            // ISO-8859-1; GetEncoding returns a new instance that the caller owns
            Enc := TEncoding.GetEncoding(28591);
            try
                Result := Convert(currp, datasize);
            finally
                Enc.Free;
            end;
        end;
        $01: begin
            // UTF-16 with BOM. A buffer too short to hold the BOM yields ''.
            if datasize >= SizeOf(Word) then begin
                // A BOM that reads back byte-swapped means BE data.
                if PWord(currp)^ = $FFFE then
                    Enc := TEncoding.BigEndianUnicode
                else
                    Enc := TEncoding.Unicode;
                // PByte arithmetic is used to skip the BOM because arithmetic
                // on a PWord needs {$POINTERMATH ON}.
                Result := Convert(PByte(currp) + SizeOf(Word), datasize - SizeOf(Word));
            end;
        end;
        $02: begin
            // UTF-16BE without BOM
            Enc := TEncoding.BigEndianUnicode;
            Result := Convert(currp, datasize);
        end;
        $03: begin
            // UTF-8
            Enc := TEncoding.UTF8;
            Result := Convert(currp, datasize);
        end;
    end;
    Result := TrimRight(Result);
end;

來源

2012-03-03 01:54:56

明白了。謝謝！ – Glenn1234 2012-03-03 02:28:10

Unicode字符串中的奇怪字符

回答

相關問題