2017-09-19 128 views
1

我正在用 Go 解析一個來自 Google API 的 CSV 文件,該文件使用 UTF-16 編碼。下面的代碼嘗試讀取一條記錄(跳過標題行)並將其打印出來,但輸出結果很奇怪(解析 CSV 文件時出現奇怪的輸出),如下所示:

, v=/09/20 00:35:42 k=Smartfren Andromax AD681H 

我想這可能與 UTF-16 編碼有關,但不清楚細節。代碼如下:

package main

import (
    "encoding/csv" 
    "io" 
    "log" 
    "net/http" 
    "strings" 
) 

// url is the location of the CSV file that main fetches with http.Get.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv" 

// main downloads the CSV file at url, skips its header row, and logs the
// first record that has at least four fields as a key/value pair: the key is
// the fourth field and the value is the second field, both trimmed of
// surrounding whitespace.
//
// NOTE(review): the response body is handed to the CSV reader as raw bytes,
// with no character-set decoding. The surrounding discussion says the file is
// UTF-16 encoded, which would explain garbled fields — confirm.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		// Report the failure instead of exiting silently.
		log.Printf("fetching %s: %v", url, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// An error response is not the CSV payload; do not try to parse it.
		log.Printf("fetching %s: unexpected status %s", url, resp.Status)
		return
	}

	r := csv.NewReader(resp.Body)
	r.LazyQuotes = true
	r.FieldsPerRecord = -1 // records may have a varying number of fields

	// Skip the header row. A malformed header is still treated as skipped,
	// but any other failure (including an empty body) ends the program.
	if _, err := r.Read(); err != nil {
		log.Printf("reading header: %v", err)
		if _, ok := err.(*csv.ParseError); !ok {
			return
		}
	}

	m := make(map[string]string)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Println(err)
			if _, ok := err.(*csv.ParseError); ok {
				// A malformed record only affects that record; keep going.
				continue
			}
			// Any other error comes from the underlying reader and is
			// likely to recur on every call, so retrying could loop forever.
			return
		}
		if len(record) < 4 {
			continue
		}

		m[strings.TrimSpace(record[3])] = strings.TrimSpace(record[1])
		for k, v := range m {
			log.Printf("k=%s, v=%s\n", k, v)
		}
		// Only the first usable record is printed.
		break
	}
}

回答

3

正如您所懷疑的,輸入數據必須從 UTF-16 編碼的字符流轉換爲 UTF-8 編碼的字符流。您可以使用 Go 子倉庫中的包 golang.org/x/text/encoding/unicode 來完成這一轉換:

package main 

import (
    "encoding/csv" 
    "io" 
    "log" 
    "net/http" 
    "strings" 

    "golang.org/x/text/encoding/unicode" 
) 

// url is the location of the CSV file that main fetches with http.Get.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv" 

// main downloads the CSV file at url, decodes the response body from UTF-16
// to UTF-8, skips the header row, and logs the first record that has at least
// four fields as a key/value pair: the key is the fourth field and the value
// is the second field, both trimmed of surrounding whitespace.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		// Report the failure instead of exiting silently.
		log.Printf("fetching %s: %v", url, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// An error response is not the CSV payload; do not try to parse it.
		log.Printf("fetching %s: unexpected status %s", url, resp.Status)
		return
	}

	// Wrap the body in a UTF-16 decoder so the CSV reader sees UTF-8 text.
	// The decoder is configured with LittleEndian and UseBOM.
	dec := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	reader := dec.Reader(resp.Body)

	r := csv.NewReader(reader)
	r.LazyQuotes = true
	r.FieldsPerRecord = -1 // records may have a varying number of fields

	// Skip the header row. A malformed header is still treated as skipped,
	// but any other failure (including an empty body) ends the program.
	if _, err := r.Read(); err != nil {
		log.Printf("reading header: %v", err)
		if _, ok := err.(*csv.ParseError); !ok {
			return
		}
	}

	m := make(map[string]string)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Println(err)
			if _, ok := err.(*csv.ParseError); ok {
				// A malformed record only affects that record; keep going.
				continue
			}
			// Any other error comes from the underlying reader and is
			// likely to recur on every call, so retrying could loop forever.
			return
		}
		if len(record) < 4 {
			continue
		}

		m[strings.TrimSpace(record[3])] = strings.TrimSpace(record[1])
		for k, v := range m {
			log.Printf("k=%s, v=%s\n", k, v)
		}
		// Only the first usable record is printed.
		break
	}
}