2011-12-01 72 views
0

黑莓解析UTF-8 XML文件:我試圖用SAX解析器解析一個UTF-8編碼的XML文件,但解析器拋出了一個異常,消息是「需要一個元素」。

<?xml version='1.0' encoding='UTF-8' standalone='yes' ?> 
<config> 
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath> 
<filename>anonymous22242.3gp</filename> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:06</timestamp> 
    <note>test1</note> 
</annotation> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:09</timestamp> 
    <note>لول</note> 
</annotation> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:09</timestamp> 
    <note>لولو</note> 
</annotation> 
</config> 


 // Text of the <filepath> element, as assigned by the SAX handler in main().
 private static String fileDirectory; 
// Text of every <filename> element seen while parsing.
private final static ArrayList<String> allFileNames = new ArrayList<String>(); 
// Parsed annotations; each entry is a String[3] laid out like currentAnnotation.
private final static ArrayList<String[]> allAnnotations = new ArrayList<String[]>(); 
// Annotation being filled in: [0] = <file>, [1] = <timestamp>, [2] = <note>.
private static String[] currentAnnotation = new String[3]; 

/**
 * Downloads the annotation XML, parses it with SAX and prints what was found.
 *
 * <p>The handler collects the text of each element in a buffer and stores it
 * when the element ends. SAX is allowed to deliver the text of a single element
 * through several {@code characters()} calls, so reading only the first call can
 * truncate values. Each {@code <annotation>} gets its own {@code String[3]}
 * ({@code file}, {@code timestamp}, {@code note}) so that entries in
 * {@code allAnnotations} do not share one array.
 *
 * @param args unused
 */
public static void main(String[] args) {
    InputStream inputStream = null;
    try {

        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser playbackParser = factory.newSAXParser();

        DefaultHandler handler = new DefaultHandler() {

            // Text of the element currently being read; reset at every tag.
            private final StringBuilder text = new StringBuilder();

            @Override
            public void startElement(String uri, String localName,
                    String qName, Attributes attributes)
                    throws SAXException {

                System.out.println("Start Element :" + qName);

                text.setLength(0);

                // A fresh array per annotation: reusing one array would make
                // every list entry show the values of the last annotation.
                if (qName.equalsIgnoreCase("annotation")) {
                    currentAnnotation = new String[3];
                }
            }

            @Override
            public void endElement(String uri, String localName,
                    String qName) throws SAXException {

                String value = text.toString();
                text.setLength(0);

                if (qName.equalsIgnoreCase("filepath")) {
                    System.out.println("Full Path : " + value);
                    fileDirectory = value;
                } else if (qName.equalsIgnoreCase("filename")) {
                    System.out.println("File Name : " + value);
                    allFileNames.add(value);
                } else if (qName.equalsIgnoreCase("file")) {
                    currentAnnotation[0] = value;
                } else if (qName.equalsIgnoreCase("timestamp")) {
                    currentAnnotation[1] = value;
                } else if (qName.equalsIgnoreCase("note")) {
                    currentAnnotation[2] = value;
                } else if (qName.equalsIgnoreCase("annotation")) {
                    // Commit once the whole annotation has been read, so the
                    // result does not depend on the order of its children.
                    allAnnotations.add(currentAnnotation);
                }

                System.out.println("End Element :" + qName);
            }

            @Override
            public void characters(char ch[], int start, int length)
                    throws SAXException {
                // May be called more than once per element; accumulate.
                text.append(ch, start, length);
            }

        };

        inputStream = getStream("http://www.example.com/example.xml");
        if (inputStream == null) {
            // getStream reports its own failure and returns null.
            System.out.println("Unable to open the XML stream");
            return;
        }

        // Decode the bytes explicitly as UTF-8 before handing them to the parser.
        Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");

        InputSource xmlSource = new InputSource(xmlReader);
        xmlSource.setEncoding("UTF-8");

        playbackParser.parse(xmlSource, handler);

        System.out.println(fileDirectory);
        System.out.println(allFileNames);
        System.out.println(allAnnotations);

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
    }
}
} 

public Static InputStream getStream(String url) 
{ 
    try 
    { 
     connection = getConnection(url); 
     connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles")); 
     connection.setRequestProperty("Connection", "Keep-Alive"); 
     connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8"); 

     inputStream = connection.openInputStream(); 
     return inputStream; 
    } 
    catch(Exception e) 
    { 
     System.out.println("NNNNNNN "+e.getMessage()); 

     return null; 
     } 

    } 

/**
 * Opens an {@code HttpConnection} to {@code url}, with the suffix returned by
 * {@code getConnectionString()} appended to it.
 *
 * <p>The result is also stored in the {@code connection} field. On failure the
 * field is reset to {@code null}, so a connection left over from an earlier call
 * is never returned by mistake.
 *
 * @param url address to connect to
 * @return the open connection, or {@code null} if it could not be opened
 */
public HttpConnection getConnection(String url) 
{ 
    HttpConnection opened = null; 

    try 
    { 
     opened = (HttpConnection) Connector.open(url+getConnectionString()); 
    } 
    catch(Exception e) 
    { 
     // Report the failure; the caller is told through the null return value.
     System.out.println("getConnection failed for " + url + ": " + e); 
    } 

    connection = opened; 
    return opened; 
} 

但是,當我把InputStream(而不是InputSource)傳給parse方法時,文件能夠被解析,但0123之間的阿拉伯語字符仍然有問題:
playbackParser.parse(inputStream, handler); 
+0

「ArrayList<String>」無法在BlackBerry上編譯,因爲BlackBerry只運行Java ME。爲什麼這個問題帶有BlackBerry標籤? –

回答

1

您顯示的XML包含未經編碼的阿拉伯字符。這與XML聲明中的encoding不符,也就意味着該XML格式不正確。SAX解析器按順序逐段處理數據,並爲每個片段觸發事件,因此在讀到包含這些錯誤字符的片段之前,它不會檢測到這種編碼錯誤。對此你無能爲力,XML需要由其原作者修復。

+0

包含來自其他語言的字符時如何編碼XML? –

+1

XML中的所有字符都需要使用XML序言(prolog)的「encoding」屬性所指定的字符集進行編碼。在這種情況下,您的示例XML的字符集是「UTF-8」(未指定時,它也是XML的默認字符集)。生成該XML時,阿拉伯字符沒有按UTF-8編碼,這是錯誤的。在UTF-8中,Unicode字符串「لول」被編碼爲字節序列「D9 84 D9 88 D9 84」,而「لولو」被編碼爲「D9 84 D9 88 D9 84 D9 88」。 –

+0

@Remy 不幸的是,Stack Overflow是一個基於字符的問答網站,所以在這種情況下很難判斷這個文件是否被正確編碼:這裏顯示的字符是從編輯器中複製出來的,而編輯器已經把文件系統中的字節表示正確地轉換成了字符。我不認爲有人會更樂意看到d9 84 d9 88 d9 84的cp1252表示形式。 –