2014-10-09 18 views
0

我按照this link
如何在 Android 中使用 XmlPullParser 和 AsyncTask 從一個 URL 讀取 XML

讀取 XML 文件時的 Logcat 輸出:

warning : org.xmlpull.v1.XmlPullParserException : expected : START_TAG { null } rss (position:START_TAG <html>@2:7 in java.io.InputStreamReader @41232f00)
error : NullPointerException

我搜尋過資料,也試過一些基本的測試方法,但 readRss 方法中的 parser.require 並沒有定位到 rss 標籤。
我想取得每個 item 中 title 和 link 標籤的內容。
抱歉我的英文不好,我是 Android 編程的新手。
非常感謝!

來自 URL 的 XML 內容

<rss xmlns:slash="http://purl.org/rss/1.0/modules/slash/" version="2.0"> 
    <channel> 
    <title></title> 
    <description></description> 
    <pubDate></pubDate> 
    <generator></generator> 
    <link></link> 

    <item> 
     <title>CONTENTS</title> 
     <link>http://test.html/</link> 
    </item> 

    </channel> 
</rss> 

XmlParser的代碼

public class XmlParser { 

private static final String nameSpace = null; 

public XmlParser(){ 

} 

public List<Item> parse(InputStream inputStream) 
     throws XmlPullParserException, IOException { 
    try { 
     XmlPullParser parser = Xml.newPullParser(); 
     parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false); 
     parser.setInput(inputStream, null); 
     parser.nextTag(); 
     return readRss(parser); 
    } finally { 
     inputStream.close(); 
    } 
} 

private List<Item> readRss(XmlPullParser parser) 
     throws XmlPullParserException, IOException { 
    List<Item> itemList = new ArrayList<Item>(); 
    int eventType = parser.getEventType(); 

    System.out.println(parser.getName(); // <==== position is html 

    parser.require(XmlPullParser.START_TAG, nameSpace, "rss"); // <---- error 
     /** Code in here is failed**/ 

    while (parser.next() != XmlPullParser.END_TAG) { 
     if (eventType!= XmlPullParser.START_TAG) { 
      continue; 
     } 
     String name = parser.getName(); 

     // Starts by looking for the Rss tag 
     if (name.equals("item")) { 
       itemList.add(readItem(parser)); 
     } else { 
      skip(parser); 
     } 
    } 
    return itemList; 
} 

// This class represents a single entry (post) in the XML feed. 
// It includes the data members "title," "link," and "summary." 
public static class Item{ 
    String title = ""; 
    String link = ""; 

    private Item() { 
    } 
    private Item(String title){ 
     this.title = title; 
    } 

    private Item(String title, String link) { 
     this.title = title; 
     this.link = link; 
    } 
} 

// Parses the contents of an item. If it encounters a title, link tag, hands 
// them 
// off 
// to their respective &quot;read&quot; methods for processing. Otherwise, 
// skips the tag. 

private Item readItem(XmlPullParser parser) throws XmlPullParserException, 
     IOException { 
    parser.require(XmlPullParser.START_TAG, nameSpace, "item"); 
    String title = null; 
    String link = null; 
    while (parser.next() != XmlPullParser.END_TAG) { 
     if (parser.getEventType() != XmlPullParser.START_TAG) { 
      continue; 
     } 
     String name = parser.getName(); 
     if (name.equals("title")) { 
      title = readTitle(parser); 
     /*} else if (name.equals("link")) { 
      link = readLink(parser);*/ 
     }else{ 
      skip(parser); 
     } 
    } 
    return new Item(title); 
    //return new Item(title, link); 
} 

// Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e., 
// if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it 
// finds the matching END_TAG (as indicated by the value of "depth" being 0). 

private void skip(XmlPullParser parser) throws XmlPullParserException, IOException { 
    if(parser.getEventType() != XmlPullParser.START_TAG){ 
     throw new IllegalStateException(); 
    } 
    int depth = 1; 
    while(depth != 0){ 
     switch (parser.next()) { 
     case XmlPullParser.END_TAG: 
      depth--; 
      break; 
     case XmlPullParser.START_TAG: 
      depth++; 
      break; 
     } 
    } 
} 

// Processes title tags in the Rss. 
private String readTitle(XmlPullParser parser) 
     throws XmlPullParserException, IOException { 
    parser.require(XmlPullParser.START_TAG, nameSpace, "title"); 
    String title = readTitle(parser); 
    parser.require(XmlPullParser.END_TAG, nameSpace, "title"); 
    return title; 
} 

// Processes link tags in the Rss. 
/*private String readLink(XmlPullParser parser) 
     throws XmlPullParserException, IOException { 
    parser.require(XmlPullParser.START_TAG, nameSpace, "link"); 
    String link = readLink(parser); 
    parser.require(XmlPullParser.END_TAG, nameSpace, "link"); 
    return link; 
} 

}

ListActivity代碼

public class MainActivity extends ListActivity { 

static final String URL = "http://vnexpress.net/rss/du-lich.rss"; 
ArrayAdapter<Item> arrAdapter; 

@Override 
protected void onCreate(Bundle savedInstanceState) { 
    super.onCreate(savedInstanceState); 
    setContentView(R.layout.activity_main); 
    new buildDataXml().execute(URL); 
} 

private class buildDataXml extends AsyncTask<String, Void, List<Item>> { 
    @Override 
    protected List<Item> doInBackground(String... params) { 
     try { 
      return loadXmlFromUrl(params[0]); 
     } catch (Exception e) { 
      e.printStackTrace(); 
      return null; 
     } 
    } 

    @Override 
    protected void onPostExecute(List<Item> result) { 
     super.onPostExecute(result); 
     System.out.println(result.get(0)); 
     arrAdapter = new ArrayAdapter<Item>(getApplicationContext(), android.R.layout.simple_list_item_1, result); 
     setListAdapter(arrAdapter); 
    } 
} 

private List<Item> loadXmlFromUrl(String strUrl) throws XmlPullParserException, IOException{ 
    InputStream inputStream = null; 
    List<Item> item = null; 
    XmlParser parser = new XmlParser(); 

    try { 
     inputStream = downloadUrl(strUrl); 
     System.out.println("InputStream : "+inputStream.available()); 

     item = parser.parse(inputStream); 
     System.out.println("Item Size : " + item.size()); 

    // Makes sure that the InputStream is closed after the app is 
    // finished using it. 
    } finally { 
     if (inputStream != null) { 
      inputStream.close(); 
     } 
    } 

    return item; 
} 

private InputStream downloadUrl(String strUrl) throws IOException{ 
    URL url = new URL(strUrl); 
    HttpURLConnection conn = (HttpURLConnection) url.openConnection(); 
    conn.setReadTimeout(10000 /* milliseconds */); 
    conn.setConnectTimeout(15000 /* milliseconds */); 
    conn.setRequestMethod("GET"); 
    conn.setDoInput(true); 
    conn.setRequestProperty("Content-Type", "application/xml"); 
    // Starts the query 
    conn.connect(); 
    InputStream stream = conn.getInputStream(); 
    return stream; 
} 

回答

2

您的 HTTP 請求並未返回您所期待的 RSS,而是返回了一個 HTML 頁面,這就是解析失敗的原因。

實際返回內容的片段:

<!DOCTYPE html> 
<html><!-- InstanceBegin template="/Templates/common.dwt" codeOutsideHTMLIsLocked="false" --> 
<head> 
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1, user-scalable=0"> 
<meta name="apple-mobile-web-app-capable" content="yes" /> 
<meta name="apple-mobile-web-app-title" content="Vnexpress.net" /> 
<!-- iPad icons --> 
<link rel="apple-touch-icon-precomposed" href="http://st.f1.vnecdn.net/responsive/images/logos/72x72.png" sizes="72x72"> 
<link rel="apple-touch-icon-precomposed" href="http://st.f1.vnecdn.net/responsive/images/logos/114x114.png" sizes="144x144"> 
<!-- iPhone and iPod touch icons --> 
<link rel="apple-touch-icon-precomposed" href="http://st.f1.vnecdn.net/responsive/images/logos/57x57.png" sizes="57x57"> 
<link rel="apple-touch-icon-precomposed" href="http://st.f1.vnecdn.net/responsive/images/logos/114x114.png" sizes="114x114"> 
<!-- Nokia Symbian --> 
<link rel="nokia-touch-icon" href="http://st.f1.vnecdn.net/responsive/images/logos/57x57.png"> 
<!-- Android icon precomposed so it takes precedence --> 
<link rel="apple-touch-icon-precomposed" href="http://st.f1.vnecdn.net/responsive/images/logos/114x114.png" sizes="1x1"> 
<!-- InstanceBeginEditable name="doctitle" --> 
<title>Tin nhanh VnExpress - Đọc báo, tin tức online 24h</title> 
<!-- InstanceEndEditable --> 
<link rel="stylesheet" href="http://st.f3.vnecdn.net/responsive/c/v2/general.css" media="all" /> 
<!-- link rel="stylesheet" href="http://st.f3.vnecdn.net/responsive/css/iphone.css" media="all"/--> 
<!-- InstanceBeginEditable name="css" --> 
<!-- InstanceEndEditable --> 
<script src="http://st.f4.vnecdn.net/responsive/libs/jquery-1.7.1.min.js"></script> 
<script language="javascript" type="text/javascript"> 
     var interactions_url = 'http://interactions.vnexpress.net'; 
     var base_url   = 'http://vnexpress.net'; 
     var css_url    = 'http://st.f3.vnecdn.net/responsive/c/v2'; 
     var js_url    = 'http://st.f2.vnecdn.net/responsive/j/v2'; 
     var flash_url   = 'http://st.f4.vnecdn.net/responsive/f/v2'; 
     var img_url    = 'http://st.f1.vnecdn.net/responsive/i/v2'; 
     var image_cloud   = 'http://l.f29.img.vnecdn.net'; 
     var PageHot    = 0; 
     var device_env   = 2; 
     var site_id    = 1000000; 
     var SITE_ID    = 1000000; 
     var PAGE_FOLDER   = 1001005; 
     var PAGE_DETAIL   = 0   //setTypingMode(1); 
     </script> 
<script src="http://st.polyad.net/library/2014/VneShowAds.js"></script> 
<script src="http://st.polyad.net/library/2014/vneads.js"></script> 
<script type="text/javascript" src="http://st.f2.vnecdn.net/responsive/j/v2/utils/utils.js"></script> 
<script type="text/javascript" src="http://st.f2.vnecdn.net/responsive/j/v2/utils/crawler.js"></script> 

<script type="text/javascript" src="http://st.f2.vnecdn.net/responsive/j/v2/interactions/parser_v2.js"></script> 
<script language="javascript" type="text/javascript"> 
     var PageHot    = 0; 
     var site_id    = 1000000; 
     var PAGE_FOLDER   = 1001005; 
</script> 

<!-- InstanceBeginEditable name="javascript" --> 
<!-- InstanceEndEditable --> 
<!-- TRACKING GOOGLE --> 
     <script> 
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ 
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), 
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) 
      })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); 
     //beta vnexpress reponsive 
      ga('create', 'UA-249346-69', 'vnexpress.net'); 
      ga('send', 'pageview'); 
     //new tracker 
      ga('create', 'UA-249346-21', 'auto', {'name': 'newTracker'}); 
      ga('newTracker.send', 'pageview'); 

     //new tracker 1 
      ga('create', 'UA-249346-22', 'auto', {'name': 'newTracker1'}); 
      ga('newTracker1.send', 'pageview'); 
     </script> 
     <script type="text/javascript"> 
     /* 
     var _gaq = _gaq || []; 
     _gaq.push(['_setAccount', 'UA-249346-21']); 
     _gaq.push(['_trackPageview']); 

     _gaq.push(['b._setAccount', 'UA-249346-22']); 
     _gaq.push(['b._trackPageview']); 

     _gaq.push(['c._setAccount', 'UA-249346-1']); 
     _gaq.push(['c._trackPageview']); 
     <?php echo $this->ShowGaByCate($parent_category_id);?> 

     (function() { 
     var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 
     ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; 
     var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); 
     })(); 
     */ 
     </script> 
     <script> 
     if(PAGE_FOLDER==1000000) 
     { 
      var _siteId="6"; 
      /// track cu trang chu// 

       (function(){ 
       var e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="http://st.l.a.eclick.vn/ea.js"; 
       var t=document.getElementsByTagName("script")[0];t.parentNode.insertBefore(e,t)})(); 
     }else 
     { 
       var _siteId = "5"; 
       // track cu trang con // 
       (function(){ 
       var e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="http://st.l.a.eclick.vn/ea.js"; 
       var t=document.getElementsByTagName("script")[0];t.parentNode.insertBefore(e,t)})(); 
     } 
     </script> 
     <!-- END TRACKING GOOGLE --> 
</head> 

<body> 
<div class="block_more_info" id="box_col_left" style="left: -240px;"> 
    <div class="box_width_common"> 
     <div class="btn_control_col_left"><img alt="" src="http://st.f1.vnecdn.net/responsive/i/v2/graphics/img_left_panel.gif"></div> 
     <div class="block_scoll_menu"> 
     <div class="block_search"> 
      <form id="search" target="_blank" method="get" 
.... 

編輯:仔細看過代碼後,您似乎認爲 parser.require 會把解析器一直推進到 rss 元素,但事實並非如此。require 只驗證解析器的當前事件是否與指定的事件類型匹配、名稱空間是否與提供的名稱空間匹配,以及(當事件是 START_TAG 或 END_TAG 時)元素名稱是否匹配。

另外,查看 HTTP 流量後,我發現您的請求被重定向到了一個錯誤頁面:

GET /rss/du-lich.rss HTTP/1.1 
Content-Type: application/rss+xml 
User-Agent: Dalvik/2.0.0 (Linux; U; Android L Build/LPV81B) 
Host: vnexpress.net 
Connection: Keep-Alive 
Accept-Encoding: gzip 

HTTP/1.1 302 Moved Temporarily 
Server: Fengine/1.5.2 
Date: Fri, 10 Oct 2014 09:20:56 GMT 
Content-Type: text/html 
Transfer-Encoding: chunked 
Set-Cookie: device_env=1; expires=Fri, 17-Oct-2014 09:20:55 GMT; path=/; domain=vnexpress.net 
Location: /error4.html 
Content-Encoding: gzip 
Vary: Accept-Encoding 
server: web_141.173 

所以我進一步調查,結果發現 HTTP 服務器可能是根據 User-Agent 來限制訪問的;加入以下這一行後問題似乎得到修復:

conn.setRequestProperty("User-Agent", "Fiddler"); 

修復這一點之後,我相信這是一個很好的起點,您可以繼續修復剩下的解析錯誤。

0

END編輯

相關問題