請幫助解析來自互聯網的文件。如何解析遠程文件?
import pprint
import xml.dom.minidom
from xml.dom.minidom import Node
import requests
def titles_by_isbn(xml_data):
    """Map each book's ISBN to its title text.

    Args:
        xml_data: The XML document itself as ``str`` or ``bytes`` (the
            document content, not a file name or a URL).

    Returns:
        A dict keyed by the ``isbn`` attribute of every ``<book>`` element;
        each value is the concatenated text of that book's ``<title>``.
        Books that have no ``<title>`` element are left out.

    Raises:
        xml.parsers.expat.ExpatError: If ``xml_data`` is not well-formed XML.
    """
    # parseString() parses in-memory content. parse() expects a file name or
    # an open file object, which is why passing str(response) failed in open().
    doc = xml.dom.minidom.parseString(xml_data)
    mapping = {}
    for book in doc.getElementsByTagName("book"):  # traverse DOM object
        isbn = book.getAttribute("isbn")  # via DOM object API
        for title_node in book.getElementsByTagName("title"):
            # A title may be split across several text nodes; join them all.
            mapping[isbn] = "".join(
                child.data
                for child in title_node.childNodes
                if child.nodeType == Node.TEXT_NODE
            )
    return mapping


if __name__ == "__main__":
    # A timeout keeps the script from hanging forever on an unresponsive host.
    addr = requests.get('http://fh79272k.bget.ru/py_test/books.xml', timeout=10)
    print(addr.status_code)
    # Stop here on 4xx/5xx instead of trying to parse an error page as XML.
    addr.raise_for_status()
    # Hand the parser the raw bytes so it honours the encoding declared in the XML.
    mapping = titles_by_isbn(addr.content)
    # mapping now has the same value as in the SAX example
    pprint.pprint(mapping)
該腳本不起作用。錯誤消息是:
Traceback (most recent call last):
  File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\dombook.py", line 14, in <module>
    doc = xml.dom.minidom.parse(str(addr)) # load doc into object
  File "C:\Python33\lib\xml\dom\minidom.py", line 1960, in parse
    return expatbuilder.parse(file)
  File "C:\Python33\lib\xml\dom\expatbuilder.py", line 908, in parse
    fp = open(file, 'rb')
OSError: [Errno 22] Invalid argument: ''
XML:
<catalog>
<book isbn="0-596-00128-2">
<title>Python &amp; XML</title>
<date>December 2001</date>
<author>Jones, Drake</author>
</book>
<book isbn="0-596-15810-6">
<title>Programming Python, 4th Edition</title>
<date>October 2010</date>
<author>Lutz</author>
</book>
<book isbn="0-596-15806-8">
<title>Learning Python, 4th Edition</title>
<date>September 2009</date>
<author>Lutz</author>
</book>
<book isbn="0-596-15808-4">
<title>Python Pocket Reference, 4th Edition</title>
<date>October 2009</date>
<author>Lutz</author>
</book>
<book isbn="0-596-00797-3">
<title>Python Cookbook, 2nd Edition</title>
<date>March 2005</date>
<author>Martelli, Ravenscroft, Ascher</author>
</book>
<book isbn="0-596-10046-9">
<title>Python in a Nutshell, 2nd Edition</title>
<date>July 2006</date>
<author>Martelli</author>
</book>
<!--
plus many more Python books that should appear here
-->
</catalog>