我試圖用python檢索XML標記的一些信息,我的實現是爲每個情況都保存一個字典標記ID,所有子數據,但我不知道如何處理從文本節點提取數據的事實,謝謝。Python XML DOM收集元素數據
我的代碼:
from xml.dom.minidom import *
import requests
print("GETTING XML...")
resp = requests.get('http://infocar.dgt.es/datex2/dgt/SituationPublication/all/content.xml', stream = True) #XML that I need
if resp.status_code != 200:
raise ApiError('GET /tasks/ {}'.format(resp.status_code))
print("XML RECIBIDO 200 OK")
#resp.raw.decode_content = True
print("GUARDANDO XML")
with open("DGT_DATEX.xml", "wb") as handle:
for data in (resp.iter_content()):
handle.write(data)
print("XML GUARDADO")
print("INICIANDO PARSEO..")
dom3 = parse("DGT_DATEX.xml")
print(dom3)#memory dir
print("DATEX PARSEADO")
def getText(nodelist):
dict = {}
listofdata = list()
for node in nodelistofPayloadTag:
if node.nodeType != node.TEXT_NODE:
dict[node.getAttribute('id')] = listofdata
listofdata = goDeep(node.childNodes ,listofdata)
print(str.format("El diccionario antes de ser retornado es {0}", dict))
return dict
def goDeep(childsOfElement, l):
for i in childsOfElement:
if i.nodeType != i.TEXT_NODE:
goDeep(i.childNodes, l)
else:
l.append(i.data)
return l
def getSituation(payloadTag):
getText(payloadTag.childNodes)
def getPayLoad(dom):
print(str.format("Tag to be processed:{0}",dom.getElementsByTagNameNS('*', 'payloadPublication')[0]))
getSituation(dom.getElementsByTagNameNS('*', 'payloadPublication')[0])
print(str.format("Verificando que el dato retornado es un diccionario, {0}, y contiene {1}", type(getPayLoad(dom3)), getPayLoad(dom3)))
必須使用lxml.etree進行嘗試嗎?和.xpath(「// * [name()='_ 0:situation']」))? –