-4
我已經編寫了一個用於解析 XML 文件的 Python 腳本。如何使用 Python 解析具有多個頂級元素的文件?
python腳本:
from xml.dom.minidom import parse
import xml.dom.minidom
# Parse the input file once and keep a handle on the document's root element.
DOMTree = parse("details.xml")
CallDetailRecord = DOMTree.documentElement
def getText(data):
    """Return the "false"/"true" token found in the string form of *data*.

    *data* is converted with ``str()`` and searched case-insensitively for
    the word ``false`` and then ``true``; the word must be preceded by a
    whitespace character.  ``false`` is checked first, so it wins when both
    words occur.  The token is returned exactly as it appears in the text
    (original casing preserved).

    Returns ``None`` when neither word is found.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import re`` being present.
    import re

    detail = str(data)
    for pattern in (r'(.*\s)(false).*', r'(.*\s)(true).*'):
        match = re.search(pattern, detail, re.IGNORECASE)
        if match:
            return match.group(2)
    return None
def _address_fields(address):
    """Return the (ton, npi, pid, msisdn) text values of one address element."""
    return tuple(
        address.getElementsByTagName(tag)[0].childNodes[0].data
        for tag in ("ton", "npi", "pid", "msisdn")
    )


def _first_element(tag):
    """Return the first <tag> element under the record, or None if absent."""
    matches = CallDetailRecord.getElementsByTagName(tag)
    return matches[0] if matches else None


# Address-like elements.  Each group of names is only bound when at least one
# matching element exists; with several matches the last one wins.
org_addr = CallDetailRecord.getElementsByTagName("origAddress")
for record in org_addr:
    org_ton, org_npi, org_pid, org_msdn = _address_fields(record)

recp_addr = CallDetailRecord.getElementsByTagName("recipAddress")
for record in recp_addr:
    rec_ton, rec_npi, rec_pid, rec_msdn = _address_fields(record)

dgti_addr = CallDetailRecord.getElementsByTagName("dgtiAddress")
for record in dgti_addr:
    dgti_ton, dgti_npi, dgti_pid, dgti_msdn = _address_fields(record)

calling_line_id = CallDetailRecord.getElementsByTagName("callingLineId")
for record in calling_line_id:
    clid_ton, clid_npi, clid_pid, clid_msdn = _address_fields(record)

untransl_OrigAddress = CallDetailRecord.getElementsByTagName("untranslOrigAddress")

# Scalar fields.  A missing element is skipped instead of raising IndexError,
# which is what the truthiness guards in the posted script were aiming for.
# print() is called with a single pre-formatted string so the statements behave
# the same on Python 2 and Python 3.
sub_time = _first_element("submitTime")
if sub_time is not None:
    sub_time_value = sub_time.childNodes[0].data
    print(" \n SUBMIT TIME: %s \n" % sub_time_value)

sub_date = _first_element("submitDate")
if sub_date is not None:
    sub_date_value = sub_date.childNodes[0].data
    # Print the date itself (the posted script printed the time value here).
    print(" \n SUBMIT DATE: %s\n" % sub_date_value)

termin_time = _first_element("terminTime")
if termin_time is not None:
    termin_time_value = termin_time.childNodes[0].data
    print(" \n TERMIN TIME: %s \n" % termin_time_value)

termin_date = _first_element("terminDate")
if termin_date is not None:
    termin_date_value = termin_date.childNodes[0].data
    # Print the date itself (the posted script printed the time value here).
    print(" \n TERMIN DATE: %s\n" % termin_date_value)

status = _first_element("status")
if status is not None:
    status_value = status.childNodes[0].data
    print(" \n STATUS: %s\n" % status_value)

msglength = _first_element("lengthOfMessage")
if msglength is not None:
    msglength_value = msglength.childNodes[0].data
    print(" \n MESSAGE LENGTH: %s\n" % msglength_value)

prioIndicator = _first_element("prioIndicator")
if prioIndicator is not None:
    # The flag is stored as a child element (<false/> or <true/>), not as
    # text, so the child node is handed to getText() to recover the word.
    prioIndicator_value = getText(prioIndicator.childNodes[0])
    print(" \n PRIO INDICATOR: %s\n" % prioIndicator_value)
為了縮短篇幅,我沒有貼出整個腳本。
輸入文件:
<CallDetailRecord>
<origAddress>
<ton>international</ton>
<npi>telephone</npi>
<pid>plmn</pid>
<msisdn>32410000</msisdn>
</origAddress>
<recipAddress>
<ton>international</ton>
<npi>telephone</npi>
<pid>plmn</pid>
<msisdn>918337807718</msisdn>
</recipAddress>
<submitDate>14-08-20</submitDate>
<submitTime>19:36:29</submitTime>
<status>deleted</status>
<terminDate>14-08-23</terminDate>
<terminTime>19:51:52</terminTime>
<lengthOfMessage>38</lengthOfMessage>
<prioIndicator><false/></prioIndicator>
<deferIndicator><true/></deferIndicator>
<notifIndicator><false/></notifIndicator>
<recipIntlMobileSubId>26204487</recipIntlMobileSubId>
<callingLineId>
<ton>international</ton>
<npi>telephone</npi>
<pid>plmn</pid>
<msisdn>32410000</msisdn>
</callingLineId>
<smsContentDcs>0</smsContentDcs>
<messageReference>13</messageReference>
<deliveryAttempts>151</deliveryAttempts>
<untranslOrigAddress>
<ton>international</ton>
<npi>telephone</npi>
<pid>plmn</pid>
<msisdn>32410000</msisdn>
</untranslOrigAddress>
<tpDCS>0</tpDCS>
<genericUrgencyLevel>bulk</genericUrgencyLevel>
<teleserviceId>4098</teleserviceId>
<recipNetworkType>gsm</recipNetworkType>
<rbdlFlags1>
10000000000000000000000000000000
</rbdlFlags1>
</CallDetailRecord>
對於這個文件,腳本工作正常。但是,假設文件中有多個
<CallDetailRecord> 元素,那麼該如何解析該文件?
例:
<CallDetailRecord>
.
.
.
</CallDetailRecord>
<CallDetailRecord>
.
.
.
</CallDetailRecord>
<CallDetailRecord>
.
.
.
</CallDetailRecord>
期待能得到一些好的答案 :)!
這是一個巨大的代碼轉儲。 [如何創建一個最小,完整和可驗證的示例](https://stackoverflow.com/help/mcve) – CoryKramer 2014-09-01 16:17:05