2014-09-01 19 views
-4

我已經編寫了一個用於解析 XML 文件的 Python 腳本。如何使用 Python 解析具有多個頂級元素的文件?

python腳本:

from xml.dom.minidom import parse 

import xml.dom.minidom 

# Read "details.xml" into an in-memory DOM document.
DOMTree = parse("details.xml")

# Root element of the parsed document; every lookup below starts from it.
CallDetailRecord = DOMTree.documentElement

def getText(data):
    """Pull the word "false" or "true" out of the string form of *data*.

    *data* is converted with ``str()`` and searched case-insensitively.
    "false" is tried first; "true" is only tried when "false" is not found.
    In both cases the word must directly follow a whitespace character.

    Returns the matched word exactly as it is spelled in the text, or
    ``None`` when neither word is found.
    """
    text = str(data)
    # Order matters: a "false" match wins even if "true" is also present.
    for pattern in (r'(.*\s)(false).*', r'(.*\s)(true).*'):
        found = re.search(pattern, text, re.IGNORECASE)
        if found:
            return found.group(2)
    return None



def _address_fields(address_node):
    """Return the (ton, npi, pid, msisdn) text values of one address element.

    For each of the four tag names, the first matching descendant of
    *address_node* is used and the data of its first child node is taken.
    Raises IndexError if a tag is missing or the element has no child node.
    """
    return tuple(
        address_node.getElementsByTagName(tag)[0].childNodes[0].data
        for tag in ("ton", "npi", "pid", "msisdn")
    )


# Each loop below overwrites its variables on every iteration, so when a tag
# occurs more than once only the values of the last occurrence are kept. When
# a tag does not occur at all, the corresponding variables stay undefined.

# Originating address.
org_addr = CallDetailRecord.getElementsByTagName("origAddress")
for record in org_addr:
    org_ton, org_npi, org_pid, org_msdn = _address_fields(record)

# Recipient address.
recp_addr = CallDetailRecord.getElementsByTagName("recipAddress")
for record in recp_addr:
    rec_ton, rec_npi, rec_pid, rec_msdn = _address_fields(record)

# "dgtiAddress" entries.
dgti_addr = CallDetailRecord.getElementsByTagName("dgtiAddress")
for record in dgti_addr:
    dgti_ton, dgti_npi, dgti_pid, dgti_msdn = _address_fields(record)

# Calling line identity.
calling_line_id = CallDetailRecord.getElementsByTagName("callingLineId")
for record in calling_line_id:
    clid_ton, clid_npi, clid_pid, clid_msdn = _address_fields(record)

# Collected here but not processed in this part of the script.
untransl_OrigAddress = CallDetailRecord.getElementsByTagName("untranslOrigAddress")


def _first_element(parent, tag_name):
    """Return the first descendant of *parent* named *tag_name*, or None.

    Indexing ``getElementsByTagName(...)[0]`` directly raises IndexError when
    the tag is absent, which would make the ``if`` guards below unreachable
    for the missing-tag case.
    """
    matches = parent.getElementsByTagName(tag_name)
    return matches[0] if matches else None


# Each field is printed only when its element is present. print is called
# with a single parenthesised argument so the statement produces the same
# output on Python 2 and is also valid on Python 3.

sub_time = _first_element(CallDetailRecord, "submitTime")
if sub_time is not None:
    sub_time_value = sub_time.childNodes[0].data
    print(" \n SUBMIT TIME: %s \n" % sub_time_value)

sub_date = _first_element(CallDetailRecord, "submitDate")
if sub_date is not None:
    sub_date_value = sub_date.childNodes[0].data
    # Print the date value (the time value was printed here by mistake).
    print(" \n SUBMIT DATE: %s\n" % sub_date_value)

termin_time = _first_element(CallDetailRecord, "terminTime")
if termin_time is not None:
    termin_time_value = termin_time.childNodes[0].data
    print(" \n TERMIN TIME: %s \n" % termin_time_value)

termin_date = _first_element(CallDetailRecord, "terminDate")
if termin_date is not None:
    termin_date_value = termin_date.childNodes[0].data
    # Print the date value (the time value was printed here by mistake).
    print(" \n TERMIN DATE: %s\n" % termin_date_value)

status = _first_element(CallDetailRecord, "status")
if status is not None:
    status_value = status.childNodes[0].data
    print(" \n STATUS: %s\n" % status_value)

msglength = _first_element(CallDetailRecord, "lengthOfMessage")
if msglength is not None:
    msglength_value = msglength.childNodes[0].data
    print(" \n MESSAGE LENGTH: %s\n" % msglength_value)

prioIndicator = _first_element(CallDetailRecord, "prioIndicator")
if prioIndicator is not None:
    # The flag is stored as an empty child element (<false/> or <true/>), so
    # the value is recovered from that child node via getText rather than
    # from text data.
    prioIndicator_value = getText(prioIndicator.childNodes[0])
    print(" \n PRIO INDICATOR: %s\n" % prioIndicator_value)

為了縮短篇幅,我沒有貼出完整的腳本。

輸入文件:

<CallDetailRecord> 
<origAddress> 
    <ton>international</ton> 
    <npi>telephone</npi> 
    <pid>plmn</pid> 
    <msisdn>32410000</msisdn> 
</origAddress> 
<recipAddress> 
    <ton>international</ton> 
    <npi>telephone</npi> 
     <pid>plmn</pid> 
     <msisdn>918337807718</msisdn> 
</recipAddress> 
<submitDate>14-08-20</submitDate> 
<submitTime>19:36:29</submitTime> 
<status>deleted</status> 
<terminDate>14-08-23</terminDate> 
<terminTime>19:51:52</terminTime> 
<lengthOfMessage>38</lengthOfMessage> 
<prioIndicator><false/></prioIndicator> 
<deferIndicator><true/></deferIndicator> 
<notifIndicator><false/></notifIndicator> 
<recipIntlMobileSubId>26204487</recipIntlMobileSubId> 
<callingLineId> 
    <ton>international</ton> 
     <npi>telephone</npi> 
     <pid>plmn</pid> 
     <msisdn>32410000</msisdn> 
</callingLineId> 
<smsContentDcs>0</smsContentDcs> 
<messageReference>13</messageReference> 
<deliveryAttempts>151</deliveryAttempts> 
<untranslOrigAddress> 
    <ton>international</ton> 
     <npi>telephone</npi> 
     <pid>plmn</pid> 
     <msisdn>32410000</msisdn> 
</untranslOrigAddress> 
<tpDCS>0</tpDCS> 
<genericUrgencyLevel>bulk</genericUrgencyLevel> 
<teleserviceId>4098</teleserviceId> 
<recipNetworkType>gsm</recipNetworkType> 
<rbdlFlags1> 
    10000000000000000000000000000000 
</rbdlFlags1> 
</CallDetailRecord> 

對於這個文件,腳本運行正常。但是,假設文件中有多個
<CallDetailRecord> 元素,那麼該如何解析該文件?

例:

<CallDetailRecord> 
    . 
    . 
    . 
</CallDetailRecord> 
<CallDetailRecord> 
    . 
    . 
    . 
</CallDetailRecord> 
<CallDetailRecord> 
    . 
    . 
    . 
</CallDetailRecord> 

期待能得到一些好的解答 :)!

+2

這是一個巨大的代碼轉儲。 [如何創建一個最小,完整和可驗證的示例](https://stackoverflow.com/help/mcve) – CoryKramer 2014-09-01 16:17:05

回答

0

使用包裝(wrapper)元素來解析此文件。把含有多個頂級元素的文件內容包在一個根元素裡,像這樣:

<wrapper>

#your file 

</wrapper>

然後從這個根元素開始解析文件。解析器會以該包裝根元素構造一個文檔,其中包含文件中的所有元素。

相關問題