2017-03-13 61 views
1

我已經解析了這個XML文件。也許我沒有完整地複製過來,但應該沒問題,內容如下(問題標題:xml列表的Python函數):

 <?xml version="1.0" encoding="UTF-8"?> 
     <!DOCTYPE raml SYSTEM 'raml20.dtd'> 
     <raml version="2.0" xmlns="raml20.xsd"> 
     <cmData type="actual"> 
      <managedObject class="LN" distName="PTR" id="2425"> 
       <p name="aak">220</p> 
       <p name="orp">05</p> 
       <p name="name">Portro</p> 
       <p name="optres">false</p> 
       <p name="optblu">false</p> 
       <p name="aoptdet">false</p> 
       <p name="advcell">false</p> 
       <list name="sibList"> 
       <item> 
        <p name="sibcity">177</p> 
        <p name="sibrep">2</p> 
       </item> 
       <item> 
        <p name="sibcity">177</p> 
        <p name="sibrep">1</p> 
       </item> 
       </list> 
      </managedObject> 
      <managedObject class="LN" distName="KRNS" id="93886"> 
       <p name="aak">150</p> 
       <p name="orp">05</p> 
       <p name="name">Portro</p> 
       <p name="optres">false</p> 
       <p name="optblu">tru</p> 
       <p name="aoptdet">false</p> 
       <p name="advcell">true</p> 
       <list name="sibList"> 
       <item> 
        <p name="sibcity">177</p> 
        <p name="sibrep">1</p> 
       </item> 
       <item> 
        <p name="sibcity">180</p> 
        <p name="sibrep">2</p> 
       </item> 
       </list> 
      </managedObject> 
      .... 
      <managedObject> 
      ... 
      </managedObject> 

      ... 
     </cmData> 
     </raml> 

我需要從第一個managedObject開始遍歷所有的「managedObject」,把其中的每個參數(p name,例如aak、orp等)與其他managedObject中的同名參數進行比較,並輸出值不同的參數及其值;如果沒有不同的參數值,則什麼也不做。我已經編寫了比較的代碼,但不知道如何遍歷列表(名爲「sibList」)並比較其中的參數。我寫了下面這個函數,其中鍵是「p name」,值是該「p name」對應的文本:

# Question snippet as pasted: its indentation was mangled, so the nesting
# shown below is not reliable.
# NOTE(review): temp_ln is not defined in this snippet -- presumably the
# parsed managedObject elements; confirm.
# Flat rows of [distName, child 'name' attribute, child text].
temp = [] 
for i in temp_ln: 
    # NOTE(review): k is never used; zip appears to pair each child with itself.
    for j, k in zip(i.getchildren(), i): 
     temp.append([i.get('distName'), j.get('name'), j.text]) 

    # Map distName -> {child 'name' attribute: child text}.
    tempdict = {} 
    for i in temp_ln: 
     td = {} 
     for j in i.getchildren(): 
      td.update({j.get('name'): j.text}) 
     tempdict.update({i.get('distName'): td}) 


# name -> text for the grandchildren of the 'sibList' child. A repeated name
# overwrites the earlier entry, so only the last value per name is kept.
elements_list = {} 
    if j.get('name') == 'sibList': 
      for item in j.getchildren(): 
       for w in item.getchildren(): 
        elements_list.update({ w.get('name'): w.text}) 

     # Per distName: {key: (value, reference value)} for every key whose value
     # differs from ref; the 'name' key is skipped.
     # NOTE(review): ref is not defined in this snippet.
     main_dif = {} 
     # dict.iteritems() exists only in Python 2.
     for key, value in tempdict.iteritems(): 
      dif_k = {} 
      for k, v in value.iteritems(): 
       try: 
        a = ref[k] 
       # Bare except: any failure of the lookup, not only a missing key,
       # falls back to None.
       except: 
        a = None 
       if v != a: 
        if k == 'name': 
         pass 
        else: 
         dif_k.update({k:(v, a)}) 
      main_dif.update({key:dif_k}) 
+0

你的問題很難理解。嗯,基本上你想遍歷XML並在所有'managedObject'節點上工作?你嘗試過'lxml'或'BeautifulSoup'嗎? – techouse

+0

是的,我試過了。我已更新我的代碼。但現在我無法把sibList對應到特定的managedObject。最後,我需要一個excel文件,以managedObject作爲列、參數作爲行,值就是文本,例如:220、05、Portro等 – jovicbg

+0

而且我需要提及的是,我使用了etree解析器。 @techouse – jovicbg

回答

1

下面是一個解決方案:解析XML文件,將每個managedObject與其他managedObject進行比較(代碼中是按順序比較相鄰的兩個),並打印出由此產生的diff對象。

import json 
from xml.etree import ElementTree 


# Load the document once; parse() receives this tree further down.
tree = ElementTree.parse('raml20.xml')

# Prefix map for path lookups, plus the fully qualified ('{uri}tag') names
# of the three element types the helpers search for.
ns = {'ns': 'raml20.xsd'}
nsP = '{%s}p' % ns['ns']
nsList = '{%s}list' % ns['ns']
nsItem = '{%s}item' % ns['ns']


def pkv(o):
    """Return a ``{name: text}`` dict for the ``p`` elements directly under *o*.

    Only direct children are collected. Walking every descendant (as
    ``o.iter`` does) would also pick up the ``p`` elements nested inside
    ``list``/``item`` children, so list-item parameters would leak into the
    enclosing object's own parameters and show up as spurious top-level
    differences.

    Raises:
        KeyError: if a ``p`` element has no ``name`` attribute.
    """
    return {p.attrib['name']: p.text for p in o.findall(nsP)}


def parse(tree):
    """Collect every managedObject under cmData into a dict keyed by distName.

    Each value is the dict ``pkv`` returns for that object, extended with one
    entry per ``list`` element found beneath it: the list's ``name``
    attribute mapped to a Python list holding the ``pkv`` dict of each
    ``item`` element.
    """
    managed = tree.getroot().findall('./ns:cmData/ns:managedObject', ns)
    objs = {}
    for mo in managed:
        params = pkv(mo)
        for lst in mo.iter(nsList):
            items = []
            for item in lst.iter(nsItem):
                items.append(pkv(item))
            params[lst.attrib['name']] = items
        objs[mo.attrib['distName']] = params
    return objs


def diff_dicts(d1, d2, ignore_keys=frozenset()):
    """Return a dict with the differences between two dicts.

    Args:
        d1: First dict.
        d2: Second dict.
        ignore_keys: Keys to leave out of the comparison. Any iterable of
            hashable keys is accepted. The default is an immutable empty
            set, so no mutable object is shared between calls.

    Returns:
        A dict mapping each differing key to a ``(value_in_d1, value_in_d2)``
        tuple. A key present in only one of the dicts is always reported,
        with ``None`` standing in for the missing side.
    """
    ignored = set(ignore_keys)
    k1 = set(d1)
    k2 = set(d2)
    # Keys on both sides: report only when the values differ.
    diff = {
        key: (d1[key], d2[key])
        for key in (k1 & k2) - ignored
        if d1[key] != d2[key]
    }
    # Keys on exactly one side: always a difference.
    diff.update(
        {key: (d1.get(key), d2.get(key)) for key in (k1 ^ k2) - ignored})
    return diff


def diff_lists(l1, l2):
    """Return a dict with the differences between two lists of dicts.

    The lists are compared position by position. They may differ in length:
    a position that exists in only one list is compared against an empty
    dict, so every key of the extra item is reported with ``None`` on the
    missing side instead of being silently dropped.

    Returns:
        A dict mapping each differing position (int index) to the
        ``diff_dicts`` result for the two items at that position.
        Positions without differences are omitted.
    """
    l1, l2 = list(l1), list(l2)
    diff = {}
    for i in range(max(len(l1), len(l2))):
        d1 = l1[i] if i < len(l1) else {}
        d2 = l2[i] if i < len(l2) else {}
        d = diff_dicts(d1, d2)
        if d:
            diff[i] = d
    return diff


def diff_objects(o1, o2):
    """Return a dict with the differences between two objects (dicts).

    Scalar entries are compared with ``diff_dicts``. Entries whose value is
    a list in at least one of the objects are handled separately:

    * list on both sides: compared with ``diff_lists``; the key is added
      only when that comparison finds a difference, so two identical
      objects yield an empty (falsy) result;
    * otherwise (missing or not a list on one side): reported as a
      ``(value_in_o1, value_in_o2)`` tuple.
    """
    listkeys = set(
        key for o in (o1, o2) for key, value in o.items()
        if isinstance(value, list))
    diff = diff_dicts(o1, o2, listkeys)
    for key in listkeys:
        v1, v2 = o1.get(key), o2.get(key)
        if isinstance(v1, list) and isinstance(v2, list):
            list_diff = diff_lists(v1, v2)
            # An empty list diff means "no difference": do not record it,
            # or the overall result would be truthy for identical objects.
            if list_diff:
                diff[key] = list_diff
        else:
            diff[key] = (v1, v2)
    return diff


def compare_objects(objs):
    """Diff each object against the one that follows it in *objs*.

    Returns a list of ``(first_key, second_key, diff)`` tuples, one for each
    adjacent pair of keys whose ``diff_objects`` result is non-empty.
    """
    names = list(objs)
    results = []
    for pos in range(1, len(names)):
        first, second = names[pos - 1], names[pos]
        delta = diff_objects(objs[first], objs[second])
        if not delta:
            continue
        results.append((first, second, delta))
    return results


# Parse the tree, diff the objects, and dump the result as indented JSON.
objects_by_name = parse(tree)
res = compare_objects(objects_by_name)
print(json.dumps(res, indent=2))

我用下面的raml20.xml文件測試:

<?xml version="1.0" encoding="UTF-8"?> 
<!DOCTYPE raml SYSTEM 'raml20.dtd'> 
<raml version="2.0" xmlns="raml20.xsd"> 
    <cmData type="actual"> 
    <managedObject class="LN" distName="PTR" id="2425"> 
     <p name="aak">220</p> 
     <p name="orp">05</p> 
     <p name="name">Portro</p> 
     <p name="optres">false</p> 
     <p name="optblu">false</p> 
     <p name="aoptdet">false</p> 
     <p name="advcell">false</p> 
     <list name="sibList"> 
     <item> 
      <p name="sibcity">177</p> 
      <p name="sibrep">2</p> 
     </item> 
     <item> 
      <p name="sibcity">177</p> 
      <p name="sibrep">1</p> 
     </item> 
     </list> 
    </managedObject> 
    <managedObject class="LN" distName="KRNS" id="93886"> 
     <p name="aak">150</p> 
     <p name="orp">05</p> 
     <p name="name">Portro</p> 
     <p name="optres">false</p> 
     <p name="optblu">tru</p> 
     <p name="aoptdet">false</p> 
     <p name="advcell">true</p> 
     <list name="sibList"> 
     <item> 
      <p name="sibcity">177</p> 
      <p name="sibrep">1</p> 
     </item> 
     <item> 
      <p name="sibcity">180</p> 
      <p name="sibrep">2</p> 
     </item> 
     </list> 
    </managedObject> 
    </cmData> 
</raml> 

產生的差異對象是:

[ 
    [ 
    "PTR", 
    "KRNS", 
    { 
     "advcell": [ 
     "false", 
     "true" 
     ], 
     "optblu": [ 
     "false", 
     "tru" 
     ], 
     "sibcity": [ 
     "177", 
     "180" 
     ], 
     "aak": [ 
     "220", 
     "150" 
     ], 
     "sibrep": [ 
     "1", 
     "2" 
     ], 
     "sibList": { 
     "0": { 
      "sibrep": [ 
      "2", 
      "1" 
      ] 
     }, 
     "1": { 
      "sibcity": [ 
      "177", 
      "180" 
      ], 
      "sibrep": [ 
      "1", 
      "2" 
      ] 
     } 
     } 
    } 
    ] 
] 
+0

謝謝你;)@ jcbsv – jovicbg

+0

@jovicbg不客氣。如果答案解決了您的問題,請將其標記爲已接受的答案。 – jcbsv

+0

這太好了。 現在我需要比較所有managedObjects與一個,不同的。 – jovicbg