2017-06-22 64 views
0

我有以下代碼:如何訪問xml中下一個標記的文本內容?

public String depRel() throws SAXException, IOException, 
     ParserConfigurationException, ClassNotFoundException, 
     ClassCastException { 
    String xmlString = Features.dependencyGraph(); 
    ; 

    String result = ""; 
    String dependent = ""; 
    String governor = ""; 
    String type = ""; 

    // System.out.println("A value is :" + xmlString); 
    // aici il convertesc ca sa il pot citi ca si xml 
    Document document = convertStringToDocument(xmlString); 
    document.getDocumentElement().normalize(); 
    Element root = document.getDocumentElement(); 
    NodeList nList = document.getElementsByTagName("dependencies"); 
    for (int temp = 0; temp < nList.getLength(); temp++) { 
     Node node = nList.item(temp); 
     if (node.getNodeType() == Node.ELEMENT_NODE) { 
      Element eElement1 = (Element) node; 

     } 
     NodeList nodesDocPart = node.getChildNodes(); 
     for (int temp2 = 0; temp2 < nodesDocPart.getLength(); temp2++) { 

      Node n = nodesDocPart.item(temp2); 

      if (n.getNodeType() == Node.ELEMENT_NODE) { 
       Element el1 = (Element) n; 
       type = el1.getAttribute("type"); 
      } 

      // /////////////////////////////////////////////////sentence///////////////////////////////////////////// 
      NodeList nodesSentencePart = n.getChildNodes(); 
      for (int temp3 = 0; temp3 < nodesSentencePart.getLength(); temp3++) { 
       Node sentence = nodesSentencePart.item(temp3); 
       if (sentence.getNodeType() == Node.ELEMENT_NODE) { 

        Element eElement4 = (Element) sentence; 
        if (eElement4.getTagName().equals("dependent")) { 
         dependent = eElement4.getTextContent(); 
        } 
        if (eElement4.getTagName().equals("governor")) { 
         governor = eElement4.getTextContent(); 


enter code here 

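以下是 XML 格式的內容,它描述了一個句子的依賴圖(dependency graph)。這個句子是:The production of human immunodeficiency virus type 1 (HIV-1) progeny was followed in the U937 promonocytic cell line after stimulation either with retinoic acid or PMA, and in purified human monocytes and macrophages.(在用視黃酸或 PMA 刺激後,在 U937 前單核細胞系中,以及在純化的人單核細胞和巨噬細胞中,追蹤了人類免疫缺陷病毒 1 型(HIV-1)後代的產生。)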

<dependencies style="typed"> 
    <dep type="det"> 
    <governor idx="2">production</governor> 
    <dependent idx="1">The</dependent> 
    </dep> 
    <dep type="nsubjpass"> 
    <governor idx="14">followed</governor> 
    <dependent idx="2">production</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="7">type</governor> 
    <dependent idx="3">of</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="7">type</governor> 
    <dependent idx="4">human</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="7">type</governor> 
    <dependent idx="5">immunodeficiency</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="7">type</governor> 
    <dependent idx="6">virus</dependent> 
    </dep> 
    <dep type="nmod:of"> 
    <governor idx="2">production</governor> 
    <dependent idx="7">type</dependent> 
    </dep> 
    <dep type="nummod"> 
    <governor idx="7">type</governor> 
    <dependent idx="8">1</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="10">HIV-1</governor> 
    <dependent idx="9">-LRB-</dependent> 
    </dep> 
    <dep type="appos"> 
    <governor idx="7">type</governor> 
    <dependent idx="10">HIV-1</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="10">HIV-1</governor> 
    <dependent idx="11">-RRB-</dependent> 
    </dep> 
    <dep type="dep"> 
    <governor idx="7">type</governor> 
    <dependent idx="12">progeny</dependent> 
    </dep> 
    <dep type="auxpass"> 
    <governor idx="14">followed</governor> 
    <dependent idx="13">was</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="20">line</governor> 
    <dependent idx="15">in</dependent> 
    </dep> 
    <dep type="det"> 
    <governor idx="20">line</governor> 
    <dependent idx="16">the</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="20">line</governor> 
    <dependent idx="17">U937</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="20">line</governor> 
    <dependent idx="18">promonocytic</dependent> 
    </dep> 
    <dep type="compound"> 
    <governor idx="20">line</governor> 
    <dependent idx="19">cell</dependent> 
    </dep> 
    <dep type="nmod:in"> 
    <governor idx="14">followed</governor> 
    <dependent idx="20">line</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="21">after</dependent> 
    </dep> 
    <dep type="nmod:after"> 
    <governor idx="14">followed</governor> 
    <dependent idx="22">stimulation</dependent> 
    </dep> 
    <dep type="dep"> 
    <governor idx="26">acid</governor> 
    <dependent idx="23">either</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="26">acid</governor> 
    <dependent idx="24">with</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="26">acid</governor> 
    <dependent idx="25">retinoic</dependent> 
    </dep> 
    <dep type="nmod:with"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="26">acid</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="26">acid</governor> 
    <dependent idx="27">or</dependent> 
    </dep> 
    <dep type="nmod:with"> 
    <governor idx="22">stimulation</governor> 
    <dependent idx="28">PMA</dependent> 
    </dep> 
    <dep type="conj:or"> 
    <governor idx="26">acid</governor> 
    <dependent idx="28">PMA</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="14">followed</governor> 
    <dependent idx="29">,</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="14">followed</governor> 
    <dependent idx="30">and</dependent> 
    </dep> 
    <dep type="case"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="31">in</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="32">purified</dependent> 
    </dep> 
    <dep type="amod"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="33">human</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="14">followed</governor> 
    <dependent idx="34">monocytes</dependent> 
    </dep> 
    <dep type="cc"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="35">and</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="14">followed</governor> 
    <dependent idx="36">macrophages</dependent> 
    </dep> 
    <dep type="conj:and"> 
    <governor idx="34">monocytes</governor> 
    <dependent idx="36">macrophages</dependent> 
    </dep> 
    <dep type="punct"> 
    <governor idx="14">followed</governor> 
    <dependent idx="37">.</dependent> 
     </dep> 

如果我當前位於標籤「governor」,該如何訪問標籤「dependent」?因爲我想針對某一個單詞(word),取得它所有的 governor 和所有的 dependent,該怎麼做?

+0

「我想針對某個單詞取得所有的 governor 和所有的 dependent」——這裏的「單詞」指的是什麼?是「governor」節點的文本嗎? – SomeDude

+0

這個單詞來自我要解析的句子。我還必須保留該句子,並爲句子中的每個單詞找出其 governor 和 dependent。 – Nadd

回答

0

看來你想收集 governor/dependent/word 的組合。你可以使用下面的代碼來獲得由這種類的對象組成的集合——我把這個類稱爲 GovernorDependentNode:

/**
 * Plain mutable holder that groups a governor node, a dependent node and a word.
 *
 * <p>All three fields are package-visible and start out as {@code null}; whoever
 * creates an instance is expected to assign them directly.
 */
class GovernorDependentNode {

    /** The word this governor/dependent pair is associated with. */
    String word;

    /** Node stored as the governor of the pair. */
    Node governor;

    /** Node stored as the dependent of the pair; {@code null} until assigned. */
    Node dependent;
}

/**
 * Finds every {@code governor} element (child of a {@code dep} element) whose text
 * equals {@code word} and pairs it with the first {@code dependent} sibling found
 * under the same parent.
 *
 * <p>The word is bound as an XPath variable rather than being concatenated into the
 * expression, so words containing quote characters (for example {@code '}) are
 * matched literally instead of breaking, or altering, the query.
 *
 * <p>This method is best-effort: if the source cannot be parsed or the expression
 * cannot be evaluated, the stack trace is printed and the pairs collected so far
 * (possibly none) are returned.
 *
 * @param word text a {@code governor} element must have to be selected; a
 *             {@code null} word matches nothing
 * @param is   XML source to search; a {@code null} source yields an empty list
 * @return one entry per matching {@code governor}, in document order; an entry's
 *         {@code dependent} field is {@code null} when the parent has no
 *         {@code dependent} child. Never {@code null}.
 */
List<GovernorDependentNode> getNodes(String word, InputSource is)
{
    List<GovernorDependentNode> gdNodes = new ArrayList<GovernorDependentNode>();
    if (word == null || is == null)
    {
        // Nothing to look for, or nothing to look in.
        return gdNodes;
    }

    // Captured by the anonymous resolver below, hence final.
    final String target = word;
    try
    {
        javax.xml.xpath.XPath xpath = XPathFactory.newInstance().newXPath();
        // Resolve $word to the caller's text so no quoting/escaping is needed.
        xpath.setXPathVariableResolver(new javax.xml.xpath.XPathVariableResolver()
        {
            @Override
            public Object resolveVariable(javax.xml.namespace.QName variableName)
            {
                return "word".equals(variableName.getLocalPart()) ? target : null;
            }
        });

        NodeList gNodes = (NodeList) xpath.evaluate("//dep/governor[.=$word]", is, XPathConstants.NODESET);
        for (int i = 0; i < gNodes.getLength(); i++)
        {
            Node gNode = gNodes.item(i);

            GovernorDependentNode gdNode = new GovernorDependentNode();
            gdNode.governor = gNode;
            gdNode.word = word;

            // The dependent lives next to the governor, under the same parent element.
            NodeList siblings = gNode.getParentNode().getChildNodes();
            for (int j = 0; j < siblings.getLength(); j++)
            {
                Node sibling = siblings.item(j);
                if ("dependent".equals(sibling.getNodeName()))
                {
                    gdNode.dependent = sibling;
                    break;
                }
            }
            gdNodes.add(gdNode);
        }
    }
    catch (javax.xml.xpath.XPathExpressionException e)
    {
        // Best-effort contract: report the failure and return what was collected.
        e.printStackTrace();
    }

    return gdNodes;
}

可以像下面這樣調用該方法:

// Wrap the XML text held in xmlString in a character-stream source for the parser.
// A reader-backed InputSource can only be consumed once, so build a new one for
// every getNodes call.
StringReader xmlReader = new StringReader(xmlString);
InputSource is = new InputSource(xmlReader);
// Collect the governor/dependent pairs whose governor text is "yourWord".
List<GovernorDependentNode> nodes = getNodes("yourWord", is);

方法 getNodes 首先使用 XPath 表達式獲取文本等於給定單詞的 governor 節點,表達式形如://dep/governor[.='word']

這樣的節點可能有好幾個,例如單詞 followed 就有 9 個 governor 節點,所以應該爲其中每一個節點獲取對應的 dependent 節點,並用這些信息——governor 節點、dependent 節點和給定的單詞——構造一個 GovernorDependentNode 對象。

爲了打印節點列表,你可以使用:

// Fetch every pair whose governor text is "followed" and print each one to stdout.
List<GovernorDependentNode> nodes = getNodes("followed", inputSource);
for (int i = 0; i < nodes.size(); i++)
{
    GovernorDependentNode pair = nodes.get(i);
    // Three lines per pair: the searched word, then the governor and dependent text.
    System.out.println("Word : " + pair.word);
    System.out.println("Governor : " + pair.governor.getTextContent());
    System.out.println("Dependent : " + pair.dependent.getTextContent());
}

輸出是:

Word : followed 
Governor : followed 
Dependent : production 
Word : followed 
Governor : followed 
Dependent : was 
Word : followed 
Governor : followed 
Dependent : line 
Word : followed 
Governor : followed 
Dependent : stimulation 
Word : followed 
Governor : followed 
Dependent : , 
Word : followed 
Governor : followed 
Dependent : and 
Word : followed 
Governor : followed 
Dependent : monocytes 
Word : followed 
Governor : followed 
Dependent : macrophages 
Word : followed 
Governor : followed 
Dependent : . 
+0

我是不是應該傳入 xmlString 所解析的句子中的當前單詞?另外,當我像 InputSource is = new InputSource(new StringReader(xmlString)) 這樣調用該方法時,編譯器知道 xmlString 不是 XML 嗎?它只是一個 XML 格式的字符串。 – Nadd

+0

它只是一個 String,編譯器並不知道它是不是 XML。如果字符串不是 XML 格式,則 XPathFactory.newInstance().newXPath().evaluate(...) 這一行將拋出異常。在把它傳給方法之前,你可以先檢查該字符串是否爲 XML。 – SomeDude

+0

運行此代碼時,異常出現在這一行:Object govs = XPathFactory.newInstance().newXPath().evaluate("//dep/governor[.='" + word + "']", is, XPathConstants.NODESET); – Nadd

相關問題