2017-09-14 30 views
0

舉個例子,我從 Stanford Parser 得到了以下解析樹。我該如何提取像 S 和 SBAR 這樣的標籤,以便最終提取出子句?我嘗試了一小段代碼(顯然並不正確)作爲起點,用了幾種不同的 Tree 方法,但都沒有得到我想要的結果。

解析節點標籤,以便從句法樹中提取子句

代碼:

// Asker's attempt: print the words under every S / SBAR node of parseTree.
// NOTE(review): as written this prints nothing. getLeaves() yields only the terminal
// (word) nodes, whereas S and SBAR are internal nodes in the parse tree shown below.
for (Tree subtree: parseTree.getLeaves()){ 
      // NOTE(review): label() appears to return a label object rather than a String, so
      // equals("S") would be false even on an S node; the answer below compares
      // label().value() instead -- confirm against the Tree API.
      if (subtree.label().equals("S")||subtree.label().equals("SBAR")) 
       System.out.println("SUBTREE:::"+"\t"+ subtree.getLeaves()); 
     } 

解析樹:

(ROOT 
     (S 
     (NP 
      (NP (DT A) (NNP Bristol) (NN hospital)) 
      (SBAR 
      (WHNP (WDT that)) 
      (S 
       (VP (VBD retained) 
       (NP 
        (NP (DT the) (NNS hearts)) 
        (PP (IN of) 
        (NP 
         (NP (CD 300) (NNS children)) 
         (SBAR 
         (WHNP (WP who)) 
         (S 
          (VP (VBD died) 
          (PP (IN in) 
           (NP (JJ complex) (NNS operations))))))))))))) 
     (VP (VBD behaved) 
      (ADVP (IN in) (DT a)) 
      ('' '') 
      (S 
      (VP (VBG cavalier) ('' '') 
       (NP (NN fashion)))) 
      (PP (IN towards) 
      (NP (DT the) (NNS parents)))) 
     (. .))) 

回答

0

這裏是一些示例代碼,它遍歷整棵樹並找出其中的 S 和 SBAR 節點:

package edu.stanford.nlp.examples; 

import edu.stanford.nlp.ling.*; 
import edu.stanford.nlp.pipeline.*; 
import edu.stanford.nlp.trees.*; 

import java.util.*; 

/**
 * Example program: parses one English sentence with Stanford CoreNLP and prints the
 * words covered by every node of its constituency tree whose label is {@code S} or
 * {@code SBAR}.
 */
public class FindSAndSBARInTreeExample {

    /**
     * Walks {@code tree} in pre-order and, for every node labelled exactly {@code S} or
     * {@code SBAR}, prints a {@code ---} separator line followed by the words that node yields.
     *
     * <p>The node passed in is examined too, so a tree whose root is itself an S or SBAR
     * is reported rather than silently skipped.
     *
     * @param tree root of the (sub)tree to search; {@code null} is ignored
     */
    public static void findSAndSBAR(Tree tree) {
        if (tree == null) {
            return;
        }
        if (isClauseNode(tree)) {
            System.out.println("---");
            System.out.println(tree.yieldWords());
        }
        // Recurse after reporting the node itself so output stays in pre-order.
        for (Tree child : tree.getChildrenAsList()) {
            findSAndSBAR(child);
        }
    }

    /** Returns whether {@code node} carries the label value S or SBAR (null-safe). */
    private static boolean isClauseNode(Tree node) {
        Label label = node.label();
        if (label == null) {
            return false;
        }
        String value = label.value();
        return "S".equals(value) || "SBAR".equals(value);
    }

    /**
     * Builds a CoreNLP pipeline, parses the example sentence, prints its parse tree and
     * then prints every S / SBAR clause found in it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // set up pipeline properties
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
        // use faster shift reduce parser
        props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
        props.setProperty("parse.maxlen", "100");
        // set up Stanford CoreNLP pipeline
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // build annotation for the example sentence
        Annotation annotation =
            new Annotation(
                "A Bristol hospital that retained the hearts of 300 children who died in " +
                    "complex operations behaved in a \"cavalier fashion\" towards the parents");
        // annotate
        pipeline.annotate(annotation);
        // get the constituency tree of the first (only) sentence
        Tree tree =
            annotation.get(CoreAnnotations.SentencesAnnotation.class)
                .get(0)
                .get(TreeCoreAnnotations.TreeAnnotation.class);
        System.out.println(tree);
        // find S and SBAR
        findSAndSBAR(tree);
    }
}
0

另一種方法是使用 Tregex。以下是一些示例代碼:

package edu.stanford.nlp.examples; 

import edu.stanford.nlp.ling.*; 
import edu.stanford.nlp.pipeline.*; 
import edu.stanford.nlp.trees.*; 
import edu.stanford.nlp.trees.tregex.*; 
import edu.stanford.nlp.util.*; 

import java.util.*; 

/**
 * Example program: parses one English sentence with Stanford CoreNLP and uses a Tregex
 * pattern to print every subtree of its constituency tree that is labelled {@code S} or
 * {@code SBAR}.
 */
public class TregexUsageExample {

    /**
     * Builds a CoreNLP pipeline, parses the example sentence and pretty-prints each
     * S / SBAR subtree matched by Tregex.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // set up pipeline
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // English example sentence
        Annotation annotation =
            new Annotation(
                "A Bristol hospital that retained the hearts of 300 children who died in " +
                    "complex operations behaved in a \"cavalier fashion\" towards the parents");
        pipeline.annotate(annotation);
        // get first sentence
        CoreMap firstSentence = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        Tree firstSentenceTree = firstSentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        // use Tregex to match; both alternatives are anchored so that only the exact labels
        // S and SBAR match (an unanchored SBAR would also hit labels such as SBARQ)
        String clausePatternText = "/^(S|SBAR)$/";
        TregexPattern clausePattern = TregexPattern.compile(clausePatternText);
        TregexMatcher clauseMatcher = clausePattern.matcher(firstSentenceTree);
        while (clauseMatcher.find()) {
            clauseMatcher.getMatch().pennPrint();
        }
    }
}
+0

事實上,比起我原來的答案,我更建議使用 Tregex。 – StanfordNLPHelp