2014-07-22 125 views
1

有人建議我使用其中一個 API(Jena、OpenRDF 或 Protege)來轉換 OpenIE 4.1 jar 文件產生的輸出(可從 http://knowitall.github.io/openie/ 下載)。下面是 OpenIE 4.1 的輸出格式範例:先是置信度得分,其後是(主語;謂語;賓語)三元組。(問題標題:使用 RDF API(Jena、OpenRDF 或 Protege)轉換 OpenIE 輸出)

The rail launchers are conceptually similar to the underslung SM-1 
    0.93 (The rail launchers; are; conceptually similar to the underslung SM-1) 

我打算按照下面的模式,從上述輸出產生三元組(實際上,處理一組自由文本文檔後已經產生了數百條這樣的輸出,只有置信度得分高於某個值的輸出才會被處理):

鑑於

subject: The rail launchers 
    predicate: are 
    object: conceptually similar to the underslung SM-1 

(置信度得分被忽略)

  1. 爲文件中每個不同的主語(:subject)創建一個空白節點標識符(我們稱之爲 bnode_s)
  2. 爲文件中每個不同的賓語(:object)創建一個空白節點標識符(我們稱之爲 bnode_o)
  3. 爲每個不同的謂語定義一個 URI

順帶一提,我也有元素多於三個的輸出,例如「約翰送給瑪麗一件生日禮物」(約翰;送給;瑪麗;一件生日禮物),要把這類輸出轉換成 RDF 三元組會更爲複雜。

但是,我不熟悉上面提到的任何API,也不知道API可能採用的輸入格式。

+0

我看過你先前的問題,但這個問題本身提供的信息還不夠完整。例如,你說你想要「爲文件中的每個不同主語創建一個空白節點標識符」,但是你沒有說明「文件中不同的 :subject」指的是什麼。既然你問的是如何使用其中一個庫生成某種特定輸出,請先給出一些示例輸出。例如,你給我們看的句子應該產生什麼樣的 RDF?(......; ...; ...)是本來就應該映射到三元組,還是只是這筆特定數據的巧合? –

+0

你想要的輸出是否大致像 http://pastebin.com/sFwfqLFM 這樣? –

+0

我的意思是應該爲上例中的句子生成的RDF。 –

回答

1
import org.apache.jena.riot.Lang; 
import org.apache.jena.riot.RDFDataMgr; 

import com.hp.hpl.jena.rdf.model.Model; 
import com.hp.hpl.jena.rdf.model.ModelFactory; 
import com.hp.hpl.jena.rdf.model.Property; 
import com.hp.hpl.jena.rdf.model.Resource; 
import com.hp.hpl.jena.util.URIref; 
import com.hp.hpl.jena.vocabulary.RDF; 
import com.hp.hpl.jena.vocabulary.RDFS; 
import com.hp.hpl.jena.vocabulary.XSD; 

/**
 * Example of turning OpenIE-style extractions (confidence, subject, predicate, object)
 * into an RDF model with Jena and printing the model in several serializations.
 *
 * Every extraction becomes a blank node that carries rdf:subject, rdf:predicate and
 * rdf:object, plus a confidence property from this example's own namespace. The subject
 * and object are blank nodes labelled with the extracted text; the predicate is a URI.
 */
public class OpenIETripletConversionExample {
    /**
     * Builds the model from two hard-coded sample extractions and writes it to standard
     * output as Turtle, RDF/XML and N-Triples, in that order.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Namespace for the terms minted here: the confidence property and the predicates.
        String namespace = "http://stackoverflow.com/q/24897405/1281433/";

        // The prefixes are registered only so the Turtle and RDF/XML output reads nicely.
        Model rdfModel = ModelFactory.createDefaultModel();
        rdfModel.setNsPrefix("", namespace);
        rdfModel.setNsPrefix("rdf", RDF.getURI());
        rdfModel.setNsPrefix("xsd", XSD.getURI());
        rdfModel.setNsPrefix("rdfs", RDFS.getURI());

        // Optional: property used to keep the extraction's confidence score.
        Property confidenceProperty = rdfModel.createProperty(namespace + "confidence");

        // Sample extractions, each laid out as { confidence, subject, predicate, object }.
        Object[][] extractions = {
            { 0.57, "The quick brown fox", "jumped", "over the lazy dog." },
            { 0.93, "The rail launchers", "are", "conceptually similar to the underslung SM-1." }
        };

        for (int i = 0; i < extractions.length; i++) {
            Object[] row = extractions[i];
            Object score = row[0];
            String subjectText = (String) row[1];
            String predicateText = (String) row[2];
            String objectText = (String) row[3];

            // Node standing for the extraction as a whole.
            Resource extractionNode = rdfModel.createResource();

            // Subject and object are anonymous nodes that only carry the extracted text.
            Resource subjectNode = rdfModel.createResource();
            subjectNode.addProperty(RDFS.label, subjectText);

            // The predicate text is encoded so it can be appended to the namespace as a URI.
            Property predicateTerm =
                    rdfModel.createProperty(namespace + URIref.encode(predicateText));

            Resource objectNode = rdfModel.createResource();
            objectNode.addProperty(RDFS.label, objectText);

            // Attach the score and the three parts to the extraction node.
            extractionNode
                    .addLiteral(confidenceProperty, score)
                    .addProperty(RDF.subject, subjectNode)
                    .addProperty(RDF.predicate, predicateTerm)
                    .addProperty(RDF.object, objectNode);
        }

        // Print the same model once per serialization format.
        Lang[] outputFormats = { Lang.TTL, Lang.RDFXML, Lang.NTRIPLES };
        for (Lang format : outputFormats) {
            RDFDataMgr.write(System.out, rdfModel, format);
        }
    }
}
@prefix :  <http://stackoverflow.com/q/24897405/1281433/> . 
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . 
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . 

[ rdf:object  [ rdfs:label "conceptually similar to the underslung SM-1." ] ; 
    rdf:predicate :are ; 
    rdf:subject [ rdfs:label "The rail launchers" ] ; 
    :confidence "0.93"^^xsd:double 
] . 

[ rdf:object  [ rdfs:label "over the lazy dog." ] ; 
    rdf:predicate :jumped ; 
    rdf:subject [ rdfs:label "The quick brown fox" ] ; 
    :confidence "0.57"^^xsd:double 
] . 
<rdf:RDF 
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
    xmlns="http://stackoverflow.com/q/24897405/1281433/" 
    xmlns:xsd="http://www.w3.org/2001/XMLSchema#" 
    xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"> 
    <rdf:Description> 
    <rdf:object rdf:parseType="Resource"> 
     <rdfs:label>conceptually similar to the underslung SM-1.</rdfs:label> 
    </rdf:object> 
    <rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/are"/> 
    <rdf:subject rdf:parseType="Resource"> 
     <rdfs:label>The rail launchers</rdfs:label> 
    </rdf:subject> 
    <confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double" 
    >0.93</confidence> 
    </rdf:Description> 
    <rdf:Description> 
    <rdf:object rdf:parseType="Resource"> 
     <rdfs:label>over the lazy dog.</rdfs:label> 
    </rdf:object> 
    <rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/jumped"/> 
    <rdf:subject rdf:parseType="Resource"> 
     <rdfs:label>The quick brown fox</rdfs:label> 
    </rdf:subject> 
    <confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double" 
    >0.57</confidence> 
    </rdf:Description> 
</rdf:RDF> 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffe <http://www.w3.org/2000/01/rdf-schema#label> "The quick brown fox" . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffa . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/are> . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffb . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://stackoverflow.com/q/24897405/1281433/confidence> "0.93"^^<http://www.w3.org/2001/XMLSchema#double> . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffa <http://www.w3.org/2000/01/rdf-schema#label> "conceptually similar to the underslung SM-1." . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffd . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/jumped> . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffe . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://stackoverflow.com/q/24897405/1281433/confidence> "0.57"^^<http://www.w3.org/2001/XMLSchema#double> . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffd <http://www.w3.org/2000/01/rdf-schema#label> "over the lazy dog." . 
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffb <http://www.w3.org/2000/01/rdf-schema#label> "The rail launchers" . 
+0

謝謝你的例子。你使用的是哪個版本的 Jena API?2.11.2? –

+0

不確定,但我希望這可以與最新版本一起使用。 –

+0

@HappyJubilee 不過,爲什麼確切的版本很重要?你是不是試過這段代碼但無法運行?如果是這樣,你遇到了什麼錯誤? –

0

爲了完整起見(因爲提問者問到了幾個 API),我在這裏重現 @Joshua Taylor 的解決方案,但改用 OpenRDF Sesame API 而不是 Jena:

import java.io.UnsupportedEncodingException; 
import java.net.URLEncoder; 

import org.openrdf.model.Model; 
import org.openrdf.model.Resource; 
import org.openrdf.model.URI; 
import org.openrdf.model.ValueFactory; 
import org.openrdf.model.impl.LinkedHashModel; 
import org.openrdf.model.impl.ValueFactoryImpl; 
import org.openrdf.model.util.Literals; 
import org.openrdf.model.vocabulary.RDF; 
import org.openrdf.model.vocabulary.RDFS; 
import org.openrdf.model.vocabulary.XMLSchema; 
import org.openrdf.rio.RDFFormat; 
import org.openrdf.rio.RDFHandlerException; 
import org.openrdf.rio.Rio; 

/**
 * Example of turning OpenIE-style extractions (confidence, subject, predicate, object)
 * into an RDF model with the OpenRDF Sesame API and printing the model in several
 * serializations.
 *
 * Every extraction becomes a blank node that carries rdf:subject, rdf:predicate and
 * rdf:object, plus a confidence property from this example's own namespace. The subject
 * and object are blank nodes labelled with the extracted text; the predicate is a URI.
 */
public class OpenIETripletConversionExample {
    /**
     * Builds the model from two hard-coded sample extractions and writes it to standard
     * output as Turtle, RDF/XML and N-Triples, in that order.
     *
     * @param args command-line arguments; not used
     * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
     *         for the "utf-8" encoding name
     * @throws RDFHandlerException declared by the Rio writer calls
     */
    public static void main(String[] args) throws UnsupportedEncodingException, RDFHandlerException {
        // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
        Model model = new LinkedHashModel();
        String NS = "http://stackoverflow.com/q/24897405/1281433/";
        model.setNamespace("rdf", RDF.NAMESPACE);
        model.setNamespace("rdfs", RDFS.NAMESPACE);
        model.setNamespace("xsd", XMLSchema.NAMESPACE);
        model.setNamespace("", NS);

        // Create a ValueFactory we can use to create resources and statements
        ValueFactory vf = ValueFactoryImpl.getInstance();

        // Preserve the confidence level (optional).
        URI confidence = vf.createURI(NS, "confidence");

        // Define some triplets to convert: { confidence, subject, predicate, object }.
        Object[][] triplets = {
            { 0.57, "The quick brown fox", "jumped", "over the lazy dog." },
            { 0.93, "The rail launchers", "are", "conceptually similar to the underslung SM-1." }
        };

        // For each triplet, create a resource representing the sentence, as well as the subject,
        // predicate, and object, and then add the triples to the model.
        for (Object[] triplet : triplets) {
            Resource sentence = vf.createBNode();
            Resource subject = vf.createBNode();

            // URLEncoder performs HTML form encoding, which turns a space into '+'. Replace
            // that with "%20" so a multi-word predicate is percent-encoded inside the URI.
            // A literal '+' in the input is already emitted as "%2B", so only encoded spaces
            // are affected by the replacement.
            String predicateName =
                    URLEncoder.encode((String) triplet[2], "utf-8").replace("+", "%20");
            URI predicate = vf.createURI(NS, predicateName);

            Resource object = vf.createBNode();

            // Subject and object only carry the extracted text as a label.
            model.add(subject, RDFS.LABEL, Literals.createLiteral(vf, triplet[1]));
            model.add(object, RDFS.LABEL, Literals.createLiteral(vf, (String)triplet[3]));

            // Attach the score and the three parts to the sentence node.
            model.add(sentence, confidence, Literals.createLiteral(vf, triplet[0]));
            model.add(sentence, RDF.SUBJECT, subject);
            model.add(sentence, RDF.PREDICATE, predicate);
            model.add(sentence, RDF.OBJECT, object);
        }

        // Show the model in a few different formats.
        Rio.write(model, System.out, RDFFormat.TURTLE);
        Rio.write(model, System.out, RDFFormat.RDFXML);
        Rio.write(model, System.out, RDFFormat.NTRIPLES);
    }
}