2013-10-29 92 views
2

我寫了兩個 Java 類,用來將 *.nt 文件轉換爲 Neo4j 數據庫。第一個類使用了 SPARQL 插件中的 loadTriples() 函數:
https://github.com/neo4j-contrib/sparqlplugin/blob/master/src/test/java/org/neo4j/server/plugin/sparql/BerlinDatasetTest.java 以下是第一個類的源代碼(問題標題:Neo4j SPARQL 查詢停止工作):

package src; 

import com.tinkerpop.blueprints.TransactionalGraph; 
import com.tinkerpop.blueprints.impls.neo4j.Neo4jGraph; 
import com.tinkerpop.blueprints.oupls.sail.GraphSail; 
import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph; 
import com.tinkerpop.blueprints.util.wrappers.batch.VertexIDType; 
import java.io.File; 
import java.net.URI; 
import java.net.URL; 
import org.openrdf.repository.sail.SailRepository; 
import org.openrdf.repository.sail.SailRepositoryConnection; 
import org.openrdf.rio.RDFFormat; 
import org.openrdf.rio.helpers.BasicParserSettings; 
import org.openrdf.sail.Sail; 


/**
 * Loads an N-Triples file into the Neo4j store at {@code db/graphdb_qp2}
 * through a Blueprints {@link GraphSail} and a Sesame repository connection.
 */
public class QUADParser2 {

    /** N-Triples file to import. */
    private final File f;

    /** Directory of the Neo4j store that is created or opened. */
    private final String DB_PATH = "db/graphdb_qp2";


    /**
     * @param input_file the N-Triples file that {@link #parseFile()} will load
     */
    public QUADParser2(File input_file) {
        this.f = input_file;
    }


    /**
     * Opens the graph, loads the whole input file as N-Triples, commits, and
     * shuts everything down again.
     *
     * <p>Failures while loading are printed to {@code System.out} and not
     * rethrown. The connection, the sail and the graph are released in
     * {@code finally} blocks so that a failed load cannot leave them open.
     *
     * @throws Exception if the sail cannot be initialised or shut down
     */
    public void parseFile() throws Exception {

        Neo4jGraph neo4jGraph = new Neo4jGraph(DB_PATH);
        int FST_INDEX = 1; // buffer size must be positive
        // NOTE(review): the sail below is built on neo4jGraph directly, so in the
        // visible code this wrapper is only used to shut the graph down.
        BatchGraph<TransactionalGraph> neo = new BatchGraph<TransactionalGraph>(neo4jGraph, VertexIDType.NUMBER, FST_INDEX);
        Sail sail = new GraphSail(neo4jGraph);

        try {
            sail.initialize();
            try {
                loadTriples(sail);
                System.out.print("Done.");
            } finally {
                sail.shutDown();
            }
        } finally {
            neo.shutdown();
        }
    }


    /**
     * Adds the content of the input file to the given sail and commits it.
     * Any exception is printed to {@code System.out}; the connection is closed
     * whether or not the load succeeded.
     */
    private void loadTriples(Sail sail) {
        SailRepositoryConnection connection = null;
        try {
            connection = new SailRepository(sail).getConnection();

            URI uri = f.toURI();
            URL url = uri.toURL();
            System.out.println("Loading " + url + ": ");

            // Registers datatype-value verification as a non-fatal parser error,
            // presumably so that a malformed typed literal does not abort the load.
            connection.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
            connection.add(url, null, RDFFormat.NTRIPLES);
            connection.commit();
        } catch (Exception e1) {
            e1.printStackTrace(System.out);
        } finally {
            if (connection != null) {
                try {
                    connection.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace(System.out);
                }
            }
        }
    }


}

第二個類只使用 Neo4j 的庫將 *.nt 文件轉換爲 Neo4j 數據庫。這是它的源代碼:

package src; 

import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileReader; 
import java.util.ArrayList; 
import java.util.Iterator; 
import org.neo4j.graphdb.DynamicRelationshipType; 
import org.neo4j.graphdb.GraphDatabaseService; 
import org.neo4j.graphdb.Node; 
import org.neo4j.graphdb.Relationship; 
import org.neo4j.graphdb.Transaction; 
import org.neo4j.graphdb.factory.GraphDatabaseFactory; 
import org.neo4j.tooling.GlobalGraphOperations; 


/**
 * Converts an N-Triples file into an embedded Neo4j database using only the
 * Neo4j API. Every subject and object becomes a node carrying
 * {@code value}/{@code kind} (and optionally {@code type}/{@code lang})
 * properties; every predicate becomes a relationship with {@code c},
 * {@code p} and {@code cp} properties.
 *
 * <p>While importing, nodes carry a temporary {@code custom_key} property used
 * to find already-created nodes; {@link #cleanNodes()} removes it at the end.
 */
public class QUADParser41 {
    GraphDatabaseService graphDb;

    /** N-Triples file to import. */
    private File f;

    /** Whether the meta node has already been created for the current run. */
    private boolean init = false;
    /** Context name written into the {@code c} and {@code cp} relationship properties. */
    private String G_NAME = "N";//"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances";
    /** Directory of the embedded Neo4j store. */
    private String DB_PATH = "db/graphdb_qp41";


    /**
     * @param input_db_path directory of the Neo4j store to create or open
     * @param input_file    N-Triples file to import
     */
    public QUADParser41(String input_db_path, File input_file) {
        this.DB_PATH = input_db_path;
        this.f = input_file;
    }


    /**
     * Splits one N-Triples line into subject, predicate and object.
     *
     * <p>The terminating {@code .} is dropped, every {@code <} and {@code "}
     * is removed, and the rest is split on {@code "> "}. Only the first three
     * parts are meaningful to the callers in this class; a literal that itself
     * contains {@code "> "} is therefore cut short.
     *
     * @param line a single non-blank N-Triples statement
     * @return array whose indices 0, 1 and 2 are subject, predicate and object
     * @throws IllegalArgumentException if the line has fewer than three parts
     */
    public String[] getTriple(String line) {
        // Trim first: with trailing whitespace after the final '.', blindly
        // dropping the last character would remove the blank and keep the dot.
        line = line.trim();
        if (line.endsWith(".")) {
            line = line.substring(0, line.length() - 1);
        }
        // remove '<' and '"' everywhere
        line = line.replace("<", "");
        line = line.replace("\"", "");

        // the remaining '>' followed by a blank separates the terms
        String[] output = line.split("> ");
        if (output.length < 3) {
            throw new IllegalArgumentException("Not a valid triple: " + line);
        }

        output[2] = output[2].trim();

        return output;
    }


    /**
     * Imports the whole input file into the database at {@code DB_PATH}.
     *
     * <p>Triples are written in batches of a little over 10000 to bound memory
     * use. Blank lines and {@code #} comment lines are skipped. Errors are
     * printed to {@code System.out}; the reader is always closed and the
     * database is always shut down, even when the import fails.
     */
    public void parseFile() {
        this.init = false;

        graphDb = new GraphDatabaseFactory().newEmbeddedDatabase(this.DB_PATH);

        ArrayList<String[]> triples = new ArrayList<String[]>();

        try {
            BufferedReader bfr = new BufferedReader(new FileReader(f));
            try {
                String current;
                while ((current = bfr.readLine()) != null) {
                    String statement = current.trim();
                    // a blank or comment line must not abort the whole import
                    if (statement.length() == 0 || statement.startsWith("#")) {
                        continue;
                    }

                    // collect triples
                    triples.add(getTriple(statement));

                    // group triples to avoid memory issues
                    if (triples.size() > 10000) {
                        System.out.println("adding up to 10k nodes");
                        addNodes(triples);
                        triples.clear();
                    }
                }
            } finally {
                bfr.close();
            }

            // add remaining triples
            addNodes(triples);
            triples.clear();

            // remove custom_key property from all nodes
            // remove node with id 0
            cleanNodes();

        } catch (Exception e) {
            e.printStackTrace(System.out);
        } finally {
            // always release the store so it is not left in an unclean state
            this.graphDb.shutdown();
        }

    }


    /**
     * Removes the {@code custom_key} property from all nodes and deletes the
     * node with id 0, all inside one transaction.
     */
    public void cleanNodes() {
        int c = 0;

        System.out.println("cleaning nodes");
        Transaction tx = graphDb.beginTx();
        try {
            Iterator<Node> nodes = GlobalGraphOperations.at(graphDb).getAllNodes().iterator();
            while (nodes.hasNext()) {
                Node n = nodes.next();
                if (n.getId() == 0) {
                    // NOTE(review): presumably this is the reference node that the
                    // database creates on its own; on a store without one, id 0
                    // may be a data node - confirm for the Neo4j version in use.
                    n.delete();
                } else if (n.hasProperty("custom_key")) {
                    // count only properties that were really removed
                    if (n.removeProperty("custom_key") != null) {
                        c++;
                    }
                }
            }
            tx.success();
        } finally {
            tx.finish();
        }

        System.out.println("Cleaning is done, cleaned " + c + " nodes");

    }


    /**
     * Looks up the node whose {@code custom_key} property equals the given key
     * by scanning all nodes of the current database.
     *
     * @param custom_key key to search for
     * @return the first matching node, or {@code null} if there is none
     */
    public Node findNode(String custom_key) {
        Iterator<Node> nodes = GlobalGraphOperations.at(graphDb).getAllNodes().iterator();
        while (nodes.hasNext()) {
            Node n = nodes.next();
            Object key = n.getProperty("custom_key", null);
            if (key != null && key.equals(custom_key)) {
                return n;
            }
        }
        return null;
    }


    /**
     * Writes one batch of triples in a single transaction. On the first call
     * of a run a meta node is created as well. Subject and object nodes are
     * reused when a node with the same {@code custom_key} already exists.
     *
     * @param triples triples as returned by {@link #getTriple(String)}
     */
    public void addNodes(ArrayList<String[]> triples) {
        ArrayList<Relationship> createdRels = new ArrayList<Relationship>();

        Transaction transaction = graphDb.beginTx();

        try {
            // blueprints meta node, created once per run
            if (init == false) {
                Node bp_meta = graphDb.createNode();
                bp_meta.setProperty("value", "urn:com.tinkerpop.blueprints.pgm.oupls.sail:namespaces");
                init = true;
                System.out.println("* Added meta node");
            }
            // add actual content
            for (int i = 0; i < triples.size(); i++) {
                String[] triple = triples.get(i);

                Node s = findOrCreateNode(triple[0]);
                Node o = findOrCreateNode(triple[2]);

                // predicate is the relationship name
                String predicate = triple[1];
                DynamicRelationshipType drt = DynamicRelationshipType.withName(predicate);

                Relationship p = s.createRelationshipTo(o, drt);
                p.setProperty("cp", G_NAME + " U " + predicate);
                p.setProperty("c", G_NAME);
                p.setProperty("p", "U " + predicate);
                // NOTE(review): p has just been created, so it is probably never
                // found in the list and this message likely never prints - confirm.
                if (createdRels.indexOf(p) >= 0) System.out.println("double relationship!");
                else createdRels.add(p);

            }
            // end transaction
            transaction.success();
        } finally {
            transaction.finish();
            System.out.println("done with adding nodes");
            System.out.println("processing next 10k nodes");
        }
    }


    /**
     * Returns the node for the given triple element, creating it with all of
     * its properties when no node with the same custom_key exists yet.
     */
    private Node findOrCreateNode(String entity) {
        ArrayList<String[]> nprops = getPropertyList(entity);
        // custom_key is always the last entry of the property list
        Node node = findNode(nprops.get(nprops.size() - 1)[1]);
        if (node == null) {
            node = graphDb.createNode();
            for (int j = 0; j < nprops.size(); j++) {
                node.setProperty(nprops.get(j)[0], nprops.get(j)[1]);
            }
        }
        return node;
    }


    /**
     * Creates the property list for a triple element.
     *
     * <p>Classification, in this order: contains {@code http://} and
     * {@code ^^} - typed literal; contains {@code http://} - URI; contains
     * {@code @} - language-tagged literal; otherwise - plain literal. A plain
     * literal that merely contains {@code @} or {@code http://} is therefore
     * classified by the same rules. Type and language are taken after the
     * last {@code ^^} / {@code @}, so a value that itself contains the
     * separator stays intact, and a trailing separator gives an empty type
     * or language.
     *
     * @param entity an element of a triple as produced by {@link #getTriple(String)}
     * @return list of (key, value) pairs to set on the node; the last pair is
     *         always {@code custom_key}, the concatenation of the other values
     */
    public ArrayList<String[]> getPropertyList(String entity) {
        ArrayList<String[]> plist = new ArrayList<String[]>();

        if (entity.contains("http://")) {
            int typeSep = entity.lastIndexOf("^^");
            if (typeSep >= 0) {
                // typed literal: value^^type
                plist.add(property("value", entity.substring(0, typeSep)));
                plist.add(property("type", entity.substring(typeSep + 2)));
                plist.add(property("kind", "literal"));
            } else {
                // uri
                plist.add(property("value", entity));
                plist.add(property("kind", "uri"));
            }
        } else if (entity.contains("@")) {
            // language-tagged literal: value@lang
            int langSep = entity.lastIndexOf('@');
            plist.add(property("value", entity.substring(0, langSep)));
            plist.add(property("lang", entity.substring(langSep + 1)));
            plist.add(property("kind", "literal"));
        } else {
            // simple literal like "xyz"
            plist.add(property("value", entity));
            plist.add(property("kind", "literal"));
        }

        // custom_key: all values collected so far, in order
        StringBuilder key = new StringBuilder();
        for (int i = 0; i < plist.size(); i++) {
            key.append(plist.get(i)[1]);
        }
        plist.add(property("custom_key", key.toString()));

        return plist;
    }


    /** Builds one (key, value) pair in the array layout used by the property lists. */
    private static String[] property(String key, String value) {
        return new String[] { key, value };
    }


}

第二個類應該創建與第一個類完全相同的數據庫。我寫了一個測試類,比較所有節點和屬性;這個測試類告訴我兩個數據庫沒有任何區別。這是(相當小的)*.nt 測試數據(我把它命名爲 q6_test.nt):

<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/2000/01/rdf-schema#label> "Car" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/2000/01/rdf-schema#label> "Orange" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/2000/01/rdf-schema#label> "Cherry" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/2000/01/rdf-schema#label> "Cookie" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/2000/01/rdf-schema#label> "Bike" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/2000/01/rdf-schema#label> "Pen" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/2000/01/rdf-schema#label> "Paper" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/2000/01/rdf-schema#label> "Book" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/2000/01/rdf-schema#label> "Shoe" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/2000/01/rdf-schema#label> "Shirt" . 
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> . 

我正嘗試在該數據集上運行以下查詢:

# Selects every resource typed bsbm:Product together with its rdfs:label,
# keeping only labels that contain the letter "r".
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>
SELECT ?product ?label WHERE {
    # consecutive triple patterns must be separated by "."
    ?product rdfs:label ?label .
    ?product rdf:type bsbm:Product .
    FILTER regex(?label, "r")
}

我的問題:如果我在第一個類創建的數據庫上運行此查詢,會得到很多結果;但如果我在第二個類創建的數據庫上運行此查詢,只有在最初幾次運行時才能得到結果(特別是當我在每次運行之間等待一兩分鐘時)。此外,如果我改用以下幾行,查詢在第二個類創建的數據庫文件夾上始終有效:

?product rdfs:label ?label 
?product rdf:type bsbm:Product 


(但如果可能的話,我希望在不修改查詢的情況下解決這個問題。)以下是我在數據集上測試查詢的步驟:
1)運行 Java 類
2)刪除 Neo4j 的 /data/graph.db 文件夾
3)從 Neo4j 的 /data/ 文件夾中刪除 keystore 和 rrd 文件
4)啓動 Neo4j(等待,直到它運行起來)
5)停止 Neo4j
6)刪除 Neo4j 的 /data/graph.db 文件夾中的所有內容
7)將我的 Java 類創建的數據庫文件夾中的所有內容複製到 Neo4j 的 /data/graph.db 文件夾
8)啓動 Neo4j
9)運行查詢
(我可能不需要執行所有這些步驟,但我想格外確保是在一個全新的數據庫上工作。)
我的系統:
Neo4j 版本:Community 1.9.4(Windows,從 zip 壓縮包安裝)
補充:我已將 Gremlin 和 SPARQL 插件的 Blueprints 庫更新到我能找到的最新版本(2.5.0)
操作系統:Windows 7(Service Pack 1)
Java:JDK 1.7

回答