我寫了兩個 Java 類，用來將 *.nt 文件轉換爲 Neo4j 數據庫。第一個類使用了 SPARQL 插件測試代碼中的 loadTriples() 函數：
https://github.com/neo4j-contrib/sparqlplugin/blob/master/src/test/java/org/neo4j/server/plugin/sparql/BerlinDatasetTest.java 以下是第一個類的源代碼（問題標題：Neo4j SPARQL 查詢停止工作）：
package src;
import com.tinkerpop.blueprints.TransactionalGraph;
import com.tinkerpop.blueprints.impls.neo4j.Neo4jGraph;
import com.tinkerpop.blueprints.oupls.sail.GraphSail;
import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph;
import com.tinkerpop.blueprints.util.wrappers.batch.VertexIDType;
import java.io.File;
import java.net.URI;
import java.net.URL;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.repository.sail.SailRepositoryConnection;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.helpers.BasicParserSettings;
import org.openrdf.sail.Sail;
/**
 * Loads an N-Triples file into an embedded Neo4j database through the
 * Blueprints {@code GraphSail} / Sesame repository stack.
 */
public class QUADParser2 {
    /** N-Triples file to import. */
    private File f;
    /** Directory passed to {@code Neo4jGraph} as the store location. */
    private String DB_PATH = "db/graphdb_qp2";

    /**
     * @param input_file the N-Triples file to load
     */
    public QUADParser2(File input_file) {
        this.f = input_file;
    }

    /**
     * Parses the input file and adds its statements to the graph at {@code DB_PATH}.
     * Errors raised while loading are printed to standard output and not rethrown
     * (best-effort import). The connection, the sail and the graph are released on
     * every path, including when initialisation or loading fails.
     *
     * @throws Exception if the sail cannot be initialised or shut down
     */
    public void parseFile() throws Exception {
        Neo4jGraph neo4jGraph = new Neo4jGraph(DB_PATH);
        int FST_INDEX = 1; // buffer size must be positive
        BatchGraph<TransactionalGraph> neo = new BatchGraph<TransactionalGraph>(neo4jGraph, VertexIDType.NUMBER, FST_INDEX);
        try {
            Sail sail = new GraphSail(neo4jGraph);
            sail.initialize();
            try {
                loadInto(sail);
                System.out.print("Done.");
            } finally {
                // Runs even if loading blew up, so the sail is never left open.
                sail.shutDown();
            }
        } finally {
            // Same shutdown order as before (sail first, then graph), but now guaranteed.
            neo.shutdown();
        }
    }

    /**
     * Opens a repository connection on the given sail, adds the file as N-Triples
     * and commits. Any failure is printed to standard output; the connection is
     * always closed.
     */
    private void loadInto(Sail sail) {
        SailRepositoryConnection connection = null;
        try {
            connection = new SailRepository(sail).getConnection();
            URI uri = f.toURI();
            URL url = uri.toURL();
            System.out.println("Loading " + url + ": ");
            // Invalid datatype values are reported as non-fatal instead of aborting the load.
            connection.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
            connection.add(url, null, RDFFormat.NTRIPLES);
            connection.commit();
        } catch (Exception e1) {
            e1.printStackTrace(System.out);
        } finally {
            if (connection != null) {
                try {
                    connection.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace(System.out);
                }
            }
        }
    }
}
第二個類只使用 Neo4j 自身的庫將 *.nt 文件轉換爲 Neo4j 數據庫。以下是它的源代碼：
package src;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.tooling.GlobalGraphOperations;
/**
 * Converts an N-Triples file into an embedded Neo4j database using only the
 * Neo4j API. Every subject and object becomes a node carrying "value" and
 * "kind" (plus "type" or "lang" for literals); every predicate becomes a
 * relationship carrying "c", "p" and "cp" properties.
 */
public class QUADParser41 {
    /** Batch threshold: a batch is flushed once it holds more than this many triples. */
    private static final int BATCH_SIZE = 10000;

    GraphDatabaseService graphDb;
    private File f;
    /** True once the meta node has been created for the current run. */
    private boolean init = false;
    // Alternative graph name: "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances"
    private String G_NAME = "N";
    private String DB_PATH = "db/graphdb_qp41";

    /**
     * @param input_db_path directory of the Neo4j store to create or open
     * @param input_file    the N-Triples file to import
     */
    public QUADParser41(String input_db_path, File input_file) {
        this.DB_PATH = input_db_path;
        this.f = input_file;
    }

    /**
     * Splits one N-Triples statement into subject, predicate and object.
     * Angle brackets and double quotes are stripped from the terms, so the
     * result no longer distinguishes a URI from a quoted literal.
     *
     * @param line one statement line, terminated by "."
     * @return array whose first three elements are subject, predicate, object
     * @throws IllegalArgumentException if the line is blank or has fewer than three terms
     */
    public String[] getTriple(String line) {
        // Trim first: trailing whitespace after the "." used to make the
        // last-character chop remove the blank and leave the dot in the object.
        String statement = line.trim();
        if (statement.isEmpty()) {
            throw new IllegalArgumentException("Empty line is not a triple");
        }
        // remove the terminating "."
        if (statement.endsWith(".")) {
            statement = statement.substring(0, statement.length() - 1);
        }
        // remove "<" and the double quotes
        statement = statement.replace("<", "").replace("\"", "");
        // The split below relies on every URI being followed by "> ".
        if (!statement.endsWith(" ")) {
            statement = statement + " ";
        }
        String[] output = statement.split("> ");
        if (output.length < 3) {
            throw new IllegalArgumentException("Not a triple: " + line);
        }
        output[2] = output[2].trim();
        return output;
    }

    /**
     * Reads the input file and imports it in batches, then removes the helper
     * data via {@link #cleanNodes()}. Blank lines and "#" comment lines are
     * skipped. Errors are printed to standard output; the reader is closed and
     * the database is shut down in every case.
     */
    public void parseFile() {
        this.init = false;
        graphDb = new GraphDatabaseFactory().newEmbeddedDatabase(this.DB_PATH);
        ArrayList<String[]> triples = new ArrayList<String[]>();
        BufferedReader bfr = null;
        try {
            // Decode as UTF-8 explicitly instead of using the platform charset.
            bfr = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
            String current;
            while ((current = bfr.readLine()) != null) {
                String trimmed = current.trim();
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                // collect triples
                triples.add(getTriple(current));
                // group triples to avoid memory issues
                if (triples.size() > BATCH_SIZE) {
                    System.out.println("adding up to 10k nodes");
                    addNodes(triples);
                    triples.clear();
                }
            }
            // add remaining triples
            addNodes(triples);
            triples.clear();
            // remove custom_key property from all nodes and the node with id 0
            cleanNodes();
        } catch (Exception e) {
            e.printStackTrace(System.out);
        } finally {
            if (bfr != null) {
                try {
                    bfr.close();
                } catch (IOException closeError) {
                    closeError.printStackTrace(System.out);
                }
            }
            // Shut down on failure too, so the database is not left open.
            this.graphDb.shutdown();
        }
    }

    /**
     * Removes the custom_key property from all nodes
     * and deletes the node with id 0.
     */
    public void cleanNodes() {
        int c = 0;
        System.out.println("cleaning nodes");
        Transaction tx = graphDb.beginTx();
        try {
            // Obtained inside the try so the transaction is finished even if this fails.
            Iterator<Node> nodes = GlobalGraphOperations.at(graphDb).getAllNodes().iterator();
            while (nodes.hasNext()) {
                Node n = nodes.next();
                if (n.getId() == 0) {
                    n.delete();
                } else if (n.hasProperty("custom_key")) {
                    // if property exists: remove it
                    if (n.removeProperty("custom_key") != null) {
                        c++;
                    }
                }
            }
            tx.success();
        } finally {
            tx.finish();
        }
        System.out.println("Cleaning is done, cleaned " + c + " nodes");
    }

    /**
     * Looks up the node whose custom_key property equals the given key by
     * scanning all nodes of the current graphDb.
     *
     * @param custom_key key to search for
     * @return the first matching Node, or null if none exists
     */
    public Node findNode(String custom_key) {
        for (Node n : GlobalGraphOperations.at(graphDb).getAllNodes()) {
            if (n.hasProperty("custom_key") && n.getProperty("custom_key").equals(custom_key)) {
                return n;
            }
        }
        return null;
    }

    /**
     * Writes one batch of triples in a single transaction. On the first call
     * after {@link #parseFile()} started it also creates the meta node.
     *
     * @param triples arrays of subject, predicate, object as returned by {@link #getTriple(String)}
     */
    public void addNodes(ArrayList<String[]> triples) {
        ArrayList<Relationship> createdRelationships = new ArrayList<Relationship>();
        Transaction transaction = graphDb.beginTx();
        try {
            // blueprints meta node
            if (!init) {
                Node bp_meta = graphDb.createNode();
                bp_meta.setProperty("value", "urn:com.tinkerpop.blueprints.pgm.oupls.sail:namespaces");
                init = true;
                System.out.println("* Added meta node");
            }
            // add actual content
            for (String[] triple : triples) {
                Node s = findOrCreateNode(triple[0]);
                Node o = findOrCreateNode(triple[2]);
                // predicate is the relationship name
                String predicate = triple[1];
                Relationship p = s.createRelationshipTo(o, DynamicRelationshipType.withName(predicate));
                p.setProperty("cp", G_NAME + " U " + predicate);
                p.setProperty("c", G_NAME);
                p.setProperty("p", "U " + predicate);
                if (createdRelationships.indexOf(p) >= 0) {
                    System.out.println("double relationship!");
                } else {
                    createdRelationships.add(p);
                }
            }
            // end transaction
            transaction.success();
        } finally {
            transaction.finish();
            System.out.println("done with adding nodes");
            System.out.println("processing next 10k nodes");
        }
    }

    /**
     * Returns the node for a triple element, looked up by its custom_key; if it
     * does not exist yet it is created with all properties from
     * {@link #getPropertyList(String)}. Must be called inside a transaction.
     */
    private Node findOrCreateNode(String entity) {
        ArrayList<String[]> nprops = getPropertyList(entity);
        // the custom_key pair is always the last entry of the property list
        Node node = findNode(nprops.get(nprops.size() - 1)[1]);
        if (node == null) {
            node = graphDb.createNode();
            for (String[] prop : nprops) {
                node.setProperty(prop[0], prop[1]);
            }
        }
        return node;
    }

    /**
     * Creates the property list for a given triple element.
     * An element containing "http://" is treated as a typed literal when it
     * also contains "^^", otherwise as a URI. Any other element is a language
     * literal when it ends in "@" plus a language tag, otherwise a simple literal.
     * The separator is located from the right, so a "@" or "^^" inside the
     * value no longer mis-splits the element or throws.
     *
     * NOTE(review): custom_key is a plain concatenation, so e.g. the simple
     * literal "Caren" and the literal "Car"@en share the key "Carenliteral".
     *
     * @param entity an element of a triple as returned by {@link #getTriple(String)}
     * @return list of (key,value) pairs to set on the node; the last pair is custom_key
     */
    public ArrayList<String[]> getPropertyList(String entity) {
        ArrayList<String[]> plist = new ArrayList<String[]>();
        if (entity.contains("http://")) {
            int typeSep = entity.lastIndexOf("^^");
            if (typeSep >= 0) {
                // typed literal: value^^type
                String value = entity.substring(0, typeSep);
                String type = entity.substring(typeSep + 2);
                plist.add(pair("value", value));
                plist.add(pair("type", type));
                plist.add(pair("kind", "literal"));
                plist.add(pair("custom_key", value + type + "literal"));
            } else {
                // uri
                plist.add(pair("value", entity));
                plist.add(pair("kind", "uri"));
                plist.add(pair("custom_key", entity + "uri"));
            }
            return plist;
        }
        int langSep = entity.lastIndexOf('@');
        if (langSep >= 0 && isLanguageTag(entity.substring(langSep + 1))) {
            // literal with language: value@lang
            String value = entity.substring(0, langSep);
            String lang = entity.substring(langSep + 1);
            plist.add(pair("value", value));
            plist.add(pair("lang", lang));
            plist.add(pair("kind", "literal"));
            plist.add(pair("custom_key", value + lang + "literal"));
        } else {
            // simple literal like "xyz"
            plist.add(pair("value", entity));
            plist.add(pair("kind", "literal"));
            plist.add(pair("custom_key", entity + "literal"));
        }
        return plist;
    }

    /** Builds one (key,value) entry of a property list. */
    private static String[] pair(String key, String value) {
        return new String[] { key, value };
    }

    /** True if the text starts with an ASCII letter and contains only ASCII letters, digits and "-". */
    private static boolean isLanguageTag(String tag) {
        if (tag.isEmpty() || !isAsciiLetter(tag.charAt(0))) {
            return false;
        }
        for (int i = 1; i < tag.length(); i++) {
            char ch = tag.charAt(i);
            boolean digit = ch >= '0' && ch <= '9';
            if (!isAsciiLetter(ch) && !digit && ch != '-') {
                return false;
            }
        }
        return true;
    }

    /** True for a-z and A-Z. */
    private static boolean isAsciiLetter(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }
}
第二個類應該創建與第一個類完全相同的數據庫。我寫了一個測試類來比較所有節點和屬性；這個測試類告訴我兩個數據庫沒有任何區別。以下是（相當小的）*.nt 測試數據（我把它命名爲 q6_test.nt）：
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/2000/01/rdf-schema#label> "Car" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/2000/01/rdf-schema#label> "Orange" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/2000/01/rdf-schema#label> "Cherry" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/2000/01/rdf-schema#label> "Cookie" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/2000/01/rdf-schema#label> "Bike" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/2000/01/rdf-schema#label> "Pen" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/2000/01/rdf-schema#label> "Paper" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/2000/01/rdf-schema#label> "Book" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/2000/01/rdf-schema#label> "Shoe" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/2000/01/rdf-schema#label> "Shirt" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
我正嘗試在這個數據集上運行以下查詢：
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>
SELECT ?product ?label WHERE {
?product rdfs:label ?label
?product rdf:type bsbm:Product
FILTER regex(?label, "r")}
我的問題：如果我在第一個類創建的數據庫上運行此查詢，可以得到很多結果；但如果我在第二個類創建的數據庫上運行此查詢，只有最初幾次運行時才能得到結果（特別是當我在每次運行之間等待一兩分鐘時）。此外，如果我改用以下幾行進行查詢，查詢在第二個類創建的數據庫上始終有效：
?product rdfs:label ?label
?product rdf:type bsbm:Product
（但如果可能的話，我希望在不修改查詢的情況下解決這個問題。）以下是我在數據集上測試查詢的步驟：
1）運行 Java 類
2）刪除 Neo4j 的 /data/graph.db 文件夾
3）從 Neo4j 的 /data/ 文件夾中刪除 keystore 和 rrd 文件的所有內容
4）啓動 Neo4j（等待直到它運行起來）
5）停止 Neo4j
6）刪除 Neo4j 的 /data/graph.db 文件夾中的所有內容
7）把我的 Java 類創建的數據庫文件夾中的所有內容複製到 Neo4j 的 /data/graph.db 文件夾
8）啓動 Neo4j
9）運行查詢
（我可能不必做所有這些步驟，但我想額外確認自己是在一個全新的數據庫上工作。）
我的系統：
Neo4j 版本：Community 1.9.4（Windows，從 zip 壓縮包安裝）
補充：我已經把 Gremlin 插件和 SPARQL 插件所用的 Blueprints 庫更新到我能找到的最新版本（2.5.0）
操作系統：Windows 7（Service Pack 1）
Java：JDK 1.7