2011-04-09 66 views
1

我想使用SPARQL查詢Linked Movie Database(LinkedMDB)的本地版本。該文件採用N-Triples格式,大小約爲450 MB。我正在使用servlet來實現。現在,當我提交查詢時,servlet需要五分鐘以上來處理,最後我得到以下異常(問題標題:無法查詢鏈接電影數據庫的本地版本):

type Exception report 

message 

description The server encountered an internal error() that prevented it from fulfilling this request. 

exception 

javax.servlet.ServletException: Servlet execution threw an exception 


root cause 

java.lang.OutOfMemoryError: Java heap space 
    java.util.Arrays.copyOfRange(Arrays.java:3209) 
    java.lang.String.<init>(String.java:215) 
    java.lang.StringBuilder.toString(StringBuilder.java:430) 
    org.openjena.riot.tokens.TokenizerText.allBetween(TokenizerText.java:732) 
    org.openjena.riot.tokens.TokenizerText.parseToken(TokenizerText.java:152) 
    org.openjena.riot.tokens.TokenizerText.hasNext(TokenizerText.java:69) 
    org.openjena.atlas.iterator.PeekIterator.fill(PeekIterator.java:37) 
    org.openjena.atlas.iterator.PeekIterator.next(PeekIterator.java:77) 
    org.openjena.riot.lang.LangBase.nextToken(LangBase.java:145) 
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:59) 
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:21) 
    org.openjena.riot.lang.LangNTuple.runParser(LangNTuple.java:58) 
    org.openjena.riot.lang.LangBase.parse(LangBase.java:75) 
    org.openjena.riot.system.JenaReaderNTriples2.readWorker(JenaReaderNTriples2.java:28) 
    org.openjena.riot.system.JenaReaderRIOT.readImpl(JenaReaderRIOT.java:124) 
    org.openjena.riot.system.JenaReaderRIOT.read(JenaReaderRIOT.java:79) 
    com.hp.hpl.jena.rdf.model.impl.ModelCom.read(ModelCom.java:226) 
    com.hp.hpl.jena.util.FileManager.readModelWorker(FileManager.java:395) 
    com.hp.hpl.jena.util.FileManager.loadModelWorker(FileManager.java:299) 
    com.hp.hpl.jena.util.FileManager.loadModel(FileManager.java:250) 
    ServletExample.runQuery(ServletExample.java:92) 
    ServletExample.doGet(ServletExample.java:62) 
    javax.servlet.http.HttpServlet.service(HttpServlet.java:627) 
    javax.servlet.http.HttpServlet.service(HttpServlet.java:729) 


note The full stack trace of the root cause is available in the Apache Tomcat/5.5.31 logs. 

我的代碼是:

import java.io.IOException; 
import java.io.PrintWriter; 

import javax.servlet.ServletException; 
import javax.servlet.http.*; 

import com.hp.hpl.jena.query.*; 
import com.hp.hpl.jena.rdf.model.*; 
import com.hp.hpl.jena.util.FileManager; 

/**
 * Servlet that runs a fixed SPARQL SELECT query against a local N-Triples
 * dump of the Linked Movie Database and renders the matching actor names as
 * a small HTML page.
 *
 * <p>The dump is parsed into an in-memory Jena model the first time a query
 * is served and is then reused for every later request. Parsing it again on
 * each request (as this class used to do) costs minutes per request and
 * multiplies the heap demand when requests overlap.
 *
 * <p>Caching does not remove the need for enough heap to hold the model
 * once: if the first load still fails with {@code OutOfMemoryError}, raise
 * the JVM heap limit (for example {@code -Xmx}) or move the data to a
 * disk-backed store.
 */
public class ServletExample
    extends HttpServlet
{
    /***********************************/
    /* Constants                       */
    /***********************************/

    private static final long serialVersionUID = 1L;

    /** Public LinkedMDB endpoint; not used by the local-dump code path. */
    public static final String SPARQL_ENDPOINT = "http://data.linkedmdb.org/sparql";

    /**
     * Query for actors who appear both in a film directed by "Sofia Coppola"
     * and in a film directed by "Francis Ford Coppola". The director names
     * are literal parts of the query text.
     */
    public static final String QUERY = "PREFIX m: <http://data.linkedmdb.org/resource/movie/>"
        + "SELECT DISTINCT ?actorName WHERE {"
        + "?dir1 m:director_name \"Sofia Coppola\"."
        + "?dir2 m:director_name \"Francis Ford Coppola\"."
        + "?dir1film m:director ?dir1;"
        + "m:actor ?actor."
        + "?dir2film m:director ?dir2;"
        + "m:actor ?actor."
        + "?actor m:actor_name ?actorName."
        + "}";

    /** Location of the N-Triples dump that backs the query. */
    private static final String DUMP_PATH =
        "e:\\applications\\linkedmdb-18-05-2009-dump\\dump.nt";

    private static final String HEADER = "<html>\n" +
        "  <head>\n" +
        "  <title>results</title>\n" +
        "   <link href=\"simple.css\" type=\"text/css\" rel=\"stylesheet\" />\n" +
        "  </head>\n" +
        "  <body>\n" +
        "";

    private static final String FOOTER = "</body></html>";

    /***********************************/
    /* State                           */
    /***********************************/

    /**
     * Lazily loaded dump, shared by all requests handled by this servlet
     * instance. Marked transient because the servlet base class is
     * serializable and the model is only a cache.
     */
    private transient Model model;

    /**
     * Respond to HTTP GET request. Will need to be mounted against some URL
     * pattern in web.xml.
     *
     * <p>Both {@code dir1} and {@code dir2} request parameters must be
     * present and non-empty; otherwise a usage hint is rendered instead of
     * running the query.
     *
     * @param req the incoming request
     * @param resp the response the HTML page is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException if the response cannot be written
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp)
        throws ServletException, IOException
    {
        // Has to happen before the first getWriter() call, otherwise the
        // charset is not applied to the writer.
        resp.setContentType("text/html; charset=UTF-8");

        String dir1 = req.getParameter("dir1");
        String dir2 = req.getParameter("dir2");
        if (dir1 == null || dir2 == null || dir1.isEmpty() || dir2.isEmpty()) {
            noInput(resp);
        }
        else {
            runQuery(resp, dir1, dir2);
        }
    }

    /**
     * Renders a page asking the caller to supply the two director parameters.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void noInput(HttpServletResponse resp)
        throws IOException
    {
        header(resp);
        resp.getWriter().println("<p>Please select director names as query params <code>dir1</code> and <code>dir2</code></p>");
        footer(resp);
    }

    /**
     * Writes the closing HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void footer(HttpServletResponse resp) throws IOException {
        resp.getWriter().println(FOOTER);
    }

    /**
     * Writes the opening HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void header(HttpServletResponse resp) throws IOException {
        resp.getWriter().println(HEADER);
    }

    /**
     * Executes {@link #QUERY} against the local dump and writes one line per
     * actor name found, or a "no results" message.
     *
     * <p>{@code dir1} and {@code dir2} are accepted for interface
     * compatibility but are not substituted into the query; the query text
     * is fixed.
     *
     * @param resp the response to write to
     * @param dir1 first director name from the request (currently unused)
     * @param dir2 second director name from the request (currently unused)
     * @throws IOException if the response cannot be written
     */
    protected void runQuery(HttpServletResponse resp, String dir1, String dir2)
        throws IOException
    {
        PrintWriter out = resp.getWriter();

        Query query = QueryFactory.create(QUERY);
        QueryExecution qexec = QueryExecutionFactory.create(query, getModel());
        try {
            ResultSet results = qexec.execSelect();

            header(resp);
            if (!results.hasNext()) {
                out.println("<p>No results, sorry.</p>");
            }
            else {
                out.println("<h1>Results</h1>");
                while (results.hasNext()) {
                    QuerySolution qs = results.next();
                    String actorName = qs.getLiteral("actorName").getLexicalForm();
                    // Names come straight from the data set and may contain
                    // characters such as '&' or '<'.
                    out.println(String.format("<div>Actor named: %s</div>", escapeHtml(actorName)));
                }
            }
            footer(resp);
        }
        finally {
            // Release the execution's resources even if rendering fails.
            qexec.close();
        }
    }

    /**
     * Returns the model for the dump, parsing the file on first use only.
     *
     * <p>Synchronized so that overlapping first requests do not each start
     * their own parse of the file. If loading fails, nothing is cached and
     * the next request tries again.
     *
     * <p>NOTE(review): the returned model is queried by several request
     * threads at once; confirm that concurrent read-only access is safe for
     * the Jena version in use.
     *
     * @return the shared in-memory model
     */
    private synchronized Model getModel() {
        if (model == null) {
            model = FileManager.get().loadModel(DUMP_PATH);
        }
        return model;
    }

    /**
     * Escapes the characters that have special meaning in HTML text and
     * attribute values.
     *
     * @param text raw text; must not be null
     * @return the text with {@code & < > " '} replaced by character references
     */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}

有什麼辦法可以解決這個異常?

回答

1

看來你正在使用Jena/RIOT把所有數據加載到內存中。據我所知,LinkedMDB的數據量足夠大,這種做法會出問題。你實際上是把整個數據庫都讀進了內存。

增加JVM堆大小可能是一個可行的解決方案,但如果數據還在不斷增長,這種方法無法擴展。

正確的解決辦法是改用Jena中專爲這種規模的數據集而設計的其他存儲配置。它們是:

  1. Jena SDB,它使用關係數據庫作爲後端。
  2. Jena TDB,一種原生Java存儲,它使用基於B樹的索引來加快查詢。它的可擴展性比(1)更好。

或者,您也可以選擇可擴展的RDF數據庫(例如4store),並通過Jena ARQ查詢您的數據。這是目前爲止可擴展性和性能都更好的解決方案。

0

Java虛擬機(JVM)中的堆內存不足。要麼增加JVM可用的堆內存量,要麼設計軟件以使用較少的內存,例如以較小的塊處理這些內容。

要增加堆內存,請把以下參數添加到servlet容器或應用服務器的啓動腳本中執行java二進制文件的地方。它告訴JVM最多可以使用512 MB內存;如果仍然不夠,請嘗試更大的值:

-Xmx512m 

在沒有看到實際代碼的情況下,很難說如何改進軟件以減少內存使用。

+0

你能告訴我在Eclipse Helios中具體要在哪裏設置嗎?我找不到 – ProgramME 2011-04-09 14:02:45

+0

如何啓動應用程序服務器?一個地方可能是運行 - >運行配置 - > VM參數。 – 2011-04-09 14:04:29

+0

是的,我仍然得到相同的異常。我按你說的添加了:右擊input.html(應用程序啓動時使用的文件),然後選擇運行配置和VM參數。我已經貼出了代碼 – ProgramME 2011-04-09 14:19:01