
I am submitting my job with the line below, but a `NoClassDefFoundError` is thrown even though 7-Zip shows that the jar containing the class in question is present inside the uberjar being run:

spark-submit --class org.dia.red.ctakes.spark.CtakesSparkMain target/spark-ctakes-0.1-job.jar 

The error being thrown is:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/uima/cas/FSIndex 
     at org.dia.red.ctakes.spark.CtakesSparkMain.main(CtakesSparkMain.java:50) 
     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
     at java.lang.reflect.Method.invoke(Method.java:498) 
     at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743) 
     at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187) 
     at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212) 
     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126) 
     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 
Caused by: java.lang.ClassNotFoundException: org.apache.uima.cas.FSIndex 
     at java.net.URLClassLoader.findClass(URLClassLoader.java:381) 
     at java.lang.ClassLoader.loadClass(ClassLoader.java:424) 
     at java.lang.ClassLoader.loadClass(ClassLoader.java:357) 
     ... 10 more 

The CtakesSparkMain class below calls the CtakesFunction class:

package org.dia.red.ctakes.spark; 

import java.io.PrintWriter; 

import org.apache.spark.SparkConf; 
import org.apache.spark.api.java.JavaRDD; 
import org.apache.spark.api.java.JavaSparkContext; 


public class CtakesSparkMain { 

    /** 
    * @param args 
    */ 
    public static void main(String[] args) throws Exception { 


     SparkConf conf = new SparkConf().setAppName("ctakes"); 
     JavaSparkContext sc = new JavaSparkContext(conf); 

     // Run the cTAKES annotator over each line of the input file. 
     JavaRDD<String> lines = sc.textFile("/mnt/d/metistream/ctakes-streaming/SparkStreamingCTK/testdata100.txt").map(new CtakesFunction()); 

     // Materialize the first two results and write the first one to disk. 
     String first = lines.take(2).get(0); 
     PrintWriter out = new PrintWriter("/mnt/d/metistream/ctakes-streaming/SparkStreamingCTK/test_outputs/output.txt"); 
     out.println(first); 
     out.close(); 
     sc.close(); 

    } 
} 

CtakesFunction:

package org.dia.red.ctakes.spark; 

import java.io.IOException; 
import java.io.ObjectInputStream; 
import java.util.ArrayList; 
import java.util.Iterator; 

import org.apache.ctakes.typesystem.type.refsem.OntologyConcept; 
import org.apache.ctakes.typesystem.type.textsem.*; 
import org.apache.uima.UIMAException; 
import org.apache.uima.analysis_engine.AnalysisEngineDescription; 
import org.apache.uima.cas.FSIndex; 
import org.apache.uima.fit.factory.JCasFactory; 
import org.apache.uima.fit.pipeline.SimplePipeline; 
import org.apache.uima.jcas.JCas; 
import org.apache.uima.jcas.cas.FSArray; 

import org.apache.spark.api.java.function.Function; 

import it.cnr.iac.CTAKESClinicalPipelineFactory; 
import org.json.*; 

/** 
* @author Selina Chu, Michael Starch, and Giuseppe Totaro 
* 
*/ 

public class CtakesFunction implements Function<String, String> { 

    // The cTAKES pipeline objects are not serializable, so they are marked 
    // transient and rebuilt on each executor after deserialization. 
    transient JCas jcas = null; 
    transient AnalysisEngineDescription aed = null; 

    private void setup() throws UIMAException { 

     System.setProperty("ctakes.umlsuser", ""); 
     System.setProperty("ctakes.umlspw", ""); 
     this.jcas = JCasFactory.createJCas(); 
     this.aed = CTAKESClinicalPipelineFactory.getDefaultPipeline(); 

    } 

    // Re-initialize the transient cTAKES objects when this function is 
    // deserialized on an executor. 
    private void readObject(ObjectInputStream in) { 
     try { 
      in.defaultReadObject(); 
      this.setup(); 
     } catch (ClassNotFoundException e) { 
      e.printStackTrace(); 
     } catch (IOException e) { 
      e.printStackTrace(); 
     } catch (UIMAException e) { 
      e.printStackTrace(); 
     } 
    } 

    @Override 
    public String call(String paragraph) throws Exception { 

     this.jcas.setDocumentText(paragraph); 

     // Run the cTAKES pipeline on the document, then iterate over the 
     // identified annotations. 
     SimplePipeline.runPipeline(this.jcas, this.aed); 
     FSIndex index = this.jcas.getAnnotationIndex(IdentifiedAnnotation.type); 
     Iterator iter = index.iterator(); 


     JSONArray annotationsArray = new JSONArray(); 
     JSONObject allAnnotations = new JSONObject(); 

     ArrayList<String> types = new ArrayList<String>(); 

     types.add("org.apache.ctakes.typesystem.type.textsem.SignSymptomMention"); 
     types.add("org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention"); 
     types.add("org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention"); 
     types.add("org.apache.ctakes.typesystem.type.textsem.ProcedureMention"); 
     types.add("import org.apache.ctakes.typesystem.type.textsem.MedicationMention"); 

     String type; 
     String[] splitType; 
     FSArray snomedArray; 
     ArrayList<String> snomedStringArray = new ArrayList<String>(); 

     // Emit one JSON object per annotation of an interesting type. 
     while (iter.hasNext()){ 
      IdentifiedAnnotation annotation = (IdentifiedAnnotation)iter.next(); 
      type = annotation.getType().toString(); 
      if (types.contains(type)){ 
       JSONObject annotations = new JSONObject(); 

       splitType = type.split("[.]"); 
       annotations.put("id", annotation.getId()); 
       annotations.put("subject", annotation.getSubject()); 
       annotations.put("type", splitType[splitType.length - 1]); 
       annotations.put("text", annotation.getCoveredText()); 
       annotations.put("polarity", annotation.getPolarity()); 
       annotations.put("confidence", annotation.getConfidence()); 

       snomedArray = annotation.getOntologyConceptArr(); 
       for (int i = 0; i < snomedArray.size(); i++){ 
        snomedStringArray.add(((OntologyConcept)snomedArray.get(i)).getCode()); 
       } 
       annotations.put("snomed_codes", snomedStringArray); 
       snomedStringArray.clear(); 
       annotationsArray.put(annotations); 
      } 

     } 

     allAnnotations.put("Annotations", annotationsArray); 
     this.jcas.reset(); 
     return allAnnotations.toString(); 
    } 
} 

I have been trying to modify the repository at https://github.com/selinachu/SparkStreamingCTK to use plain Spark (and Spark 2.0) instead of Spark Streaming, but have not been able to resolve this issue.

Answers

受YuGagarin反饋的啓發,我使用SBT組裝來組裝UberJarCTAKES本身。將所有東西編譯成一個「真正的」胖罐解決了上述問題。

但是,我應該指出,我目前正在處理的cTAKES和Spark仍然存在一些殘留問題。
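For reference, a minimal sbt-assembly sketch of that approach. Everything below is an assumption rather than the project's actual build file: the plugin and library versions, the artifact names, and the merge strategy (cTAKES and UIMA jars ship overlapping META-INF entries, which usually have to be merged or discarded before assembly succeeds):

// build.sbt -- minimal sketch; versions and names are assumptions. 
// sbt-assembly is assumed enabled in project/plugins.sbt via: 
//   addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.5") 

name := "spark-ctakes" 
version := "0.1" 
scalaVersion := "2.11.8" 

libraryDependencies ++= Seq( 
  // Spark is provided by the cluster at runtime, so keep it out of the fat jar. 
  "org.apache.spark" %% "spark-core" % "2.1.0" % "provided", 
  // cTAKES and its transitive UIMA dependencies get unpacked into the jar. 
  "org.apache.ctakes" % "ctakes-clinical-pipeline" % "4.0.0" 
) 

// UIMA/cTAKES jars contain conflicting META-INF entries; without a merge 
// strategy the assembly task fails on the conflicts. 
assemblyMergeStrategy in assembly := { 
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard 
  case _                             => MergeStrategy.first 
} 

With something like this in place, sbt assembly produces a single jar whose class files sit at the top level, which the classloader used by spark-submit can resolve directly.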

What version of cTakes and Spark are you using? And what other issues have you run into? – YuGagarin

I am using a patched version of cTAKES 4.0.0 (the patch is outlined here: https://issues.apache.org/jira/browse/CTAKES-445). For Spark, I'm currently on 2.1.0. I'm in the middle of a full rebuild of my project, so I should be able to reproduce the error I'm hitting with the cTAKES fast pipeline within the hour. (Oddly, the cTAKES default pipeline works fine.) – mongolol

Glad to hear it. So you got cTakes 4.0 working with Spark? I'm working on a similar project btw (ICD9/ICD10 codes) and wrestling with similar issues myself... – YuGagarin

This is because what Maven generates for this project is not exactly an uberjar. spark-submit cannot load classes from a jar nested inside another jar; that would require a special classloader. The correct approach is to explode all of the jars and put all of the contained classes directly into the uberjar, which is what the maven-shade-plugin does: https://maven.apache.org/plugins/maven-shade-plugin/

So you have to change the pom.xml to generate a proper uberjar for this project.
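As a rough sketch, the shade-plugin section of the pom.xml might look like the following; the plugin version and the signature-file filter are assumptions, not taken from the project:

<!-- maven-shade-plugin sketch; version and filters are assumptions --> 
<plugin> 
  <groupId>org.apache.maven.plugins</groupId> 
  <artifactId>maven-shade-plugin</artifactId> 
  <version>3.1.0</version> 
  <executions> 
    <execution> 
      <phase>package</phase> 
      <goals> 
        <goal>shade</goal> 
      </goals> 
      <configuration> 
        <filters> 
          <!-- Drop signature files so the merged jar is not rejected as tampered. --> 
          <filter> 
            <artifact>*:*</artifact> 
            <excludes> 
              <exclude>META-INF/*.SF</exclude> 
              <exclude>META-INF/*.DSA</exclude> 
              <exclude>META-INF/*.RSA</exclude> 
            </excludes> 
          </filter> 
        </filters> 
      </configuration> 
    </execution> 
  </executions> 
</plugin> 

Running mvn package then produces a shaded jar in which org/apache/uima/cas/FSIndex.class and the rest of the UIMA classes are top-level entries rather than jars-within-a-jar.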

This does not provide an answer to the question. Once you have sufficient [reputation](https://stackoverflow.com/help/whats-reputation) you will be able to [comment on any post](https://stackoverflow.com/help/privileges/comment); instead, [provide answers that don't require clarification from the asker](https://meta.stackexchange.com/questions/214173/why-do-i-need-50-reputation-to-comment-what-can-i-do-instead). - [From review](/review/low-quality-posts/17169637) – Beloo

This does not really answer the question. If you have a different question, you can ask it by clicking [Ask Question](https://stackoverflow.com/questions/ask). You can also [add a bounty](https://stackoverflow.com/help/privileges/set-bounties) to draw more attention to this question once you have enough [reputation](https://stackoverflow.com/help/whats-reputation). - [From review](/review/low-quality-posts/17169637) – mkl
