
I am trying to fetch data from Cloudant with Java code and getting an error; Spark cannot connect to Cloudant.

I tried the following Spark and spark-cloudant versions:

Spark 2.0.0,

Spark 2.0.1,

Spark 2.0.2

I get the same error for all of these versions; the error is posted below.

If I add a Scala dependency to resolve the error, that dependency then conflicts with the Spark libraries.

Below is my Java code:

package spark.cloudant.connecter;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SQLContext;

public class cloudantconnecter {
    public static void main(String[] args) {
        try {
            // Local Spark context; the spark-cloudant data source is selected
            // by name in format() below, so no direct import is needed.
            SparkConf sparkConf = new SparkConf()
                    .setAppName("spark cloudant connecter")
                    .setMaster("local[*]");
            sparkConf.set("spark.streaming.concurrentJobs", "30");

            JavaSparkContext sc = new JavaSparkContext(sparkConf);
            SQLContext sqlContext = new SQLContext(sc);
            System.out.println("initialization successful");

            // Read the Cloudant database as a DataFrame via spark-cloudant.
            Dataset<org.apache.spark.sql.Row> st = sqlContext.read()
                    .format("com.cloudant.spark")
                    .option("cloudant.host", "HOSTNAME")
                    .option("cloudant.username", "USERNAME")
                    .option("cloudant.password", "PASSWORD")
                    .load("DATABASENAME");

            st.printSchema();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Maven dependencies:

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.10</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>cloudant-labs</groupId>
        <artifactId>spark-cloudant</artifactId>
        <version>2.0.0-s_2.11</version>
    </dependency>
</dependencies>

Error details:

Exception in thread "main" java.lang.NoSuchMethodError: scala/Predef$.ArrowAssoc(Ljava/lang/Object;)Ljava/lang/Object; (loaded from file:/C:/Users/Administrator/.m2/repository/org/scala-lang/scala-library/2.10.6/scala-library-2.10.6.jar by [email protected]) called from class scalaj.http.HttpConstants$ (loaded from file:/C:/Users/Administrator/.m2/repository/org/scalaj/scalaj-http_2.11/2.3.0/scalaj-http_2.11-2.3.0.jar by [email protected]). 
    at scalaj.http.HttpConstants$.liftedTree1$1(Http.scala:637) 
    at scalaj.http.HttpConstants$.<init>(Http.scala:636) 
    at scalaj.http.HttpConstants$.<clinit>(Http.scala) 
    at scalaj.http.BaseHttp$.$lessinit$greater$default$2(Http.scala:754) 
    at scalaj.http.Http$.<init>(Http.scala:738) 
    at scalaj.http.Http$.<clinit>(Http.scala) 
    at com.cloudant.spark.common.JsonStoreDataAccess.getQueryResult(JsonStoreDataAccess.scala:152) 
    at com.cloudant.spark.common.JsonStoreDataAccess.getTotalRows(JsonStoreDataAccess.scala:99) 
    at com.cloudant.spark.common.JsonStoreRDD.totalRows$lzycompute(JsonStoreRDD.scala:56) 
    at com.cloudant.spark.common.JsonStoreRDD.totalRows(JsonStoreRDD.scala:55) 
    at com.cloudant.spark.common.JsonStoreRDD.totalPartition$lzycompute(JsonStoreRDD.scala:59) 
    at com.cloudant.spark.common.JsonStoreRDD.totalPartition(JsonStoreRDD.scala:58) 
    at com.cloudant.spark.common.JsonStoreRDD.getPartitions(JsonStoreRDD.scala:81) 
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248) 
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246) 
    at scala.Option.getOrElse(Option.scala:120) 
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:246) 
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) 
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248) 
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246) 
    at scala.Option.getOrElse(Option.scala:120) 
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:246) 
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1934) 
    at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1046) 
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) 
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) 
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) 
    at org.apache.spark.rdd.RDD.fold(RDD.scala:1040) 
    at org.apache.spark.sql.execution.datasources.json.InferSchema$.infer(InferSchema.scala:68) 
    at org.apache.spark.sql.DataFrameReader$$anonfun$3.apply(DataFrameReader.scala:317) 
    at org.apache.spark.sql.DataFrameReader$$anonfun$3.apply(DataFrameReader.scala:317) 
    at scala.Option.getOrElse(Option.scala:120) 
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:316) 
    at com.cloudant.spark.DefaultSource.create(DefaultSource.scala:127) 
    at com.cloudant.spark.DefaultSource.createRelation(DefaultSource.scala:105) 
    at com.cloudant.spark.DefaultSource.createRelation(DefaultSource.scala:100) 
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:315) 
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:149) 
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:132) 
    at spark.cloudant.connecter.cloudantconnecter.main(cloudantconnecter.java:24) 

It seems you do not have the correct Scala version. If you are using connector version 2.0.0, make sure you have Scala 2.11. This question from Stack Overflow may help you: http://stackoverflow.com/questions/25089852/what-is-the-reason-for-java-lang-nosuchmethoderror-scala-predef-arrowassoc-upo –
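A quick way to confirm which Scala runtime actually ended up on the classpath is to print it from Java. A minimal sketch, assuming scala-library is already a (transitive) dependency; the class name ScalaVersionCheck is just illustrative:

import scala.util.Properties;

public class ScalaVersionCheck {
    public static void main(String[] args) {
        // Prints something like "version 2.10.6". Anything other than a
        // 2.11.x runtime will break _2.11 artifacts such as spark-cloudant
        // and scalaj-http at their first Scala call, as in the trace above.
        System.out.println(Properties.versionString());
    }
}

If this prints a 2.10.x version while _2.11 artifacts are on the classpath, the NoSuchMethodError above is expected.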


If I add the Scala version then this dependency conflicts with the Spark core library –

Answer


The error is shown because, as the trace indicates, the Scala 2.10 library is in use while the spark-cloudant package is built against Scala 2.11.

So change the dependency spark-core_2.10 to spark-core_2.11 (and spark-mllib_2.10 to spark-mllib_2.11 to match).

The dependencies then become:

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.11</artifactId>
        <version>2.0.1</version>
    </dependency>
    <dependency>
        <groupId>cloudant-labs</groupId>
        <artifactId>spark-cloudant</artifactId>
        <version>2.0.0-s_2.11</version>
    </dependency>
</dependencies>
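To keep the Scala lines from drifting apart again, one option is to hold the Scala binary version in a single Maven property. A minimal sketch; the property names scala.binary.version and spark.version are just a convention, not anything Maven requires:

<properties>
    <!-- One place to pin the Scala line; every _${scala.binary.version}
         artifact below follows it automatically -->
    <scala.binary.version>2.11</scala.binary.version>
    <spark.version>2.0.1</spark.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>cloudant-labs</groupId>
        <artifactId>spark-cloudant</artifactId>
        <version>2.0.0-s_2.11</version>
    </dependency>
</dependencies>

After updating the POM, running mvn dependency:tree makes it easy to check that no _2.10 artifact is still pulled in transitively alongside the _2.11 ones.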