
I'm trying to read HBase data using Spark and Scala, but I'm getting an error I can't make sense of: HBase TimeoutIOException.

Code

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.client.{HBaseAdmin, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes

object HBase {
  def main(args: Array[String]): Unit = {
    val tableName = "posts"
    val sc = new SparkContext(new SparkConf().setAppName("HBaseReadWrite").setMaster("local[4]"))

    // Point the HBase client at the local ZooKeeper quorum and the table to scan
    val conf = HBaseConfiguration.create()
    conf.set(HConstants.ZOOKEEPER_QUORUM, "localhost")
    conf.set(TableInputFormat.INPUT_TABLE, tableName)

    // Check the table name directly; conf.get(tableName) would look up a
    // configuration key named "posts" and return null
    val admin = new HBaseAdmin(conf)
    if (!admin.isTableAvailable(tableName)) {
      println("Table doesn't exist")
      return
    }

    // Scan the table as an RDD of (row key, Result) pairs
    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])

    // Print the first five row keys
    println(hBaseRDD.map(_._2).map(result => Bytes.toString(result.getRow)).take(5).mkString("\n"))
  }
}

build.sbt

name := "NLPAnnotationController" 

version := "1.0" 

scalaVersion := "2.10.5" 

resolvers += "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" 
resolvers += "sonatype snapshots" at "https://oss.sonatype.org/content/repositories/snapshots/" 

organization := "com.scryAnalytics" 

// Note: this version string is an HBase version, not a Hadoop version
val hbase_version = "0.98.19-hadoop2"

// Servlet/Jetty classes ship in several of these jars and clash at runtime,
// so exclude them everywhere
val servletExclusions = Seq(
    ExclusionRule(organization = "javax.servlet", name = "javax.servlet-api"),
    ExclusionRule(organization = "org.mortbay.jetty", name = "jetty"),
    ExclusionRule(organization = "org.mortbay.jetty", name = "servlet-api-2.5")
)

libraryDependencies ++= Seq(
    "org.apache.spark" % "spark-core_2.10" % "1.2.0",
    "org.apache.hbase" % "hbase-spark" % "1.2.0-cdh5.7.2",
    "org.apache.hbase" % "hbase-client" % hbase_version excludeAll (servletExclusions: _*),
    "org.apache.hbase" % "hbase-common" % hbase_version excludeAll (servletExclusions: _*),
    "org.apache.hbase" % "hbase-server" % hbase_version excludeAll (servletExclusions: _*),
    "org.scala-lang" % "scala-library" % "2.10.5",
    "it.nerdammer.bigdata" % "spark-hbase-connector_2.10" % "1.0.3"
)

Error

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/exceptions/TimeoutIOException 
at HBase$.main(HBase.scala:20) 
at HBase.main(HBase.scala) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
at java.lang.reflect.Method.invoke(Method.java:606) 
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147) 
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.exceptions.TimeoutIOException 
at java.net.URLClassLoader$1.run(URLClassLoader.java:366) 
at java.net.URLClassLoader$1.run(URLClassLoader.java:355) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:354) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:425) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:358) 

I've tried changing the versions of the dependencies, but still no progress. Any help would be great. Thanks in advance.


I'm guessing you're working with a distributed system, and your local program is communicating with a remote system (a database or Hadoop). Hadoop throws an exception over the network protocol and it comes back to you serialized. On your end, the application wants to deserialize that error message into an exception object of type 'org/apache/hadoop/hbase/exceptions/TimeoutIOException'. To do that it needs to find the class on the classpath, and it fails. Do you have the library on your local classpath? If not, add it – radumanolescu
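For context, 'org.apache.hadoop.hbase.exceptions.TimeoutIOException' belongs to the HBase 1.x client jars; as far as I can tell it is not shipped in the 0.98.x artifacts, while hbase-spark 1.2.0-cdh5.7.2 references it. A minimal sketch of aligning every HBase artifact on one 1.x version (the CDH version string is copied from the question's hbase-spark dependency and is an assumption about your cluster):

// Sketch: one HBase version for all HBase artifacts, so classes referenced
// by hbase-spark (such as TimeoutIOException) end up on the classpath
val hbase_version = "1.2.0-cdh5.7.2"

libraryDependencies ++= Seq(
    "org.apache.hbase" % "hbase-spark" % hbase_version,
    "org.apache.hbase" % "hbase-client" % hbase_version,
    "org.apache.hbase" % "hbase-common" % hbase_version,
    "org.apache.hbase" % "hbase-server" % hbase_version
)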


Which libraries are you talking about? I have the HBase dependencies in the build file. – wadhwasahil


@wadhwasahil: You can set the timeout with 'conf.setInt("timeout", 120000)'. You also need to check whether you have all the dependencies; when you run your job you can pass the HBase dependencies using --jars with the spark-submit command. – Shankar
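For reference, a sketch of what passing the HBase jars to spark-submit might look like; every path below is a placeholder, not a value from the question:

# Placeholders: point --jars at the actual HBase jars on your machine
spark-submit --class HBase --master local[4] \
  --jars /path/to/hbase-client.jar,/path/to/hbase-common.jar,/path/to/hbase-server.jar \
  /path/to/your-app.jar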

Answer


Most likely, the jar you're shipping to the workers doesn't contain the classes from your dependencies. Build a "fat" jar with sbt-assembly and submit that to Spark.
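A minimal sbt-assembly sketch, assuming an sbt 0.13-era build to match the question (the plugin version is an assumption):

// project/plugins.sbt
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")

// build.sbt: Hadoop/HBase jars typically collide on META-INF entries,
// so discard those and keep the first copy of everything else
assemblyMergeStrategy in assembly := {
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard
  case _ => MergeStrategy.first
}

Running 'sbt assembly' then produces a single fat jar under target/scala-2.10/ that you can pass to spark-submit instead of the plain package jar.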