
Spark - problem reading a JSON file

I am trying to read a JSON file with the code below, but it returns multiple errors:

val df = sqlContext.read.json("E:/Dataset/Apps_for_Android_5.json")

Please help with these errors; thanks in advance.

Error:

scala> val df = sqlContext.read.json("E:/Dataset/Apps_for_Android_5.json")
[Stage 2:>               (0 + 4)/10]
17/01/22 08:15:09 ERROR Executor: Exception in task 2.0 in stage 2.0 (TID 14)
java.util.NoSuchElementException: None.get
     at scala.None$.get(Option.scala:347)
     at scala.None$.get(Option.scala:345)
     at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
     at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
     at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
     at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 2.0 in stage 2.0 (TID 14, localhost): java.util.NoSuchElementException: None.get
     ... (same None.get stack trace as above)
17/01/22 08:15:09 ERROR TaskSetManager: Task 2 in stage 2.0 failed 1 times; aborting job
17/01/22 08:15:09 ERROR Executor: Exception in task 1.0 in stage 2.0 (TID 13)
java.util.NoSuchElementException: None.get
     ... (same None.get stack trace as above)
17/01/22 08:15:09 ERROR Executor: Exception in task 4.0 in stage 2.0 (TID 16)
org.apache.spark.TaskKilledException
     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)
     at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
     at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 12)
java.util.NoSuchElementException: None.get
     ... (same None.get stack trace as above)
17/01/22 08:15:09 ERROR Executor: Exception in task 3.0 in stage 2.0 (TID 15)
java.util.NoSuchElementException: None.get
     ... (same None.get stack trace as above)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 4.0 in stage 2.0 (TID 16, localhost): org.apache.spark.TaskKilledException
     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)
     at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
     at java.lang.Thread.run(Unknown Source)

org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 2.0 failed 1 times, most recent failure: Lost task 2.0 in stage 2.0 (TID 14, localhost): java.util.NoSuchElementException: None.get
     ... (same None.get stack trace as above)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1936)
    at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1065)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
    at org.apache.spark.rdd.RDD.fold(RDD.scala:1059)
    at org.apache.spark.sql.execution.datasources.json.InferSchema$.infer(InferSchema.scala:68)
    at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.inferSchema(JsonFileFormat.scala:62)
    at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:421)
    at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:421)
    at scala.Option.orElse(Option.scala:289)
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:420)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:149)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:294)
    at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:249)
    ... 52 elided
Caused by: java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
    at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)
Every JSON file? Or one in particular? – Aaron

Every file causes this error. –

Answer

This looks like a reported Spark issue with, as yet, no clear isolation or resolution: https://issues.apache.org/jira/browse/SPARK-16599
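
As an aside, the driver stack trace shows the job dying inside schema inference (InferSchema.infer), a separate pass Spark runs over the file before the actual read. Below is a minimal sketch of supplying an explicit schema so that pass is skipped; the field names are illustrative guesses rather than taken from the actual file, and whether this sidesteps the underlying bug is an assumption, not something confirmed on the ticket.

import org.apache.spark.sql.types._

// Illustrative schema -- replace these fields with the ones actually present
// in Apps_for_Android_5.json. With an explicit schema, read.json does not
// launch the inference job that the stack trace shows failing.
val schema = StructType(Seq(
  StructField("reviewerID", StringType),
  StructField("asin", StringType),
  StructField("overall", DoubleType),
  StructField("reviewText", StringType)
))

val df = sqlContext.read.schema(schema).json("E:/Dataset/Apps_for_Android_5.json")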

The only workaround suggested on the ticket is to downgrade to Spark 1.6.2.
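
Note that the reading code itself does not change after the downgrade; a 1.6.2 spark-shell also predefines sqlContext, so the same call should work as-is. A quick sanity check, using the path from the question:

// Confirm which version the shell is actually running
println(sc.version)  // expect 1.6.2 after the downgrade

val df = sqlContext.read.json("E:/Dataset/Apps_for_Android_5.json")
df.printSchema()
df.show(5)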

Thank you very much for the response, sir! I will try it now. –

That worked, thanks again sir! –

You can accept/upvote the answer to mark it as useful for future readers. –