take() function error - link analysis using Spark MLlib
I am working on a link analysis project in the retail industry using Spark MLlib. My schema is:
ID - long
Chain - int
Dept - int
Category - int
Company - long
Brand - long
Date - date
ProductSize - int
ProductMeasure - chararray
PurchaseQuantity - int
PurchaseAmount - double
And the code I am using is:
scala> import org.apache.spark._
scala> import org.apache.spark.rdd.RDD
scala> import org.apache.spark.util.IntParam
scala> import org.apache.spark.graphx._
scala> import org.apache.spark.graphx.util.GraphGenerators
scala> case class Transactions(ID:Long,Chain:Int,Dept:Int,Category:Int,Company:Long,Brand:Long,Date:String,ProductSize:Int,ProductMeasure:String,PurchaseQuantity:Int,PurchaseAmount:Double)
defined class Transactions
scala> def parseTransactions(str:String): Transactions = {
| val line = str.split(",")
| Transactions(line(0).toLong,line(1).toInt,line(2).toInt,line(3).toInt,line(4).toInt,line(5).toInt,line(6),line(7).toInt,line(8),line(9).toInt,line(10).toInt)
| }
scala> val textRDD = sc.textFile("/user/cloudera/transactions.csv")
scala> val transactionsRDD = textRDD.map(parseTransactions).cache()
scala> val products = transactionsRDD.map(Transactions => (Transactions.ID,Transactions.Chain,Transactions.Dept,Transactions.Category,Transactions.Company,Transactions.Brand,Transactions.Date)).distinct
scala> products.take(1)
But when I submit that last line, I get the following error:
[Stage 0:> (0 + 1)/7]16/08/24 04:56:13 ERROR executor.Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.NumberFormatException: For input string: "id"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Long.parseLong(Long.java:441)
at java.lang.Long.parseLong(Long.java:483)
at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230)
at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Long.parseLong(Long.java:441)
at java.lang.Long.parseLong(Long.java:483)
at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230)
at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
16/08/24 04:56:13 ERROR scheduler.TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job
16/08/24 04:56:13 ERROR executor.Executor: Exception in task 1.0 in stage 0.0 (TID 1)
java.lang.NumberFormatException: For input string: "6.67"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:492)
at java.lang.Integer.parseInt(Integer.java:527)
at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229)
at scala.collection.immutable.StringOps.toInt(StringOps.scala:31)
at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost): java.lang.NumberFormatException: For input string: "6.67"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Integer.parseInt(Integer.java:492)
at java.lang.Integer.parseInt(Integer.java:527)
at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229)
at scala.collection.immutable.StringOps.toInt(StringOps.scala:31)
at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Long.parseLong(Long.java:441)
at java.lang.Long.parseLong(Long.java:483)
at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230)
at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1328)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:52)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:54)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:56)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:58)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:60)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:62)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:64)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:66)
at $iwC$$iwC$$iwC.<init>(<console>:68)
at $iwC$$iwC.<init>(<console>:70)
at $iwC.<init>(<console>:72)
at <init>(<console>:74)
at .<init>(<console>:78)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NumberFormatException: For input string: "id"
at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.lang.Long.parseLong(Long.java:441)
at java.lang.Long.parseLong(Long.java:483)
at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230)
at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Does anyone know why I am getting this error? It should simply have returned the array I created...
Thank you very much!
I was thinking the error might be in "line(6)", where I have the Date field, which I am passing as a String.
Do you ever parse it to a date? Just keep it as a string. I don't know any Scala, so I may be off base, but I do know Spark. It's a NumberFormatException, so something is trying to parse a string into a number. Is anything besides that constructor attempting casts in Scala? Try logging a few rows of transactionsRDD before doing the distinct. Remove some of the toInt calls and see whether the problem persists (maybe remove them all).
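For reference, a minimal sketch of a parser that would avoid both NumberFormatExceptions shown above. It assumes the CSV has a header row (the "id" in the first stack trace looks like a header value, not data) and that PurchaseAmount is fractional (the "6.67" in the second trace suggests so); the field positions and types follow the schema given in the question:

def parseTransactions(str: String): Transactions = {
  val line = str.split(",")
  // Convert each field to the type declared in the Transactions case class;
  // PurchaseAmount needs toDouble, since "6.67" cannot be parsed as an Int.
  Transactions(line(0).toLong, line(1).toInt, line(2).toInt, line(3).toInt,
    line(4).toLong, line(5).toLong, line(6), line(7).toInt, line(8),
    line(9).toInt, line(10).toDouble)
}

val textRDD = sc.textFile("/user/cloudera/transactions.csv")
val header = textRDD.first()                      // presumably the header line starting with "id"
val transactionsRDD = textRDD
  .filter(_ != header)                            // drop the header row before parsing
  .map(parseTransactions)
  .cache()

This is untested against the actual file; if some data rows also contain non-numeric values, wrapping the conversions in a scala.util.Try and filtering out the failures would be the more defensive choice.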
Jan, when I remove all the .toInt calls I get this error:
def parseTransactions(str: String): Transactions = {
| val line = str.split(",")
| Transactions(line(0).toLong, line(1), line(2), line(3), line(4), line(5), line(6), line(7), line(8), line(9), line(10))
| }
<console>:47: error: type mismatch;
pointing at Transactions(line(0).toLong, line(1), line(2), line(3), line(4), line(5), line(6), line(7), line(8), line(9), line(10))
Do you know why?
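That type mismatch is expected: the Transactions case class declares Chain, Dept, Category, ProductSize and PurchaseQuantity as Int (and Company and Brand as Long), so a raw String such as line(1) cannot be passed in those positions. A tiny hypothetical analogue of what the compiler rejects:

case class Pair(a: Long, b: Int)     // stand-in for Transactions
// Pair("1".toLong, "2")             // does not compile: type mismatch;
//                                   //   found: String, required: Int
val ok = Pair("1".toLong, "2".toInt) // compiles once the String is converted

So the conversions have to stay; the likelier culprits are the header row and the toInt on the fractional PurchaseAmount, as in the sketch above.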