0
我想在 Spark DataFrame 中添加一列,其值是現有 DataFrame 每一行的 hashMod(哈希值取模)。在下面的例子中,我已經可以對某個特定列「data」計算哈希值;但若要對整個 DataFrame 行(所有列)計算,我該如何實現相同的效果?(標題:在 Spark DataFrame 中添加一列,其值爲現有 DataFrame 行的 hashMod)
/** Namespace for the record types used to build the example data set. */
object Container {

  /**
   * Record with a single integer field.
   *
   * @param data the integer value carried by this record
   */
  case class intContainer(data: Int)
}
// SQL entry point built on `sc`, which must already be in scope (it is not defined in this snippet).
val sqlContext = new SQLContext(sc)
// Implicit conversions from the context; `df1.toDF()` below relies on them.
import sqlContext.implicits._

// UDF mapping a column value to a bucket id in the range [0, 10), derived from its hashCode.
//  - The JVM remainder operator keeps the sign of the dividend, so a bare `hashCode % 10`
//    yields a negative bucket for negative hash codes; the result is shifted back into
//    the non-negative range. Non-negative hash codes produce the same bucket as before.
//  - A null input is assigned bucket 0 instead of throwing a NullPointerException.
val getBucket = udf { (data: Object) =>
  val bucketCount = 10
  if (data == null) 0
  else ((data.hashCode() % bucketCount) + bucketCount) % bucketCount
}

// Single-field schema ("age": Integer). NOTE(review): not referenced by any statement
// visible in this snippet — confirm whether it is still needed.
val schema = StructType(List(StructField("age", IntegerType)))

// Sample input values.
val userList = List(23, 24, 25, 57)

// Wrap each value in the case class so the resulting DataFrame gets a `data` column.
// Despite its name, `df1` is an RDD; `df` is the DataFrame built from it.
val df1: RDD[Container.intContainer] =
  sc.parallelize(userList).map(value => Container.intContainer(value))
val df = df1.toDF()

// Expose the DataFrame to SQL under the name "dfcount".
df.registerTempTable("dfcount")

// Derive `count` (data + 1) and a `time` column alongside the original `data` column.
val countdf = sqlContext.sql("select data,data+1 as count, current_timestamp() as time from dfcount")

// Append the bucket id computed from the single column `data`.
val xx = countdf.withColumn("bucket_id", getBucket(col("data")))
df 中最多可能有多少列? – mrsrinivas