我希望能夠像在普通SQL中一樣過濾日期。那可能嗎?我遇到了如何將字符串從文本文件轉換爲日期的問題。如何在Scala中將字符串轉換爲日期,以便在SparkSQL中進行篩選?
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.sql._
import org.apache.log4j._
import java.text._
//import java.util.Date
import java.sql.Date
object BayAreaBikeAnalysis {
case class Station(ID:Int, name:String, lat:Double, longitude:Double, dockCount:Int, city:String, installationDate:Date)
case class Status(station_id:Int, bikesAvailable:Int, docksAvailable:Int, time:String)
val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
def extractStations(line: String): Station = {
val fields = line.split(",",-1)
val station:Station = Station(fields(0).toInt, fields(1), fields(2).toDouble, fields(3).toDouble, fields(4).toInt, fields(5), dateFormat.parse(fields(6)))
return station
}
def extractStatus(line: String): Status = {
val fields = line.split(",",-1)
val status:Status = Status(fields(0).toInt, fields(1).toInt, fields(2).toInt, fields(3))
return status
}
def main(args: Array[String]) {
// Set the log level to only print errors
//Logger.getLogger("org").setLevel(Level.ERROR)
// Use new SparkSession interface in Spark 2.0
val spark = SparkSession
.builder
.appName("BayAreaBikeAnalysis")
.master("local[*]")
.config("spark.sql.warehouse.dir", "file:///C:/temp")
.getOrCreate()
//Load files into data sets
import spark.implicits._
val stationLines = spark.sparkContext.textFile("Data/station.csv")
val stations = stationLines.map(extractStations).toDS().cache()
val statusLines = spark.sparkContext.textFile("Data/status.csv")
val statuses = statusLines.map(extractStatus).toDS().cache()
//people.select("name").show()
stations.select("installationDate").show()
spark.stop()
}
}
很明顯,字段(6).toDate()不能編譯,但我不確定要使用什麼。
使用日期解析器將字符串解析爲日期。看看約達時間 – dumitru