1
/**
 * Downloads the "caserne" CSV from the Montreal open-data portal, converts each
 * data row into a [[test.Caserne]] record, registers the result as the temp
 * table `caserne` and prints it through Spark SQL.
 */
object test {

  /** One CSV record; the field names become the DataFrame column names. */
  case class Caserne(x: String, y: String, Name: String, Description: String)

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("BankDataAnalysis").setMaster("local[1]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // load caserne data: fetch the whole file as one string and split it into lines
    val caserneTxt = sc.parallelize(
      IOUtils.toString(
        new URL("http://donnees.ville.montreal.qc.ca/dataset/c69e78c6-e454-4bd9-9778-e4b0eaf8105b/resource/f6542ad1-31f5-458e-b33d-1a028fab3e98/download/casernessim.csv"),
        Charset.forName("utf8")).split("\n"))

    // The first line of the file is the CSV header.
    val header = caserneTxt.first()

    val caserne = caserneTxt
      // Drop the header while the element is still a String. Comparing the
      // already-split Array[String] against the String header is never equal,
      // so filtering after the split silently keeps the header row.
      .filter(line => line != header)
      // limit = -1 keeps trailing empty fields, so a row whose last column is
      // empty still yields an element at index 3.
      .map(line => line.split(",", -1))
      .map(s => Caserne(
        s(0),
        s(1),
        // keep only the digits of the name column
        s(2).replaceAll("[^\\d]", "").trim(),
        // replace every HTML tag except <a>/</a> with a space
        s(3).replaceAll("""<(?!\/?a(?=>|\s.*>))\/?.*?>""", " ").trim()
      ))
      .toDF()

    caserne.registerTempTable("caserne")
    sqlContext.sql("Select * from caserne").show()
  }
}
我需要移除 CSV 文件的標頭行。我使用了過濾器 filter(s => s != header),但它沒有生效。感謝您的幫助。
謝謝你的幫助,我已經從參考資料中找到了簡單的方法 :)