1
使用 Scala 和 IntelliJ:將 XML 數據幀轉換為 CSV 文件
我有一個 XML 文件,並把它讀入數據幀(DataFrame),如下所示:
var dftest = spark.read.format("com.databricks.spark.xml").option("rowTag","transferBatch").load(file)
該架構(schema)很長,包含許多序列元素節點。此外,某些列的數據類型也各不相同。
root
|-- accountingInfo: struct (nullable = true)
| |-- currencyConversion: struct (nullable = true)
| | |-- ExchangeRateDefinition: struct (nullable = true)
| | | |-- exchangeRate: long (nullable = true)
| | | |-- exchangeRateCode: long (nullable = true)
| | | |-- numberOfDecimalPlaces: long (nullable = true)
| |-- localCurrency: string (nullable = true)
| |-- tapDecimalPlaces: long (nullable = true)
|-- auditControlInfo: struct (nullable = true)
| |-- callEventDetailsCount: long (nullable = true)
| |-- earliestCallTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- latestCallTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- operatorSpecInformation: struct (nullable = true)
| | |-- OperatorSpecInformation: array (nullable = true)
| | | |-- element: string (containsNull = true)
| |-- totalChargeValueList: struct (nullable = true)
| | |-- TotalChargeValue: struct (nullable = true)
| | | |-- chargeType: string (nullable = true)
| | | |-- totalCharge: long (nullable = true)
| |-- totalDiscountValue: long (nullable = true)
| |-- totalTaxValue: long (nullable = true)
|-- batchControlInfo: struct (nullable = true)
| |-- fileAvailableTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- fileCreationTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- fileSequenceNumber: string (nullable = true)
| |-- recipient: string (nullable = true)
| |-- releaseVersionNumber: long (nullable = true)
| |-- sender: string (nullable = true)
| |-- specificationVersionNumber: long (nullable = true)
| |-- transferCutOffTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
|-- callEventDetails: struct (nullable = true)
| |-- gprsCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- gprsBasicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | | |-- imsi: string (nullable = true)
| | | | | | | |-- msisdn: string (nullable = true)
| | | | | |-- pdpAddress: string (nullable = true)
| | | | | |-- pdpType: long (nullable = true)
| | | | |-- chargingId: string (nullable = true)
| | | | |-- gprsDestination: struct (nullable = true)
| | | | | |-- accessPointNameNI: string (nullable = true)
| | | | | |-- accessPointNameOI: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- gprsLocationInformation: struct (nullable = true)
| | | | |-- gprsNetworkLocation: struct (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntity: struct (nullable = true)
| | | | | | |-- RecEntityCode: array (nullable = true)
| | | | | | | |-- element: long (containsNull = true)
| | | |-- gprsServiceUsed: struct (nullable = true)
| | | | |-- chargeInformationList: struct (nullable = true)
| | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | |-- chargedItem: long (nullable = true)
| | | | | | |-- exchangeRateCode: long (nullable = true)
| | | | |-- gprsServiceUsageList: struct (nullable = true)
| | | | | |-- GprsServiceUsage: struct (nullable = true)
| | | | | | |-- dataVolumeIncoming: long (nullable = true)
| | | | | | |-- dataVolumeOutgoing: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
| | | |-- typeOfControllingNode: long (nullable = true)
| |-- mobileOriginatedCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- basicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | |-- imsi: string (nullable = true)
| | | | | | |-- msisdn: string (nullable = true)
| | | | |-- destination: struct (nullable = true)
| | | | | |-- calledNumber: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- basicServiceUsedList: struct (nullable = true)
| | | | |-- BasicServiceUsed: struct (nullable = true)
| | | | | |-- basicService: struct (nullable = true)
| | | | | | |-- serviceCode: struct (nullable = true)
| | | | | | | |-- teleServiceCode: string (nullable = true)
| | | | | |-- chargeInformationList: struct (nullable = true)
| | | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | | |-- callTypeGroup: struct (nullable = true)
| | | | | | | | |-- callTypeLevel1: long (nullable = true)
| | | | | | | | |-- callTypeLevel2: long (nullable = true)
| | | | | | | | |-- callTypeLevel3: long (nullable = true)
| | | | | | | | |-- calledCountryCode: string (nullable = true)
| | | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | | |-- chargedItem: long (nullable = true)
| | | | | | | |-- exchangeRateCode: long (nullable = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- locationInformation: struct (nullable = true)
| | | | |-- networkLocation: struct (nullable = true)
| | | | | |-- callReference: string (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntityCode: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
| |-- mobileTerminatedCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- basicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- callOriginator: struct (nullable = true)
| | | | | |-- callingNumber: string (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | |-- imsi: string (nullable = true)
| | | | | | |-- msisdn: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- basicServiceUsedList: struct (nullable = true)
| | | | |-- BasicServiceUsed: struct (nullable = true)
| | | | | |-- basicService: struct (nullable = true)
| | | | | | |-- serviceCode: struct (nullable = true)
| | | | | | | |-- teleServiceCode: string (nullable = true)
| | | | | |-- chargeInformationList: struct (nullable = true)
| | | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | | |-- chargedItem: long (nullable = true)
| | | | | | | |-- exchangeRateCode: long (nullable = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- locationInformation: struct (nullable = true)
| | | | |-- networkLocation: struct (nullable = true)
| | | | | |-- callReference: string (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntityCode: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
|-- networkInfo: struct (nullable = true)
| |-- calledNumAnalysis: struct (nullable = true)
| | |-- CalledNumAnalysis: struct (nullable = true)
| | | |-- calledNumAnalysisCode: long (nullable = true)
| | | |-- countryCodeTable: struct (nullable = true)
| | | | |-- CountryCode: string (nullable = true)
| | | |-- iacTable: struct (nullable = true)
| | | | |-- Iac: string (nullable = true)
| |-- networkType: long (nullable = true)
| |-- recEntityInfo: struct (nullable = true)
| | |-- RecEntityDefinition: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- recEntityCode: long (nullable = true)
| | | | |-- recEntityId: struct (nullable = true)
| | | | | |-- gsnaddress: struct (nullable = true)
| | | | | | |-- iPTextV4Address: string (nullable = true)
| | | | | |-- mscId: string (nullable = true)
| | | | | |-- msisdn: string (nullable = true)
| | | | |-- recEntityType: long (nullable = true)
| |-- utcTimeOffsetInfo: struct (nullable = true)
| | |-- UtcTimeOffsetDefinition: struct (nullable = true)
| | | |-- utcTimeOffset: string (nullable = true)
| | | |-- utcTimeOffsetCode: long (nullable = true)
當我想查看數據幀中的元素時,它會以如下表格的形式顯示: table
我不確定該如何將這個數據幀寫入 CSV 文件。
有什麼建議嗎?謝謝
spark-csv 模塊現在已作爲一項新功能內建於 Spark 2.0 中。 https://spark.apache.org/releases/spark-release-2-0-0.html – RudyVerboven