2016-10-05 56 views
1

使用 Scala 和 IntelliJ:將 XML DataFrame 轉換為 CSV 文件

我有一個 XML 文件,我把它讀入 DataFrame,如下所示:

var dftest = spark.read.format("com.databricks.spark.xml").option("rowTag","transferBatch").load(file) 

Schema 很長,包含許多序列元素節點,而且各列的數據類型也不盡相同。

root 
|-- accountingInfo: struct (nullable = true) 
| |-- currencyConversion: struct (nullable = true) 
| | |-- ExchangeRateDefinition: struct (nullable = true) 
| | | |-- exchangeRate: long (nullable = true) 
| | | |-- exchangeRateCode: long (nullable = true) 
| | | |-- numberOfDecimalPlaces: long (nullable = true) 
| |-- localCurrency: string (nullable = true) 
| |-- tapDecimalPlaces: long (nullable = true) 
|-- auditControlInfo: struct (nullable = true) 
| |-- callEventDetailsCount: long (nullable = true) 
| |-- earliestCallTimeStamp: struct (nullable = true) 
| | |-- localTimeStamp: string (nullable = true) 
| | |-- utcTimeOffset: string (nullable = true) 
| |-- latestCallTimeStamp: struct (nullable = true) 
| | |-- localTimeStamp: string (nullable = true) 
| | |-- utcTimeOffset: string (nullable = true) 
| |-- operatorSpecInformation: struct (nullable = true) 
| | |-- OperatorSpecInformation: array (nullable = true) 
| | | |-- element: string (containsNull = true) 
| |-- totalChargeValueList: struct (nullable = true) 
| | |-- TotalChargeValue: struct (nullable = true) 
| | | |-- chargeType: string (nullable = true) 
| | | |-- totalCharge: long (nullable = true) 
| |-- totalDiscountValue: long (nullable = true) 
| |-- totalTaxValue: long (nullable = true) 
|-- batchControlInfo: struct (nullable = true) 
| |-- fileAvailableTimeStamp: struct (nullable = true) 
| | |-- localTimeStamp: string (nullable = true) 
| | |-- utcTimeOffset: string (nullable = true) 
| |-- fileCreationTimeStamp: struct (nullable = true) 
| | |-- localTimeStamp: string (nullable = true) 
| | |-- utcTimeOffset: string (nullable = true) 
| |-- fileSequenceNumber: string (nullable = true) 
| |-- recipient: string (nullable = true) 
| |-- releaseVersionNumber: long (nullable = true) 
| |-- sender: string (nullable = true) 
| |-- specificationVersionNumber: long (nullable = true) 
| |-- transferCutOffTimeStamp: struct (nullable = true) 
| | |-- localTimeStamp: string (nullable = true) 
| | |-- utcTimeOffset: string (nullable = true) 
|-- callEventDetails: struct (nullable = true) 
| |-- gprsCall: array (nullable = true) 
| | |-- element: struct (containsNull = true) 
| | | |-- equipmentInformation: struct (nullable = true) 
| | | | |-- imeiOrEsn: struct (nullable = true) 
| | | | | |-- imei: string (nullable = true) 
| | | |-- gprsBasicCallInformation: struct (nullable = true) 
| | | | |-- callEventStartTimeStamp: struct (nullable = true) 
| | | | | |-- localTimeStamp: string (nullable = true) 
| | | | | |-- utcTimeOffsetCode: long (nullable = true) 
| | | | |-- chargeableSubscriber: struct (nullable = true) 
| | | | | |-- chargeableSubscriber: struct (nullable = true) 
| | | | | | |-- simChargeableSubscriber: struct (nullable = true) 
| | | | | | | |-- imsi: string (nullable = true) 
| | | | | | | |-- msisdn: string (nullable = true) 
| | | | | |-- pdpAddress: string (nullable = true) 
| | | | | |-- pdpType: long (nullable = true) 
| | | | |-- chargingId: string (nullable = true) 
| | | | |-- gprsDestination: struct (nullable = true) 
| | | | | |-- accessPointNameNI: string (nullable = true) 
| | | | | |-- accessPointNameOI: string (nullable = true) 
| | | | |-- totalCallEventDuration: long (nullable = true) 
| | | |-- gprsLocationInformation: struct (nullable = true) 
| | | | |-- gprsNetworkLocation: struct (nullable = true) 
| | | | | |-- cellId: long (nullable = true) 
| | | | | |-- locationArea: long (nullable = true) 
| | | | | |-- recEntity: struct (nullable = true) 
| | | | | | |-- RecEntityCode: array (nullable = true) 
| | | | | | | |-- element: long (containsNull = true) 
| | | |-- gprsServiceUsed: struct (nullable = true) 
| | | | |-- chargeInformationList: struct (nullable = true) 
| | | | | |-- ChargeInformation: struct (nullable = true) 
| | | | | | |-- chargeDetailList: struct (nullable = true) 
| | | | | | | |-- ChargeDetail: struct (nullable = true) 
| | | | | | | | |-- charge: long (nullable = true) 
| | | | | | | | |-- chargeType: string (nullable = true) 
| | | | | | | | |-- chargeableUnits: long (nullable = true) 
| | | | | | | | |-- chargedUnits: long (nullable = true) 
| | | | | | | | |-- dayCategory: long (nullable = true) 
| | | | | | | | |-- timeBand: long (nullable = true) 
| | | | | | |-- chargedItem: long (nullable = true) 
| | | | | | |-- exchangeRateCode: long (nullable = true) 
| | | | |-- gprsServiceUsageList: struct (nullable = true) 
| | | | | |-- GprsServiceUsage: struct (nullable = true) 
| | | | | | |-- dataVolumeIncoming: long (nullable = true) 
| | | | | | |-- dataVolumeOutgoing: long (nullable = true) 
| | | |-- operatorSpecInformation: struct (nullable = true) 
| | | | |-- OperatorSpecInformation: array (nullable = true) 
| | | | | |-- element: string (containsNull = true) 
| | | |-- typeOfControllingNode: long (nullable = true) 
| |-- mobileOriginatedCall: array (nullable = true) 
| | |-- element: struct (containsNull = true) 
| | | |-- basicCallInformation: struct (nullable = true) 
| | | | |-- callEventStartTimeStamp: struct (nullable = true) 
| | | | | |-- localTimeStamp: string (nullable = true) 
| | | | | |-- utcTimeOffsetCode: long (nullable = true) 
| | | | |-- chargeableSubscriber: struct (nullable = true) 
| | | | | |-- simChargeableSubscriber: struct (nullable = true) 
| | | | | | |-- imsi: string (nullable = true) 
| | | | | | |-- msisdn: string (nullable = true) 
| | | | |-- destination: struct (nullable = true) 
| | | | | |-- calledNumber: string (nullable = true) 
| | | | |-- totalCallEventDuration: long (nullable = true) 
| | | |-- basicServiceUsedList: struct (nullable = true) 
| | | | |-- BasicServiceUsed: struct (nullable = true) 
| | | | | |-- basicService: struct (nullable = true) 
| | | | | | |-- serviceCode: struct (nullable = true) 
| | | | | | | |-- teleServiceCode: string (nullable = true) 
| | | | | |-- chargeInformationList: struct (nullable = true) 
| | | | | | |-- ChargeInformation: struct (nullable = true) 
| | | | | | | |-- callTypeGroup: struct (nullable = true) 
| | | | | | | | |-- callTypeLevel1: long (nullable = true) 
| | | | | | | | |-- callTypeLevel2: long (nullable = true) 
| | | | | | | | |-- callTypeLevel3: long (nullable = true) 
| | | | | | | | |-- calledCountryCode: string (nullable = true) 
| | | | | | | |-- chargeDetailList: struct (nullable = true) 
| | | | | | | | |-- ChargeDetail: struct (nullable = true) 
| | | | | | | | | |-- charge: long (nullable = true) 
| | | | | | | | | |-- chargeType: string (nullable = true) 
| | | | | | | | | |-- chargeableUnits: long (nullable = true) 
| | | | | | | | | |-- chargedUnits: long (nullable = true) 
| | | | | | | | | |-- dayCategory: long (nullable = true) 
| | | | | | | | | |-- timeBand: long (nullable = true) 
| | | | | | | |-- chargedItem: long (nullable = true) 
| | | | | | | |-- exchangeRateCode: long (nullable = true) 
| | | |-- equipmentInformation: struct (nullable = true) 
| | | | |-- imeiOrEsn: struct (nullable = true) 
| | | | | |-- imei: string (nullable = true) 
| | | |-- locationInformation: struct (nullable = true) 
| | | | |-- networkLocation: struct (nullable = true) 
| | | | | |-- callReference: string (nullable = true) 
| | | | | |-- cellId: long (nullable = true) 
| | | | | |-- locationArea: long (nullable = true) 
| | | | | |-- recEntityCode: long (nullable = true) 
| | | |-- operatorSpecInformation: struct (nullable = true) 
| | | | |-- OperatorSpecInformation: array (nullable = true) 
| | | | | |-- element: string (containsNull = true) 
| |-- mobileTerminatedCall: array (nullable = true) 
| | |-- element: struct (containsNull = true) 
| | | |-- basicCallInformation: struct (nullable = true) 
| | | | |-- callEventStartTimeStamp: struct (nullable = true) 
| | | | | |-- localTimeStamp: string (nullable = true) 
| | | | | |-- utcTimeOffsetCode: long (nullable = true) 
| | | | |-- callOriginator: struct (nullable = true) 
| | | | | |-- callingNumber: string (nullable = true) 
| | | | |-- chargeableSubscriber: struct (nullable = true) 
| | | | | |-- simChargeableSubscriber: struct (nullable = true) 
| | | | | | |-- imsi: string (nullable = true) 
| | | | | | |-- msisdn: string (nullable = true) 
| | | | |-- totalCallEventDuration: long (nullable = true) 
| | | |-- basicServiceUsedList: struct (nullable = true) 
| | | | |-- BasicServiceUsed: struct (nullable = true) 
| | | | | |-- basicService: struct (nullable = true) 
| | | | | | |-- serviceCode: struct (nullable = true) 
| | | | | | | |-- teleServiceCode: string (nullable = true) 
| | | | | |-- chargeInformationList: struct (nullable = true) 
| | | | | | |-- ChargeInformation: struct (nullable = true) 
| | | | | | | |-- chargeDetailList: struct (nullable = true) 
| | | | | | | | |-- ChargeDetail: struct (nullable = true) 
| | | | | | | | | |-- charge: long (nullable = true) 
| | | | | | | | | |-- chargeType: string (nullable = true) 
| | | | | | | | | |-- chargeableUnits: long (nullable = true) 
| | | | | | | | | |-- chargedUnits: long (nullable = true) 
| | | | | | | | | |-- dayCategory: long (nullable = true) 
| | | | | | | | | |-- timeBand: long (nullable = true) 
| | | | | | | |-- chargedItem: long (nullable = true) 
| | | | | | | |-- exchangeRateCode: long (nullable = true) 
| | | |-- equipmentInformation: struct (nullable = true) 
| | | | |-- imeiOrEsn: struct (nullable = true) 
| | | | | |-- imei: string (nullable = true) 
| | | |-- locationInformation: struct (nullable = true) 
| | | | |-- networkLocation: struct (nullable = true) 
| | | | | |-- callReference: string (nullable = true) 
| | | | | |-- cellId: long (nullable = true) 
| | | | | |-- locationArea: long (nullable = true) 
| | | | | |-- recEntityCode: long (nullable = true) 
| | | |-- operatorSpecInformation: struct (nullable = true) 
| | | | |-- OperatorSpecInformation: array (nullable = true) 
| | | | | |-- element: string (containsNull = true) 
|-- networkInfo: struct (nullable = true) 
| |-- calledNumAnalysis: struct (nullable = true) 
| | |-- CalledNumAnalysis: struct (nullable = true) 
| | | |-- calledNumAnalysisCode: long (nullable = true) 
| | | |-- countryCodeTable: struct (nullable = true) 
| | | | |-- CountryCode: string (nullable = true) 
| | | |-- iacTable: struct (nullable = true) 
| | | | |-- Iac: string (nullable = true) 
| |-- networkType: long (nullable = true) 
| |-- recEntityInfo: struct (nullable = true) 
| | |-- RecEntityDefinition: array (nullable = true) 
| | | |-- element: struct (containsNull = true) 
| | | | |-- recEntityCode: long (nullable = true) 
| | | | |-- recEntityId: struct (nullable = true) 
| | | | | |-- gsnaddress: struct (nullable = true) 
| | | | | | |-- iPTextV4Address: string (nullable = true) 
| | | | | |-- mscId: string (nullable = true) 
| | | | | |-- msisdn: string (nullable = true) 
| | | | |-- recEntityType: long (nullable = true) 
| |-- utcTimeOffsetInfo: struct (nullable = true) 
| | |-- UtcTimeOffsetDefinition: struct (nullable = true) 
| | | |-- utcTimeOffset: string (nullable = true) 
| | | |-- utcTimeOffsetCode: long (nullable = true) 

當我查看 DataFrame 中的元素時,它會以表格的形式顯示,如下所示: table

我不確定該如何把這個 DataFrame 寫入 CSV 文件。

有什麼建議嗎?謝謝

回答