library(XML)
file <-"E:/aaa.xml"
doc = xmlInternalTreeParse(file)
ns=names(xmlNamespace(xmlRoot(doc)))
patient=getNodeSet(doc, path=paste("/", ns, ":tcga_bcr/", ns,":patient", sep=""))
row=xmlToDataFrame(nodes=patient, stringsAsFactors = F)
shared_stage:stage_event有許多子節點,如何將每個子節點精確定位爲列。如何讀取R中的xml文件和data.frame
如果節點具有preferred_name,請使用preferred_name作爲data.frame列名稱。
aaa.xml:
<?xml version="1.0" encoding="UTF-8"?>
<brca:tcga_bcr xsi:schemaLocation="http://tcga.nci/bcr/xml/clinical/brca/2.7 http://tcga-data.nci.nih.gov/docs/xsd/BCR/tcga.nci/bcr/xml/clinical/brca/2.7/TCGA_BCR.BRCA_Clinical.xsd" schemaVersion="2.7" xmlns:brca="http://tcga.nci/bcr/xml/clinical/brca/2.7" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:admin="http://tcga.nci/bcr/xml/administration/2.7" xmlns:clin_shared="http://tcga.nci/bcr/xml/clinical/shared/2.7" xmlns:shared="http://tcga.nci/bcr/xml/shared/2.7" xmlns:brca_shared="http://tcga.nci/bcr/xml/clinical/brca/shared/2.7" xmlns:shared_stage="http://tcga.nci/bcr/xml/clinical/shared/stage/2.7" xmlns:brca_nte="http://tcga.nci/bcr/xml/clinical/brca/shared/new_tumor_event/2.7/1.0" xmlns:nte="http://tcga.nci/bcr/xml/clinical/shared/new_tumor_event/2.7" xmlns:follow_up_v2.1="http://tcga.nci/bcr/xml/clinical/brca/followup/2.7/2.1" xmlns:rx="http://tcga.nci/bcr/xml/clinical/pharmaceutical/2.7" xmlns:rad="http://tcga.nci/bcr/xml/clinical/radiation/2.7">
<brca:patient>
<admin:additional_studies/>
<clin_shared:tumor_tissue_site preferred_name="submitted_tumor_site" display_order="9999" cde="3427536" cde_ver="2.000" xsd_ver="2.6" tier="2" owner="TSS" procurement_status="Completed" restricted="false" source_system_identifier="175314">Breast</clin_shared:tumor_tissue_site>
<clin_shared:race_list>
<clin_shared:race preferred_name="race" display_order="12" cde="2192199" cde_ver="1.000" xsd_ver="1.8" tier="2" owner="TSS" procurement_status="Completed" restricted="false" source_system_identifier="175301">WHITE</clin_shared:race>
</clin_shared:race_list>
<shared:bcr_patient_barcode preferred_name="" display_order="9999" cde="2673794" cde_ver="" xsd_ver="1.8" owner="TSS" procurement_status="Completed" restricted="false">TCGA-A2-A0EV</shared:bcr_patient_barcode>
<shared:tissue_source_site cde="" cde_ver="" xsd_ver="2.4" owner="TSS" procurement_status="Completed" restricted="false">A2</shared:tissue_source_site>
<shared_stage:stage_event system="AJCC">
<shared_stage:system_version preferred_name="ajcc_staging_edition" display_order="51" cde="2722309" cde_ver="1.000" xsd_ver="2.6" tier="1" owner="TSS" procurement_status="Completed" restricted="false" source_system_identifier="1080001">6th</shared_stage:system_version>
<shared_stage:tnm_categories>
<shared_stage:pathologic_categories>
<shared_stage:pathologic_T preferred_name="ajcc_tumor_pathologic_pt" display_order="52" cde="3045435" cde_ver="1.000" xsd_ver="2.6" tier="1" owner="TSS" procurement_status="Completed" restricted="false" source_system_identifier="175336">T1c</shared_stage:pathologic_T>
</shared_stage:pathologic_categories>
</shared_stage:tnm_categories>
</shared_stage:stage_event>
<rx:drugs/>
<rad:radiations/>
</brca:patient>
</brca:tcga_bcr>
data.frame
submitted_tumor_site race bcr_patient_barcode ajcc_staging_edition ajcc_tumor_pathologic_pt
Breast WHITE TCGA-A2-A0EV 6th T1c