2016-04-22 38 views
1

我想讀取一批格式相同的 Word docx 文件,並將其中的數據提取到數據庫。文本部分沒有任何問題,但複選框讓我很頭痛。需要說明的是,我是 docx4j 的新手,而且已經爲這個問題苦苦掙扎了四天。非常感謝任何幫助或建議。(標題:Docx4j - 如何獲得 docx 複選框狀態)

我附上了我正在讀取的文檔(test.docx)。第一個複選框是我自己用 Word 插入的,它能被我的代碼檢測到,並在初次遍歷中以 CTSdtCell 的形式出現,但其他複選框沒有被檢測到。它們在文件中似乎是以另一種方式表示的,用到了 CTObject、CTShape、CTImageData 和 CTControl,而我找不到從這些對象(或其中任何一個)取得複選框狀態的方法。

/**
 * Loads {@code test.docx} from the working directory and walks the content
 * of its main document part with a {@link Finder} set up to collect
 * {@code FldChar} nodes.
 *
 * @param args unused
 * @throws Exception if the package cannot be loaded or traversed
 */
public static void main(String[] args) throws Exception {
    java.io.File input = new java.io.File("test.docx");
    WordprocessingMLPackage pkg = WordprocessingMLPackage.load(input);
    MainDocumentPart mainPart = pkg.getMainDocumentPart();

    Finder visitor = new Finder(FldChar.class);
    // The TraversalUtil instance itself is discarded; it is constructed only
    // so that the visitor gets applied to the document content.
    new TraversalUtil(mainPart.getContent(), visitor);
}

/**
 * Traversal callback that prints the class name of every visited node,
 * prints the value of every {@code org.docx4j.wml.Text}, reports the state of
 * w14 checkbox content controls found in the properties of structured
 * document tags, and collects nodes whose class is exactly
 * {@link #typeToFind}.
 *
 * <p>Only checkboxes stored as {@code org.docx4j.w14.CTSdtCheckbox} inside an
 * SDT's properties are reported; other representations are merely logged by
 * class name.
 */
public static class Finder extends CallbackImpl {
    /** Exact runtime class whose instances are added to {@link #results}. */
    protected Class<?> typeToFind;

    /** Visited nodes whose class equals {@link #typeToFind}, in visit order. */
    public List<Object> results = new ArrayList<Object>();

    /**
     * @param typeToFind class to collect; compared with {@code equals}, so
     *                   subclasses are not matched
     */
    protected Finder(Class<?> typeToFind) {
        this.typeToFind = typeToFind;
    }

    /**
     * Logs the visited node and inspects it for checkbox content controls.
     *
     * @param o the node being visited
     * @return always {@code null}
     */
    @Override
    public List<Object> apply(Object o) {
        System.out.println(o.getClass().getName());

        if (o instanceof org.docx4j.wml.CTSdtCell) {
            findCheckbox(((org.docx4j.wml.CTSdtCell) o).getSdtPr());
        }

        if (o instanceof org.docx4j.wml.SdtRun) {
            findCheckbox(((org.docx4j.wml.SdtRun) o).getSdtPr());
        }

        if (o instanceof org.docx4j.wml.SdtBlock) {
            findCheckbox(((org.docx4j.wml.SdtBlock) o).getSdtPr());
        }

        if (o instanceof org.docx4j.wml.Text) {
            System.out.println("  Text Value : " + ((org.docx4j.wml.Text) o).getValue());
        }

        // Adapt as required
        if (o.getClass().equals(typeToFind)) {
            results.add(o);
        }
        return null;
    }

    /**
     * Reports checkboxes declared in the given SDT properties.
     * An SDT without properties is skipped instead of raising an NPE.
     */
    private static void findCheckbox(org.docx4j.wml.SdtPr sdtPr) {
        if (sdtPr == null) {
            return;
        }
        findCheckbox(sdtPr.getRPrOrAliasOrLock());
    }

    /**
     * Prints one line for every w14 checkbox found among the given SDT
     * property children.
     *
     * @param objs children of an SDT properties element; {@code null} is
     *             treated as empty
     */
    private static void findCheckbox(List<Object> objs) {
        if (objs == null) {
            return;
        }
        for (Object obj : objs) {
            if (!(obj instanceof javax.xml.bind.JAXBElement)) {
                continue;
            }
            // Test the wrapped value itself rather than comparing the declared
            // type's name: this guards the cast below and needs no raw types.
            Object value = ((javax.xml.bind.JAXBElement<?>) obj).getValue();
            if (!(value instanceof org.docx4j.w14.CTSdtCheckbox)) {
                continue;
            }
            org.docx4j.w14.CTOnOff checked = ((org.docx4j.w14.CTSdtCheckbox) value).getChecked();
            if (checked == null) {
                // No checked element present: report that instead of crashing.
                System.out.println("  CheckBox found with value=null");
            } else {
                System.out.println("  CheckBox found with value=" + checked.getVal());
            }
        }
    }
}

輸出結果是:

org.docx4j.wml.Tbl 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : WORK INSTRUCTION # 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Inline 
org.docx4j.dml.CTBlip 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : A 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : STEP BY STEP 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : - 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : WORK INSTRUCTION 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Inline 
org.docx4j.dml.CTBlip 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 1234567 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : TASK 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : Chlorine drum change 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : DATE 
org.docx4j.wml.CTSdtCell 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 12/07/2015 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : MACHINE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : ORIGINATOR 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : D.GROVE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CLOCK NUMBER 
org.docx4j.wml.CTSdtCell 
     CheckBox found with value=1 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : ? 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : AREA 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHLORINE HOUSE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHECKED 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : (EXPERT) 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : J Clarke 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CLOCK NUMBER 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 4985 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : PPE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : EYE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : EAR 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : FOOT 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : HEAD 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : HAND 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShapetype 
org.docx4j.vml.CTStroke 
org.docx4j.vml.CTFormulas 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTPath 
org.docx4j.vml.officedrawing.CTLock 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : COSHH 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : SPECIAL PPE REQUIREMENTS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : *SITE 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : R/A NUMBER 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CONSIDERATION 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : PRODUCTS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : B.A. EQUIPMENT 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 12668 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHLORINE 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : GAS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.P 
org.docx4j.wml.CTBookmark 
org.docx4j.wml.CTMarkupRange 

我現在補充了 MainDocumentPart.getXML() 輸出中的一段,即包含其中一個難以捉摸的複選框的那個表格單元格。我在裏面看不出任何能告訴我複選框值的內容。有誰能告訴我我遺漏了什麼嗎?

<w:tc> 
     <w:tcPr> 
      <w:tcW w:w="1015" w:type="dxa"/> 
      <w:tcBorders> 
       <w:left w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
       <w:bottom w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
       <w:right w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
      </w:tcBorders> 
      <w:vAlign w:val="center"/> 
     </w:tcPr> 
     <w:p w:rsidRPr="00A7008C" w:rsidR="00F909A4" w:rsidP="00017AE9" w:rsidRDefault="000F5760"> 
      <w:pPr> 
       <w:jc w:val="center"/> 
       <w:rPr> 
        <w:b/> 
        <w:color w:val="FFFFFF" w:themeColor="background1"/> 
       </w:rPr> 
      </w:pPr> 
      <w:r> 
       <w:rPr> 
        <w:b/> 
        <w:color w:val="FFFFFF" w:themeColor="background1"/> 
        <w:sz w:val="36"/> 
       </w:rPr> 
       <w:object w:dxaOrig="225" w:dyaOrig="225"> 
        <v:shape type="#_x0000_t75" style="width:12pt;height:29.25pt" id="_x0000_i1063" o:ole=""> 
         <v:imagedata o:title="" r:id="rId17"/> 
        </v:shape> 
        <w:control w:name="CheckBox11" w:shapeid="_x0000_i1063" r:id="rId18"/> 
       </w:object> 
      </w:r> 
      <w:bookmarkEnd w:id="0"/> 
     </w:p> 
    </w:tc> 
+0

我已經添加了包含難以捉摸複選框的單元格的xml。爲什麼沒有顯示值? – Richard

回答

0

我已經破解了它! CTImageData指向可以通過文檔關係訪問的圖像。這些圖像包含勾號或未勾選的框。通過檢查圖像的大小,我可以知道它是什麼。

我對 Word 的瞭解僅限於表面的使用,也不知道這些「複選框」是如何創建的,但看起來它們並不是像我測試時那樣創建的。因此,我不知道當組織升級 MS Office 軟件、並再次編輯和保存這些文檔時,這些圖像是否會發生變化。不過,在初始加載之後,對我這個軟件的需求很快就會改變,所以這個風險對我來說影響不大。

+0

傳統Active X控件創建於:開發者菜單>舊版工具.. – JasonPlutext

0

現有的複選框是傳統的ActiveX控件:

  <w:object w:dxaOrig="225" w:dyaOrig="225"> 
      <v:shapetype id="_x0000_t75" coordsize="21600,21600" o:spt="75" o:preferrelative="t" path="m@4@5l@4@11@9@11@9@5xe" filled="f" stroked="f"> 
       <v:stroke joinstyle="miter"/> 
       <v:formulas> 
       : 
       </v:formulas> 
       <v:path o:extrusionok="f" gradientshapeok="t" o:connecttype="rect"/> 
       <o:lock v:ext="edit" aspectratio="t"/> 
      </v:shapetype> 
      <v:shape id="_x0000_i1025" type="#_x0000_t75" style="width:12pt;height:29.25pt" o:ole=""> 
       <v:imagedata r:id="rId15" o:title=""/> 
      </v:shape> 
      <w:control r:id="rId16" w:name="CheckBox" w:shapeid="_x0000_i1025"/> 
      </w:object> 

您所創建的那些是現代XML友好的複選框內容控件。

還有複選框字符和複選框表單域...