2013-11-27 273 views
3

使用iText將html轉換爲pdf：我想使用iText將包含圖像的HTML文件轉換爲PDF。下面提供我的源代碼。

這是我的HTML文件...

<!-- Sample input page for the conversion: the body holds a single floated
     <img> whose src attribute is empty; the add.png variant that references
     a file is left commented out. -->
<html> 

<body> 
<img src='' width='62' height='80' style='float: left; margin-right: 28px;' alt="" /> 
<!-- <img src="add.png" alt="" /> --> 
</body> 
</html> 

我想轉換此HTML文件爲pdf ...

現在用下面的Java代碼...

import java.io.DataOutputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.StringReader; 
import java.net.URL; 
import java.nio.charset.Charset; 

import org.apache.commons.io.IOUtils; 
import org.apache.pdfbox.encoding.Encoding; 
import org.jsoup.Jsoup; 
import org.jsoup.safety.Whitelist; 
import org.jsoup.select.Elements; 
import org.w3c.tidy.Tidy; 

import com.itextpdf.text.Document; 
import com.itextpdf.text.DocumentException; 
import com.itextpdf.text.Image; 
import com.itextpdf.text.pdf.PdfWriter; 
import com.itextpdf.tool.xml.Pipeline; 
import com.itextpdf.tool.xml.XMLWorker; 
import com.itextpdf.tool.xml.XMLWorkerFontProvider; 
import com.itextpdf.tool.xml.XMLWorkerHelper; 
import com.itextpdf.tool.xml.css.CssFilesImpl; 
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver; 
import com.itextpdf.tool.xml.html.CssAppliersImpl; 
import com.itextpdf.tool.xml.html.HTML; 
import com.itextpdf.tool.xml.html.TagProcessor; 
import com.itextpdf.tool.xml.html.TagProcessorFactory; 
import com.itextpdf.tool.xml.html.Tags; 
import com.itextpdf.tool.xml.parser.XMLParser; 
import com.itextpdf.tool.xml.pipeline.css.CSSResolver; 
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline; 
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext; 
import com.itextpdf.tool.xml.pipeline.html.ImageProvider; 
import com.pdfcrowd.Client; 

/**
 * Command-line example that renders {@code index.html} from the working
 * directory into {@code pdf.pdf} using iText's XML Worker.
 */
public class App
{

    /**
     * Parses {@code index.html} as XHTML and writes the result to {@code pdf.pdf}.
     *
     * <p>Both file streams are released in {@code finally} blocks so that a
     * failure while parsing does not leak open file handles.
     *
     * @param args command-line arguments (not read)
     * @throws DocumentException declared by {@code PdfWriter.getInstance}
     * @throws IOException if either file cannot be opened, read, written or closed
     */
    public static void main(String[] args) throws DocumentException, IOException
    {
        // Open the input first: a missing index.html then fails before pdf.pdf
        // is created or truncated.
        InputStream html = new FileInputStream("index.html");
        try {
            FileOutputStream pdf = new FileOutputStream("pdf.pdf");
            try {
                // step 1
                // (No newPage() here: calling it before open() has no effect.)
                Document document = new Document();
                // step 2
                PdfWriter writer = PdfWriter.getInstance(document, pdf);
                // step 3
                document.open();
                // step 4
                XMLWorkerHelper.getInstance().parseXHtml(writer, document, html);
                // step 5
                // Kept on the success path only: closing a document that received
                // no content throws "The document has no pages", which would hide
                // the original parsing error if this ran from a finally block.
                document.close();
            } finally {
                pdf.close();
            }
        } finally {
            html.close();
        }
        System.out.println("PDF Created!");
    }
}

我得到以下錯誤...

Exception in thread "main" ExceptionConverter: java.io.IOException: The document has no pages. 
at com.itextpdf.text.pdf.PdfPages.writePageTree(PdfPages.java:113) 
at com.itextpdf.text.pdf.PdfWriter.close(PdfWriter.java:1243) 
at com.itextpdf.text.pdf.PdfDocument.close(PdfDocument.java:849) 
at com.itextpdf.text.Document.close(Document.java:416) 
at App.main(App.java:64) 

請幫我解決如何使用iText將帶有圖像的HTML文件轉換爲PDF。如果HTML中沒有圖像，或者我將圖像路徑硬編碼，我就能夠成功轉換該HTML文件。在此先感謝。

+2

可能重複的[iText - HTML到PDF - 圖像不以PDF顯示](http://stackoverflow.com/questions/15273933/itext-html-to-pdf-image-is-not-displayed-in-pdf) – Keerthivasan

回答

0

如果您的pdf頁面中沒有內容，則會發生此異常。嘗試像這樣傳入你的InputStream：

// Sample markup held in memory. A Java string literal cannot span lines or
// contain bare double quotes, so each line is a separate concatenated piece
// and the inner quotes are escaped.
String str = "<html>\n"
    + "\n"
    + "<body>\n"
    + "<img src='' width='62' height='80' style='float: left; margin-right: 28px;' alt=\"\" />\n"
    + "<!-- <img src=\"add.png\" alt=\"\" /> -->\n"
    + "</body>\n"
    + "</html>";

// Hand the markup to XML Worker as a stream instead of reading it from a file.
InputStream is = new ByteArrayInputStream(str.getBytes());
XMLWorkerHelper.getInstance().parseXHtml(writer, document, is);
+0

仍然得到與上述代碼相同的異常 – madas

4

你需要實現自定義圖像標籤處理器來處理嵌入在HTML中的圖像:

package com.example.itext.processor; 

import java.util.ArrayList; 
import java.util.List; 
import java.util.Map; 

import com.itextpdf.text.Chunk; 
import com.itextpdf.text.Element; 
import com.itextpdf.text.Image; 
import com.itextpdf.text.log.Level; 
import com.itextpdf.text.log.Logger; 
import com.itextpdf.text.log.LoggerFactory; 
import com.itextpdf.text.pdf.codec.Base64; 
import com.itextpdf.tool.xml.NoCustomContextException; 
import com.itextpdf.tool.xml.Tag; 
import com.itextpdf.tool.xml.WorkerContext; 
import com.itextpdf.tool.xml.exceptions.LocaleMessages; 
import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException; 
import com.itextpdf.tool.xml.html.HTML; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext; 

/**
 * Image tag processor that additionally understands {@code data:image/...}
 * URIs: the part after the first comma is Base64-decoded into an iText image.
 * Any other non-empty {@code src}, or a data URI that fails to decode, is
 * handed to the inherited processor.
 */
public class ImageTagProcessor extends com.itextpdf.tool.xml.html.Image {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Produces the elements for a closing image tag.
     *
     * @param ctx            worker context used to look up the HTML pipeline context
     * @param tag            the tag being closed; its {@code src} attribute is inspected
     * @param currentContent content collected for the tag, forwarded to the superclass on fallback
     * @return an empty list when {@code src} is missing or empty, a single styled
     *         chunk wrapping the decoded image for a decodable data URI, otherwise
     *         whatever the superclass returns
     * @throws RuntimeWorkerException if the HTML pipeline context cannot be obtained
     */
    @Override
    public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) {
        final String source = tag.getAttributes().get(HTML.Attribute.SRC);
        if (source == null || source.length() == 0) {
            return new ArrayList<Element>(1);
        }

        Image embedded = null;
        if (source.startsWith("data:image/")) {
            embedded = decodeEmbeddedImage(source);
        }
        if (embedded == null) {
            // Not a data URI, or decoding failed: use the stock behaviour.
            return super.end(ctx, tag, currentContent);
        }

        final List<Element> result = new ArrayList<Element>(1);
        try {
            final HtmlPipelineContext pipelineContext = getHtmlPipelineContext(ctx);
            final com.itextpdf.text.Image styledImage =
                (com.itextpdf.text.Image) getCssAppliers().apply(embedded, tag, pipelineContext);
            final Chunk wrapper = new Chunk(styledImage, 0, 0, true);
            result.add(getCssAppliers().apply(wrapper, tag, pipelineContext));
        } catch (NoCustomContextException e) {
            throw new RuntimeWorkerException(e);
        }
        return result;
    }

    /**
     * Decodes the Base64 payload that follows the first comma of {@code src}.
     *
     * @param src the full {@code src} attribute value
     * @return the decoded image, or {@code null} if decoding or image creation
     *         failed (the failure is logged when ERROR logging is enabled)
     */
    private Image decodeEmbeddedImage(final String src) {
        final String payload = src.substring(src.indexOf(",") + 1);
        try {
            return Image.getInstance(Base64.decode(payload));
        } catch (Exception e) {
            if (logger.isLogging(Level.ERROR)) {
                logger.error(String.format(LocaleMessages.getInstance().getMessage(LocaleMessages.HTML_IMG_RETRIEVE_FAIL), src), e);
            }
            return null;
        }
    }
}

下面的代碼註冊自定義圖像標籤處理器，並將HTML文檔轉換爲PDF：

/**
 * Program entry point; simply runs {@link #convertHtmlToPdf()}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    convertHtmlToPdf();
}

/**
 * Converts {@code C:\test.html} to {@code C:\Test.pdf} through an XML Worker
 * pipeline whose {@code img} tag processor is replaced by
 * {@link ImageTagProcessor}.
 *
 * <p>Both file streams are closed in {@code finally} blocks, so a failure in
 * the pipeline or parser no longer leaks open file handles. Any exception is
 * still only printed, as before.
 */
private static void convertHtmlToPdf() {
    try {
        // Open the input first so a missing HTML file fails before the PDF
        // file is created or truncated.
        final InputStream is = new FileInputStream("C:\\test.html");
        try {
            final OutputStream file = new FileOutputStream(new File("C:\\Test.pdf"));
            try {
                final Document document = new Document();
                final PdfWriter writer = PdfWriter.getInstance(document, file);
                document.open();

                // Swap the stock <img> processor for the custom one.
                final TagProcessorFactory tagProcessorFactory = Tags.getHtmlTagProcessorFactory();
                tagProcessorFactory.removeProcessor(HTML.Tag.IMG);
                tagProcessorFactory.addProcessor(new ImageTagProcessor(), HTML.Tag.IMG);

                // CSS resolution seeded with XML Worker's default stylesheet.
                final CssFilesImpl cssFiles = new CssFilesImpl();
                cssFiles.add(XMLWorkerHelper.getInstance().getDefaultCSS());
                final StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);

                // Pipeline: CSS resolver -> HTML -> PDF writer.
                final HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new XMLWorkerFontProvider()));
                hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(tagProcessorFactory);
                final HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(document, writer));
                final Pipeline<?> pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);

                final XMLWorker worker = new XMLWorker(pipeline, true);
                final Charset charset = Charset.forName("UTF-8");
                final XMLParser xmlParser = new XMLParser(true, worker, charset);
                xmlParser.parse(is, charset);

                // Success path only: closing a document with no content throws
                // and would mask the original error if run from finally.
                document.close();
            } finally {
                file.close();
            }
        } finally {
            is.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        // TODO: report the failure to the caller instead of only printing it
    }
}