如何使用 pyPdf Python 模塊讀取下列 PDF 文件：http://www.envis-icpe.com/pointcounterpointbook/Hindi_Book.pdf （標籤：pypdf、Python、工具）
# -*- coding: utf-8 -*-
from pyPdf import PdfFileWriter, PdfFileReader
import pyPdf
def getPDFContent(path):
    """Return the text of every page of the PDF at *path* as one string.

    Each page's text is obtained with pyPdf's ``extractText()``. The page
    texts are concatenated, non-breaking spaces (U+00A0) are replaced by
    plain spaces, and every run of whitespace is collapsed to one space.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        The whitespace-normalised text of the whole document, or an empty
        string when no text was extracted.
    """
    # Keep the file open for as long as pages are being pulled from the
    # reader, and guarantee it is closed afterwards (the original never
    # closed the handle).
    with open(path, "rb") as stream:
        pdf = pyPdf.PdfFileReader(stream)
        pages = [
            pdf.getPage(i).extractText()
            for i in range(pdf.getNumPages())
        ]

    # Page boundaries become newlines, which the collapse step below folds
    # into single spaces like any other whitespace.
    content = "\n".join(pages)
    return " ".join(content.replace(u"\xa0", " ").strip().split())
print getPDFContent("/home/tom/Desktop/Hindi_Book.pdf").encode("ascii", "xmlcharrefreplace")
上面的代碼只打印出二進制數據。
另外，應該如何用下面的代碼讀取該文件的內容？
from pyPdf import PdfFileWriter, PdfFileReader
import sys
import pyPdf
from pyPdf import PdfFileWriter, PdfFileReader
output = PdfFileWriter()
input1 = PdfFileReader(file("/home/tom/Desktop/Hindi_Book.pdf", "rb"))
# print the title of document1.pdf
print "title = %s" % (input1.getDocumentInfo().title)
請更正第一個示例中最後一行的縮進。 – Steven 2010-10-04 14:07:54