我們有一個C#Windows服務,它通過使用第三方組件讀取PDF上的二維條碼來處理所有PDF,然後更新數據庫並將文檔存儲在文檔存儲庫中。將PDF拆分爲C#中的多個文件
有沒有辦法在閱讀條形碼後將文件剪切並作爲另一個文檔存儲?
例如,如果有10頁的文檔,它應該分成10個不同的文件。
謝謝。
我們有一個C#Windows服務,它通過使用第三方組件讀取PDF上的二維條碼來處理所有PDF,然後更新數據庫並將文檔存儲在文檔存儲庫中。將PDF拆分爲C#中的多個文件
有沒有辦法在閱讀條形碼後將文件剪切並作爲另一個文檔存儲?
例如,如果有10頁的文檔,它應該分成10個不同的文件。
謝謝。
您可以使用PDF庫,如PDFSharp,讀取文件,遍歷每個頁面,將它們添加到新的PDF文檔並將它們保存在文件系統中。您也可以刪除或保留原件。
這是相當多的代碼,但很簡單,這些樣本應該讓你開始。
之前的一個問題已經回答了你問題的一部分 - 如何分割PDF文件。如果您知道條形碼位於哪些頁面,就可以很容易地在這些位置分割文件:
How can I split up a PDF file into pages (preferably C#)
那裏的建議是使用一個名爲PDFSharp的庫,並附有一個演示PDF拆分的示例(sample demonstrating PDF splitting)。
我遇到過同樣的問題,你可以使用iTextSharp組件來拆分文檔:
/// <summary>
/// Splits one PDF into two files at a given page: pages 1 to pagenumber-1 are written
/// to the first destination, and pages pagenumber to the last page to the second.
/// </summary>
/// <param name="args">
/// Exactly four values: source file, first destination file, second destination file,
/// and the 1-based page number at which the second file starts.
/// </param>
/// <remarks>
/// A wrong argument count, an unparsable or out-of-range page number, and any other
/// failure are reported on standard error rather than thrown to the caller.
/// </remarks>
public Split(String[] args)
{
    if (args.Length != 4)
    {
        Console.Error.WriteLine("This tools needs 4 parameters:\njava Split srcfile destfile1 destfile2 pagenumber");
        return;
    }

    PdfReader reader = null;
    try
    {
        int pagenumber = int.Parse(args[3]);

        // Reader over the source document.
        reader = new PdfReader(args[0]);

        int n = reader.NumberOfPages;
        Console.WriteLine("There are " + n + " pages in the original file.");

        // The split page must leave at least one page in each output file.
        if (pagenumber < 2 || pagenumber > n)
        {
            throw new DocumentException("You can't split this document at page " + pagenumber + "; there is no such page.");
        }

        // The using blocks release both file handles even when copying fails part-way.
        using (FileStream output1 = new FileStream(args[1], FileMode.Create))
        using (FileStream output2 = new FileStream(args[2], FileMode.Create))
        {
            // Each document starts with the size of the first page it will receive.
            Document document1 = new Document(reader.GetPageSizeWithRotation(1));
            Document document2 = new Document(reader.GetPageSizeWithRotation(pagenumber));

            PdfWriter writer1 = PdfWriter.GetInstance(document1, output1);
            PdfWriter writer2 = PdfWriter.GetInstance(document2, output2);

            document1.Open();
            PdfContentByte cb1 = writer1.DirectContent;
            document2.Open();
            PdfContentByte cb2 = writer2.DirectContent;

            PdfImportedPage page;
            int rotation;

            // First output: pages 1 .. pagenumber-1.
            for (int i = 1; i < pagenumber; i++)
            {
                document1.SetPageSize(reader.GetPageSizeWithRotation(i));
                document1.NewPage();
                page = writer1.GetImportedPage(reader, i);
                rotation = reader.GetPageRotation(i);
                if (rotation == 90 || rotation == 270)
                {
                    // Rotate the imported content so it fills the rotated page box.
                    cb1.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
                }
                else
                {
                    cb1.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                }
            }

            // Second output: pages pagenumber .. n.
            for (int i = pagenumber; i <= n; i++)
            {
                document2.SetPageSize(reader.GetPageSizeWithRotation(i));
                document2.NewPage();
                page = writer2.GetImportedPage(reader, i);
                rotation = reader.GetPageRotation(i);
                if (rotation == 90 || rotation == 270)
                {
                    cb2.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
                }
                else
                {
                    cb2.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                }
                Console.WriteLine("Processed page " + i);
            }

            // Closing the documents finalizes both PDF files.
            document1.Close();
            document2.Close();
        }
    }
    catch (Exception e)
    {
        Console.Error.WriteLine(e.Message);
        Console.Error.WriteLine(e.StackTrace);
    }
    finally
    {
        // Release the source file whether or not the split succeeded.
        if (reader != null)
        {
            reader.Close();
        }
    }
}
不錯的第一篇文章!繼續並歡迎! ;-) – 2012-11-08 02:31:56
這個代碼是基於PDFsharp庫
http://www.pdfsharp.com/PDFsharp/
如果你想按書籤拆分,下面是相應的代碼。
/// <summary>
/// Splits a PDF into one file per top-level bookmark. Each output holds the pages from
/// its bookmark's start page up to (but not including) the next bookmark's start page;
/// the last bookmark runs to the end of the document.
/// </summary>
/// <param name="fileName">Path of the PDF to split; outputs are written next to it.</param>
/// <remarks>
/// Output files are named after the first value stored in each bookmark entry plus ".pdf".
/// Does nothing when the document has no bookmarks.
/// NOTE(review): only the top-level list returned by SimpleBookmark.GetBookmark is walked —
/// confirm that nested bookmarks can be ignored.
/// </remarks>
public static void SplitPDFByBookMark(string fileName)
{
    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
        if (bookmarks == null)
        {
            // No bookmarks means there is nothing to split on.
            return;
        }

        for (int i = 0; i < bookmarks.Count; ++i)
        {
            IDictionary<string, object> current = bookmarks[i];
            IDictionary<string, object> next = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];

            // The "Page" value begins with the page number, followed by a space and more text.
            int startPage = int.Parse(current["Page"].ToString().Split(' ')[0]);

            // One past the last page when there is no following bookmark.
            int startPageOfNext = next == null
                ? pdfReader.NumberOfPages + 1
                : int.Parse(next["Page"].ToString().Split(' ')[0]);

            string outputName = bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf";
            SplitByBookmark(pdfReader, startPage, startPageOfNext, outputName, fileName);
        }
    }
    finally
    {
        // Release the source file even when a split fails; exceptions propagate
        // to the caller with their original stack trace.
        pdfReader.Close();
    }
}
/// <summary>
/// Copies a run of pages from <paramref name="reader"/> into a new PDF file created in
/// the same directory as <paramref name="inPutFileName"/>.
/// </summary>
/// <param name="reader">Open reader over the source document.</param>
/// <param name="pageFrom">1-based first page to copy.</param>
/// <param name="PageTo">
/// 1-based page at which copying stops (exclusive). When it is not greater than
/// <paramref name="pageFrom"/>, the single page <paramref name="pageFrom"/> is copied.
/// </param>
/// <param name="outPutName">File name (not a path) of the PDF to create.</param>
/// <param name="inPutFileName">Path of the source PDF; only its directory is used.</param>
/// <remarks>
/// NOTE(review): the output document uses the library's default page size and the pages
/// are placed at offset (0, 0) without rotation handling — confirm this is acceptable for
/// sources with other page sizes or rotated pages.
/// </remarks>
private static void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
{
    string outputPath = System.IO.Path.Combine(
        System.IO.Path.GetDirectoryName(inPutFileName) ?? string.Empty,
        outPutName);

    // A bookmark that shares its start page with the next one still owns that page.
    int lastPage = PageTo > pageFrom ? PageTo - 1 : pageFrom;

    // The using block releases the file handle even when copying fails part-way.
    using (FileStream fs = new System.IO.FileStream(outputPath, System.IO.FileMode.Create))
    {
        Document document = new Document();
        PdfWriter writer = PdfWriter.GetInstance(document, fs);
        document.Open();
        PdfContentByte cb = writer.DirectContent;

        for (int pageNumber = pageFrom; pageNumber <= lastPage; pageNumber++)
        {
            document.NewPage();
            PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
            cb.AddTemplate(page, 0, 0);
        }

        // Close once, after every page has been added; closing finalizes the PDF.
        document.Close();
    }
}
請提供有關此解決方案的其他信息。 OP將如何改變這段代碼來按頁面分割PDF?這段代碼使用了什麼庫? – andr 2013-02-11 11:21:22
/// <summary>
/// Writes every page of a PDF as its own single-page PDF named "1.pdf", "2.pdf", ...
/// inside <paramref name="DestinationFolder"/>.
/// </summary>
/// <param name="sourcePdfPath">Path of the PDF to split.</param>
/// <param name="DestinationFolder">Folder that receives the per-page files; created if missing.</param>
/// <returns>
/// The number of pages written successfully. Returns -1 when the failure happened before
/// the first page was processed. Failures are traced, not thrown.
/// </returns>
public int ExtractPages(string sourcePdfPath, string DestinationFolder)
{
    int p = 0;
    iTextSharp.text.pdf.PdfReader reader = null;
    try
    {
        reader = new iTextSharp.text.pdf.PdfReader(new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdfPath), new ASCIIEncoding().GetBytes(""));

        // A folder named after the source file (lower-cased, ".pdf" removed) is emptied and
        // recreated on every run.
        // NOTE(review): nothing in this method writes to that folder unless the caller passes
        // it as DestinationFolder — confirm it is still needed.
        string sourceNamedFolder = sourcePdfPath.ToLower().Replace(".pdf", "");
        if (Directory.Exists(sourceNamedFolder))
        {
            Directory.Delete(sourceNamedFolder, true);
        }
        Directory.CreateDirectory(sourceNamedFolder);

        // Make sure the folder the pages are written to exists.
        if (!string.IsNullOrEmpty(DestinationFolder))
        {
            Directory.CreateDirectory(DestinationFolder);
        }

        for (p = 1; p <= reader.NumberOfPages; p++)
        {
            using (MemoryStream memoryStream = new MemoryStream())
            {
                // Use the rotated page size so that landscape pages, whose content is rotated
                // below, are not clipped to a portrait page box.
                iTextSharp.text.Document document = new iTextSharp.text.Document(reader.GetPageSizeWithRotation(p));
                iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(document, memoryStream);
                writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_2);
                writer.CompressionLevel = iTextSharp.text.pdf.PdfStream.BEST_COMPRESSION;
                writer.SetFullCompression();

                document.Open();
                // NOTE(review): GetInstance already ties the writer to the document; this extra
                // registration is kept from the original code — confirm whether it is required.
                document.AddDocListener(writer);

                iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
                iTextSharp.text.pdf.PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
                int rot = reader.GetPageRotation(p);
                if (rot == 90 || rot == 270)
                {
                    // Rotate the imported content so it fills the rotated page box.
                    cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
                }
                else
                {
                    cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
                }

                // Closing finalizes the PDF bytes held in the memory stream.
                document.Close();
                document.Dispose();
                File.WriteAllBytes(DestinationFolder + "/" + p + ".pdf", memoryStream.ToArray());
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: report the failure and return the count reached so far.
        System.Diagnostics.Trace.TraceError("ExtractPages failed at page " + p + " of '" + sourcePdfPath + "': " + ex);
    }
    finally
    {
        // Release the source file whether or not extraction succeeded.
        if (reader != null)
        {
            reader.Close();
        }
    }
    return p - 1;
}
在任何需要的地方調用此函數,並傳入源PDF文件路徑和目標文件夾路徑。
相當不錯,但它在處理橫向(landscape)頁面時有問題。 – Erfan 2013-11-04 10:17:47
/// <summary>
/// Splits a PDF into one file per top-level bookmark. Each output holds the pages from
/// its bookmark's start page up to (but not including) the next bookmark's start page;
/// the last bookmark runs to the end of the document.
/// </summary>
/// <param name="fileName">Path of the PDF to split; outputs are written next to it.</param>
/// <remarks>
/// Output files are named after the first value stored in each bookmark entry plus ".pdf".
/// Does nothing when the document has no bookmarks.
/// </remarks>
public void SplitPDFByBookMark(string fileName)
{
    var pdfReader = new PdfReader(fileName);
    try
    {
        IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
        if (bookmarks == null)
        {
            // No bookmarks means there is nothing to split on.
            return;
        }

        for (int i = 0; i < bookmarks.Count; ++i)
        {
            IDictionary<string, object> current = bookmarks[i];
            IDictionary<string, object> next = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];

            // The "Page" value begins with the page number, followed by a space and more text.
            int startPage = int.Parse(current["Page"].ToString().Split(' ')[0]);

            // One past the last page when there is no following bookmark.
            int startPageOfNext = next == null
                ? pdfReader.NumberOfPages + 1
                : int.Parse(next["Page"].ToString().Split(' ')[0]);

            string outputName = bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf";
            SplitByBookmark(pdfReader, startPage, startPageOfNext, outputName, fileName);
        }
    }
    finally
    {
        // Release the source file even when a split fails; exceptions propagate
        // to the caller with their original stack trace.
        pdfReader.Close();
    }
}
/// <summary>
/// Copies a run of pages from <paramref name="reader"/> into a new PDF file created in
/// the same directory as <paramref name="inPutFileName"/>.
/// </summary>
/// <param name="reader">Open reader over the source document.</param>
/// <param name="pageFrom">1-based first page to copy.</param>
/// <param name="PageTo">
/// 1-based page at which copying stops (exclusive). When it is not greater than
/// <paramref name="pageFrom"/>, the single page <paramref name="pageFrom"/> is copied.
/// </param>
/// <param name="outPutName">File name (not a path) of the PDF to create.</param>
/// <param name="inPutFileName">Path of the source PDF; only its directory is used.</param>
/// <remarks>
/// NOTE(review): the output document uses the library's default page size and the pages
/// are placed at offset (0, 0) without rotation handling — confirm this is acceptable for
/// sources with other page sizes or rotated pages.
/// </remarks>
private void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
{
    string outputPath = Path.Combine(Path.GetDirectoryName(inPutFileName) ?? string.Empty, outPutName);

    // A bookmark that shares its start page with the next one still owns that page.
    int lastPage = PageTo > pageFrom ? PageTo - 1 : pageFrom;

    // The using blocks release the file handle and the writer even when copying fails.
    using (var fs = new FileStream(outputPath, System.IO.FileMode.Create))
    {
        Document document = new Document();
        using (var writer = PdfWriter.GetInstance(document, fs))
        {
            document.Open();
            PdfContentByte cb = writer.DirectContent;

            for (int pageNumber = pageFrom; pageNumber <= lastPage; pageNumber++)
            {
                document.NewPage();
                PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
                cb.AddTemplate(page, 0, 0);
            }

            // Close once, after every page has been added; closing finalizes the PDF.
            document.Close();
        }
    }
}
您可以通過NuGet安裝iTextSharp。
將此代碼複製並粘貼到您的C#應用程序中,調用SplitPDFByBookMark()
方法並傳入PDF文件名。此代碼將搜索PDF中的書籤並按書籤完成拆分!
**感謝米洛。** – 2018-01-16 16:13:00
您目前在使用任何PDF庫嗎? – Marko 2010-08-26 11:46:37
我的理解是,第三方組件僅用於檢測PDF中的條形碼。 – gyurisc 2010-08-26 11:49:42