2010-08-26 65 views
7

我們有一個 C# Windows 服務,它使用第三方組件讀取 PDF 上的二維條碼來處理所有 PDF,然後更新數據庫並將文檔存儲到文檔存儲庫中。

標題:在 C# 中將 PDF 拆分爲多個文件

有沒有辦法在閱讀條形碼後將文件剪切並作爲另一個文檔存儲?

例如,如果有10頁的文檔,它應該分成10個不同的文件。

謝謝。

+0

您目前在使用任何PDF庫嗎? – Marko 2010-08-26 11:46:37

+0

我的理解是,第三方組件僅用於檢測PDF中的條形碼。 – gyurisc 2010-08-26 11:49:42

回答

5

我遇到了同樣的問題,你可以使用 iTextSharp 組件來拆分文檔:

/// <summary>
/// Splits a PDF into two files at a given page using iTextSharp.
/// Pages 1..pagenumber-1 go to the first output file; pages pagenumber..N go to the second.
/// </summary>
/// <param name="args">
/// Exactly four values: source file, first destination file, second destination file,
/// and the 1-based page number at which the second file starts.
/// </param>
/// <remarks>
/// Any failure (bad page number, unreadable input, I/O error) is reported on standard error
/// and is not propagated to the caller, matching the original command-line style behavior.
/// </remarks>
public Split(String[] args)
    {
        if (args.Length != 4)
        {
            Console.Error.WriteLine("This tools needs 4 parameters:\njava Split srcfile destfile1 destfile2 pagenumber");
            return;
        }

        PdfReader reader = null;
        try
        {
            int pagenumber = int.Parse(args[3]);

            // we create a reader for a certain document
            reader = new PdfReader(args[0]);
            // we retrieve the total number of pages
            int n = reader.NumberOfPages;
            Console.WriteLine("There are " + n + " pages in the original file.");

            // Splitting at page 1 would leave the first file empty, so the split point must be >= 2.
            if (pagenumber < 2 || pagenumber > n)
            {
                throw new DocumentException("You can't split this document at page " + pagenumber + "; there is no such page.");
            }

            // The using blocks guarantee the output file handles are released even when
            // copying fails half-way; on success Document.Close() finishes the files first.
            using (FileStream output1 = new FileStream(args[1], FileMode.Create))
            using (FileStream output2 = new FileStream(args[2], FileMode.Create))
            {
                // step 1: creation of a document-object
                Document document1 = new Document(reader.GetPageSizeWithRotation(1));
                Document document2 = new Document(reader.GetPageSizeWithRotation(pagenumber));
                // step 2: we create a writer that listens to the document
                PdfWriter writer1 = PdfWriter.GetInstance(document1, output1);
                PdfWriter writer2 = PdfWriter.GetInstance(document2, output2);
                // step 3: we open the document
                document1.Open();
                document2.Open();

                // step 4: we add content
                int i = 0;
                while (i < pagenumber - 1)
                {
                    i++;
                    CopyPage(reader, writer1, document1, i);
                }
                while (i < n)
                {
                    i++;
                    CopyPage(reader, writer2, document2, i);
                    Console.WriteLine("Processed page " + i);
                }

                // step 5: we close the document
                document1.Close();
                document2.Close();
            }
        }
        catch (Exception e)
        {
            Console.Error.WriteLine(e.Message);
            Console.Error.WriteLine(e.StackTrace);
        }
        finally
        {
            // Release the source file whether or not the split succeeded.
            if (reader != null)
            {
                reader.Close();
            }
        }
    }

    /// <summary>
    /// Appends one page of <paramref name="reader"/> to <paramref name="document"/>,
    /// sizing the new page to the source page and compensating for 90/270 degree rotation.
    /// </summary>
    private static void CopyPage(PdfReader reader, PdfWriter writer, Document document, int pageNumber)
    {
        document.SetPageSize(reader.GetPageSizeWithRotation(pageNumber));
        document.NewPage();

        PdfContentByte content = writer.DirectContent;
        PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
        int rotation = reader.GetPageRotation(pageNumber);
        if (rotation == 90 || rotation == 270)
        {
            // Rotated source page: apply the rotation matrix and shift by the rotated page height.
            content.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(pageNumber).Height);
        }
        else
        {
            content.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
        }
    }
+1

不錯的第一篇文章!繼續並歡迎! ;-) – 2012-11-08 02:31:56

0

這個代碼是基於PDFsharp庫

http://www.pdfsharp.com/PDFsharp/

如果你想按書籤拆分,下面是代碼。

/// <summary>
/// Splits a PDF into one output file per top-level bookmark.
/// Each output covers the pages from its bookmark's start page up to (but not including)
/// the start page of the next bookmark; the last bookmark runs to the end of the document.
/// </summary>
/// <param name="fileName">Path of the PDF to split; outputs are written next to it.</param>
public static void SplitPDFByBookMark(string fileName)
    {
        PdfReader pdfReader = new PdfReader(fileName);
        try
        {
            IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
            if (bookmarks == null)
            {
                // No bookmark list was returned: nothing to split on.
                return;
            }

            for (int i = 0; i < bookmarks.Count; ++i)
            {
                // Use the i-th bookmark (the original always read index 0, so every
                // output started at the first bookmark's page).
                IDictionary<string, object> current = bookmarks[i];
                IDictionary<string, object> next = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];

                // The "Page" entry is split on spaces and its first token is parsed as the page number.
                string startPage = current["Page"].ToString().Split(' ')[0];
                string startPageNextBM = next == null
                    ? "" + (pdfReader.NumberOfPages + 1)
                    : next["Page"].ToString().Split(' ')[0];

                // Output name is the first value stored in the bookmark dictionary
                // (presumably its title -- TODO confirm) plus ".pdf".
                string outputName = bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf";

                SplitByBookmark(pdfReader, int.Parse(startPage), int.Parse(startPageNextBM), outputName, fileName);
            }
        }
        finally
        {
            // Always release the source file; exceptions propagate with their original stack trace.
            pdfReader.Close();
        }
    }
    /// <summary>
    /// Copies a range of pages from <paramref name="reader"/> into a new PDF written
    /// next to the input file.
    /// </summary>
    /// <param name="reader">Open reader over the source PDF.</param>
    /// <param name="pageFrom">1-based first page to copy.</param>
    /// <param name="PageTo">1-based page at which copying stops (exclusive), except that
    /// <c>pageFrom == PageTo == 1</c> copies page 1 alone.</param>
    /// <param name="outPutName">File name of the PDF to create.</param>
    /// <param name="inPutFileName">Path of the source PDF; its directory receives the output.</param>
    /// <remarks>
    /// When the range contains no pages, no file is created (a PDF document cannot be
    /// closed without at least one page).
    /// </remarks>
    private static void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
    {
        // Resolve the inclusive last page up front so a single loop handles both cases.
        int lastPage = (pageFrom == PageTo && pageFrom == 1) ? pageFrom : PageTo - 1;
        if (lastPage < pageFrom)
        {
            return;
        }

        // Path.Combine avoids producing a root-relative "\name.pdf" when the input has no directory part.
        string outputPath = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(inPutFileName) ?? string.Empty, outPutName);

        using (System.IO.FileStream fs = new System.IO.FileStream(outputPath, System.IO.FileMode.Create))
        {
            Document document = new Document();
            PdfWriter writer = PdfWriter.GetInstance(document, fs);
            document.Open();
            PdfContentByte cb = writer.DirectContent;

            for (int pageNumber = pageFrom; pageNumber <= lastPage; pageNumber++)
            {
                document.NewPage();
                PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
                cb.AddTemplate(page, 0, 0);
            }

            // Close once, after ALL pages are added. The original closed the document and
            // the stream inside the loop, so only the first page was ever written.
            document.Close();
        }
    }
+0

請提供有關此解決方案的其他信息。 OP將如何改變這段代碼來按頁面分割PDF?這段代碼使用了什麼庫? – andr 2013-02-11 11:21:22

1
/// <summary>
/// Writes every page of a PDF to its own file named "&lt;page number&gt;.pdf" inside
/// <paramref name="DestinationFolder"/>.
/// </summary>
/// <param name="sourcePdfPath">Path of the PDF to split.</param>
/// <param name="DestinationFolder">Folder that receives the single-page PDFs; created if missing.</param>
/// <returns>
/// The number of pages written before the method finished or failed. Returns -1 when the
/// failure happened before the first page was attempted.
/// </returns>
/// <remarks>
/// The method is best-effort: failures are traced via <c>System.Diagnostics.Trace</c>
/// and reflected in the return value instead of being thrown.
/// </remarks>
public int ExtractPages(string sourcePdfPath, string DestinationFolder)
    {
        int p = 0;
        iTextSharp.text.pdf.PdfReader reader = null;
        try
        {
            reader = new iTextSharp.text.pdf.PdfReader(new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdfPath), new ASCIIEncoding().GetBytes(""));

            // Kept from the original: reset a folder named after the source file (lower-cased,
            // ".pdf" removed). NOTE(review): pages are written to DestinationFolder, not here --
            // confirm whether callers rely on this folder being recreated.
            string sourceNamedFolder = sourcePdfPath.ToLower().Replace(".pdf", "");
            if (Directory.Exists(sourceNamedFolder))
            {
                Directory.Delete(sourceNamedFolder, true);
            }
            Directory.CreateDirectory(sourceNamedFolder);

            // Make sure the folder we actually write into exists.
            Directory.CreateDirectory(DestinationFolder);

            for (p = 1; p <= reader.NumberOfPages; p++)
            {
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    iTextSharp.text.Document document = new iTextSharp.text.Document();
                    iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(document, memoryStream);
                    writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_2);
                    writer.CompressionLevel = iTextSharp.text.pdf.PdfStream.BEST_COMPRESSION;
                    writer.SetFullCompression();

                    // Use the rotated page size so landscape (90/270 degree) pages are not clipped;
                    // the template placement below already assumes the rotated dimensions.
                    document.SetPageSize(reader.GetPageSizeWithRotation(p));
                    document.Open();
                    document.AddDocListener(writer);

                    iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
                    iTextSharp.text.pdf.PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
                    int rot = reader.GetPageRotation(p);
                    if (rot == 90 || rot == 270)
                    {
                        cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
                    }
                    else
                    {
                        cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
                    }
                    document.Close();

                    // The page is built in memory first so a failed page never leaves a partial file.
                    File.WriteAllBytes(Path.Combine(DestinationFolder, p + ".pdf"), memoryStream.ToArray());
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort by design, but no longer silent: record what went wrong and where.
            System.Diagnostics.Trace.TraceError("ExtractPages failed for '" + sourcePdfPath + "' at page " + p + ": " + ex);
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }
        return p - 1;
    }

在任何需要的地方調用這個函數,並傳入源 PDF 路徑和目標文件夾路徑。

+0

相當不錯,但它在處理橫向頁面時有問題。 – Erfan 2013-11-04 10:17:47

0
/// <summary>
/// Splits a PDF into one output file per top-level bookmark.
/// Each output covers the pages from its bookmark's start page up to (but not including)
/// the start page of the next bookmark; the last bookmark runs to the end of the document.
/// </summary>
/// <param name="fileName">Path of the PDF to split; outputs are written next to it.</param>
public void SplitPDFByBookMark(string fileName)
    {
        var pdfReader = new PdfReader(fileName);
        try
        {
            IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
            if (bookmarks == null)
            {
                // No bookmark list was returned: nothing to split on.
                return;
            }

            for (int i = 0; i < bookmarks.Count; ++i)
            {
                IDictionary<string, object> current = bookmarks[i];
                IDictionary<string, object> next = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];

                // The "Page" entry is split on spaces and its first token is parsed as the page number.
                string startPage = current["Page"].ToString().Split(' ')[0];
                string startPageNextBM = next == null
                    ? "" + (pdfReader.NumberOfPages + 1)
                    : next["Page"].ToString().Split(' ')[0];

                // Output name is the first value stored in the bookmark dictionary
                // (presumably its title -- TODO confirm) plus ".pdf".
                string outputName = bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf";

                SplitByBookmark(pdfReader, int.Parse(startPage), int.Parse(startPageNextBM), outputName, fileName);
            }
        }
        finally
        {
            // Always release the source file; exceptions propagate with their original stack trace
            // (the original "throw ex;" reset it).
            pdfReader.Close();
        }
    }

    /// <summary>
    /// Copies a range of pages from <paramref name="reader"/> into a new PDF written
    /// next to the input file.
    /// </summary>
    /// <param name="reader">Open reader over the source PDF.</param>
    /// <param name="pageFrom">1-based first page to copy.</param>
    /// <param name="PageTo">1-based page at which copying stops (exclusive), except that
    /// <c>pageFrom == PageTo == 1</c> copies page 1 alone.</param>
    /// <param name="outPutName">File name of the PDF to create.</param>
    /// <param name="inPutFileName">Path of the source PDF; its directory receives the output.</param>
    /// <remarks>
    /// When the range contains no pages, no file is created (a PDF document cannot be
    /// closed without at least one page).
    /// </remarks>
    private void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
    {
        // Resolve the inclusive last page up front so a single loop handles both cases.
        int lastPage = (pageFrom == PageTo && pageFrom == 1) ? pageFrom : PageTo - 1;
        if (lastPage < pageFrom)
        {
            return;
        }

        // Path.Combine avoids producing a root-relative "\name.pdf" when the input has no directory part.
        string outputPath = Path.Combine(Path.GetDirectoryName(inPutFileName) ?? string.Empty, outPutName);

        using (var fs = new FileStream(outputPath, System.IO.FileMode.Create))
        {
            Document document = new Document();
            PdfWriter writer = PdfWriter.GetInstance(document, fs);
            document.Open();
            PdfContentByte cb = writer.DirectContent;

            for (int pageNumber = pageFrom; pageNumber <= lastPage; pageNumber++)
            {
                document.NewPage();
                PdfImportedPage page = writer.GetImportedPage(reader, pageNumber);
                cb.AddTemplate(page, 0, 0);
            }

            // Close once, after ALL pages are added. The original closed the document and
            // the stream inside the loop, so only the first page was ever written.
            document.Close();
        }
    }

您可以通過 NuGet 安裝 iTextSharp,將此代碼複製並粘貼到您的 C# 應用程序中,然後調用 SplitPDFByBookMark() 方法並傳入 PDF 文件名。此代碼會搜索書籤並完成拆分!

+0

**感謝米洛。** – 2018-01-16 16:13:00

相關問題