2016-02-05 62 views
0

我遇到問題,無法從PDF文件中提取突出顯示的文本。 str變量總是空的。任何人都可以幫助我?如何使用itextsharp庫僅複製pdf中的高亮文本?

我的代碼:

/// <summary>
/// Collects the page text that lies under every highlight annotation of a PDF.
/// </summary>
/// <param name="file">Path of the PDF file to read.</param>
/// <param name="page">
/// Not used: every page of the document is scanned. The parameter is kept so
/// that existing callers keep compiling.
/// </param>
/// <returns>
/// The text found under each highlight, one highlight per line, in page order;
/// an empty string when the document contains no highlight annotations.
/// </returns>
private static string GetPdfHighlighText(string file, int page)
{
    System.Text.StringBuilder highlighted = new System.Text.StringBuilder();
    PdfReader reader = new PdfReader(file);
    try
    {
        // Page numbers are 1-based and the last page is NumberOfPages itself,
        // so the upper bound must be inclusive.
        for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
        {
            PdfDictionary pageDict = reader.GetPageN(pageNumber);
            PdfArray annots = pageDict.GetAsArray(PdfName.ANNOTS);
            if (annots == null)
            {
                continue;
            }

            // PdfArray is 0-based: valid indices are 0 .. Size - 1.
            for (int i = 0; i < annots.Size; i++)
            {
                // GetAsDict resolves indirect references and yields null for
                // entries that are not dictionaries.
                PdfDictionary annotationDic = annots.GetAsDict(i);
                if (annotationDic == null)
                {
                    continue;
                }

                // Comparing from the constant side tolerates a missing /Subtype.
                if (!PdfName.HIGHLIGHT.Equals(annotationDic.GetAsName(PdfName.SUBTYPE)))
                {
                    continue;
                }

                // A highlight annotation records WHERE the highlight is, not the
                // words under it, so the text has to be pulled from the page
                // content inside the annotation's rectangle.
                PdfArray rect = annotationDic.GetAsArray(PdfName.RECT);
                if (rect == null || rect.Size < 4)
                {
                    continue;
                }

                PdfNumber x1 = rect.GetAsNumber(0);
                PdfNumber y1 = rect.GetAsNumber(1);
                PdfNumber x2 = rect.GetAsNumber(2);
                PdfNumber y2 = rect.GetAsNumber(3);
                if (x1 == null || y1 == null || x2 == null || y2 == null)
                {
                    continue;
                }

                // Normalise the corners; /Rect is not guaranteed to be stored
                // as lower-left followed by upper-right.
                iTextSharp.text.Rectangle region = new iTextSharp.text.Rectangle(
                    System.Math.Min(x1.FloatValue, x2.FloatValue),
                    System.Math.Min(y1.FloatValue, y2.FloatValue),
                    System.Math.Max(x1.FloatValue, x2.FloatValue),
                    System.Math.Max(y1.FloatValue, y2.FloatValue));

                // NOTE(review): /Rect is the bounding box of the whole highlight;
                // for a highlight spanning several lines it can also cover
                // neighbouring words. Use /QuadPoints if that precision matters.
                iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy =
                    new iTextSharp.text.pdf.parser.FilteredTextRenderListener(
                        new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(),
                        new iTextSharp.text.pdf.parser.RegionTextRenderFilter(region));

                highlighted.AppendLine(
                    iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, strategy));
            }
        }
    }
    finally
    {
        // Release the file handle even when parsing throws.
        reader.Close();
    }

    return highlighted.ToString();
}

我使用iTextSharp的庫。 PFLibrary是iTextSharp.text.pdf命名空間。

我想要掃描PDF中的所有頁面並提取所有突出顯示的文本。該文件共有245頁,但我打算逐頁進行篩選。我可以找到高亮註釋,但無法取得被高亮標記的文本字符串。

回答

0

我用下面的代碼解決了我的問題:

/// <summary>
/// Collects the target of every URI link annotation found on a range of pages.
/// </summary>
/// <param name="file">Path of the PDF file to read.</param>
/// <param name="pgIni">First page to scan (1-based, inclusive).</param>
/// <param name="pgFim">Last page to scan (1-based, inclusive).</param>
/// <returns>
/// The value of <c>resultado</c> after each link found has been appended to it,
/// every link followed by "\n ".
/// </returns>
/// <remarks>
/// <c>Progresso</c> and <c>resultado</c> are not declared in this method;
/// presumably they are members of the enclosing class - verify. <c>Progresso</c>
/// is reset to 0 and incremented once per scanned page. <c>resultado</c> is
/// appended to, not reset, so earlier content is kept.
/// </remarks>
public string GetPdfLinks(string file, int pgIni, int pgFim)
{
    Progresso = 0;
    List<string> links = new List<string>();
    PdfReader reader = new PdfReader(file);
    try
    {
        for (int pageNumber = pgIni; pageNumber <= pgFim; pageNumber++)
        {
            PdfDictionary pageDictionary = reader.GetPageN(pageNumber);
            PdfArray annotations = pageDictionary.GetAsArray(PdfName.ANNOTS);

            // A page without annotations simply contributes no links; it must
            // not abort the scan and discard what earlier pages yielded.
            if (annotations != null)
            {
                foreach (PdfObject entry in annotations.ArrayList)
                {
                    // Entries are usually indirect references; resolve them and
                    // skip anything that is not a dictionary.
                    PdfDictionary annotation = PdfReader.GetPdfObject(entry) as PdfDictionary;
                    if (annotation == null)
                    {
                        continue;
                    }

                    // Only link annotations are of interest. Comparing from the
                    // constant side tolerates a missing /Subtype.
                    if (!PdfName.LINK.Equals(annotation.GetAsName(PdfName.SUBTYPE)))
                    {
                        continue;
                    }

                    // GetAsDict resolves an indirectly referenced action and
                    // yields null when the link has no /A entry.
                    PdfDictionary action = annotation.GetAsDict(PdfName.A);
                    if (action == null)
                    {
                        continue;
                    }

                    // Only URI actions are collected; other action types (for
                    // example JavaScript) would need separate handling.
                    if (!PdfName.URI.Equals(action.GetAsName(PdfName.S)))
                    {
                        continue;
                    }

                    PdfString destination = action.GetAsString(PdfName.URI);
                    if (destination != null)
                    {
                        links.Add(destination.ToString());
                    }
                }
            }

            Progresso++;
        }
    }
    finally
    {
        // Release the file handle even when parsing throws.
        reader.Close();
    }

    foreach (string link in links)
    {
        resultado = resultado + link + "\n ";
    }

    return resultado;
}