我有以下代碼,它查看我電子表格的A列中的每個單元格,搜索它在指定PDF中找到的文本,然後提取頁面在那裏它將文本視爲PDF,並用電子表格單元格中的值命名。代碼的工作原理很慢,但我可能需要在PDF中搜索多達200個字,最長可達600頁。有沒有辦法讓代碼更快?目前它循環遍歷每個單元搜索每個頁面循環遍歷每個單詞,直到它找到單元格中的單詞。Excel VBA搜索PDF和提取文本並命名頁面
Sub test_with_PDF()
Dim objApp As Object
Dim objPDDoc As Object
Dim objjso As Object
Dim wordsCount As Long
Dim page As Long
Dim i As Long
Dim strData As String
Dim strFileName As String
Dim lastrow As Long, c As Range
Dim PageNos As Integer
Dim newPDF As Acrobat.CAcroPDDoc
Dim NewName As String
Dim Folder As String
lastrow = Sheets("Sheet1").Cells(Rows.Count, "A").End(xlUp).Row
strFileName = selectFile()
Folder = GetFolder()
Set objApp = CreateObject("AcroExch.App")
Set objPDDoc = CreateObject("AcroExch.PDDoc")
'AD.1 open file, if =false file is damage
If objPDDoc.Open(strFileName) Then
Set objjso = objPDDoc.GetJSObject
PageNos = 0
For Each c In Sheets("Sheet1").Range("A2:A" & lastrow)
For page = 0 To objPDDoc.GetNumPages - 1
wordsCount = objjso.GetPageNumWords(page)
For i = 0 To wordsCount
If InStr(1, c.Value, ", ") = 0 Then
If objjso.getPageNthWord(page, i) = c.Value Then
PageNos = PageNos + 1
If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then
Set newPDF = CreateObject("AcroExch.pdDoc")
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.Open (NewName)
newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
Else
Set newPDF = CreateObject("AcroExch.PDDoc")
newPDF.Create
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.InsertPages -1, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
End If
End If
Else
If objjso.getPageNthWord(page, i) = c.Offset(0, 1).Value Then
If objjso.getPageNthWord(page, i + 1) = c.Offset(0, 2).Value Then
PageNos = PageNos + 1
If FileExist(Folder & "\" & c.Offset(0, 4) & ".pdf") Then
Set newPDF = CreateObject("AcroExch.pdDoc")
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.Open (NewName)
newPDF.InsertPages lngPages, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
Else
Set newPDF = CreateObject("AcroExch.PDDoc")
newPDF.Create
NewName = Folder & "\" & c.Offset(0, 4) & ".pdf"
newPDF.InsertPages -1, objPDDoc, page, 1, 0
newPDF.Save 1, NewName
newPDF.Close
Set newPDF = Nothing
Exit For
End If
Exit For
End If
End If
End If
Next i
Next page
c.Offset(0, 3).Value = PageNos
PageNos = 0
Next c
MsgBox "Done"
Else
MsgBox "error!"
End If
End Sub
Function FileExist(path As String) As Boolean
If Dir(path) <> vbNullString Then FileExist = True
End Function
Function selectFile()
Dim fd As FileDialog, fileName As String
On Error GoTo ErrorHandler
Set fd = Application.FileDialog(msoFileDialogFilePicker)
fd.AllowMultiSelect = False
If fd.Show = True Then
If fd.SelectedItems(1) <> vbNullString Then
fileName = fd.SelectedItems(1)
End If
Else
'Exit code if no file is selected
End
End If
'Return Selected FileName
selectFile = fileName
Set fd = Nothing
Exit Function
ErrorHandler:
Set fd = Nothing
MsgBox "Error " & Err & ": " & Error(Err)
End Function
Function GetFolder() As String
Dim fldr As FileDialog
Dim sItem As String
Set fldr = Application.FileDialog(msoFileDialogFolderPicker)
With fldr
.Title = "Select the Folder where you want you new PDFs to go"
.AllowMultiSelect = False
.InitialFileName = Application.DefaultFilePath
If .Show <> -1 Then GoTo NextCode
sItem = .SelectedItems(1)
End With
NextCode:
GetFolder = sItem
Set fldr = Nothing
End Function
非常感謝提前。
顯然,您正在使用外部庫來搜索PDF文件(這是實現您想要的唯一可能的方式)。所以,代碼的速度取決於這些庫。從不同的軟件供應商處獲得更好的新版本,就完成了。簡而言之:你不需要更好/更快的VBA代碼,而是更好的外部庫。所以,本質上你的問題是「推薦更好的庫從VBA中調用」。然而,這個網站的這些問題是[off-topic](http://stackoverflow.com/help/on-topic)。 – Ralph