0
我目前在一個VB.Net控制檯應用程序中使用下面的代碼,該應用程序需要一個文本文件的內容並提取某些信息,然後將其導出到CSV。自動化PDF到文本VB.net
所有似乎工作正常,但問題是該文件最初通過PDF(只有選項可能),我必須手動打開文件在Adobe和'保存爲文本'。
是否有自動將PDF轉換爲文本文件或讀取PDF而不是文本文件的方式。
任何指導或選項,將不勝感激
Dim iLine, iEnd, c, iField As Integer
Dim iSecs, iMax As Long
Dim sText, sTemp, sSchema As String
Dim sHotel, sEndDate, sMon, sPLU, sTots, sValue, sDept, sFile, sOutFile, sDesc As String
Dim tdate As Date
Dim con As New OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0; Data Source=C:\temp\TX.accdb;")
Dim LUse As Boolean
sHotel = "Unknown Hotel"
sEndDate = "01/01/2015"
sMon = "MAR"
sPLU = ""
sTots = "0"
sValue = "0"
sDept = "Unknown Dept"
sDesc = ""
LUse = True
sTemp = ""
iField = 0
sSchema = "Chester"
'Open input file
sFile = "c:\temp\input.txt"
Dim InFile As New System.IO.StreamReader(sFile)
'Open lookup data table
con.Open()
Dim dbAdapter As OleDbDataAdapter = New OleDbDataAdapter(_
"SELECT * FROM Plookup", con)
Dim dsTX As DataSet = New DataSet()
Dim changes As DataTable
Dim cmdbuilder As OleDbCommandBuilder = New OleDbCommandBuilder(dbAdapter)
dbAdapter.FillSchema(dsTX, SchemaType.Source, "Plookup")
dbAdapter.Fill(dsTX, "Plookup")
Dim rstx As DataTable = dsTX.Tables(0)
iMax = rstx.Rows.Count
Dim productrow() As Data.DataRow
'Open Output file
iSecs = Timer
sOutFile = "c:\temp\TX" & Format$(Now, "yymmdd") & Trim$(Str$(iSecs)) & ".csv"
FileCopy(sFile, "c:\temp\TX" & Format$(Now, "yymmdd") & Trim$(Str$(iSecs)) & ".txt")
Dim OutFile As New System.IO.StreamWriter(sOutFile)
'Write header
OutFile.WriteLine("outlet,dept,epos,tots sold,total price,date of sales")
iLine = 0
Do While InFile.Peek() <> -1
'Read in text
iLine = iLine + 1
sText = InFile.ReadLine
sText = sText.Replace(",", "")
If Len(sText) > 2 And Len(sText) < 9 Then
If Mid$(sText, 3, 1) = "-" Then ' Department Name
sText = sText & Space(9 - Len(sText))
End If
End If
'Process all rows except header row - read data into array
If Len(sText) > 8 Then
Select Case Left(sText, 7)
Case "Consoli" ' Ignore
Case "Quanti " ' Ignore
Case "Group b" ' Ignore - but next row is the Hotel Name
iLine = iLine + 1
sText = InFile.ReadLine
sText = sText.Replace(",", "")
sHotel = Trim$(Left(sText, 20)) 'The username follows so we may truncate the hotel name
Case "Date ra" ' End date
sEndDate = Mid$(sText, 29, 2) & "/" & Mid$(sText, 32, 2) & "/" & Mid$(sText, 35, 4)
tdate = CDate(sEndDate).AddDays(-1)
sEndDate = tdate.ToString("dd/MM/yyyy")
Case Else 'Possible Code
If Mid$(sText, 3, 1) = "-" Then ' Department Name
sDept = Trim(sText)
Else
If IsNumeric(Left(sText, 7)) Then 'Got a code
sPLU = Trim(Str(Val(Left(sText, 7))))
'We don't know where the description ends as it contains spaces
'So best way is to start at the end and work back...
iEnd = Len(sText)
iField = 0
For c = iEnd To 9 Step -1
If Not (Mid(sText, c, 1) = " ") Or iField > 10 Then
sTemp = Mid(sText, c, 1) & sTemp
Else
iField = iField + 1
If iField = 9 Then
sValue = sTemp
ElseIf iField = 11 Then
sTots = sTemp
End If
sTemp = ""
End If
Next
If iField = 10 Then
sTots = Trim(sTemp)
sDesc = ""
Else
sDesc = Trim$(sTemp)
End If
'lookup code
productrow = rstx.Select("FileID = 'Chester' and PLU = '" & sPLU & "'")
If productrow.Length = 0 Then ' product not found
iMax = iMax + 1
rstx.Rows.Add(sSchema, sPLU, sDesc, False)
LUse = True
Else
LUse = Not productrow(0)("Exclude")
End If
If (Val(sTots) + Val(sValue) > 0) And LUse Then ' We have a non-zero sale or value and it is not excluded
OutFile.WriteLine(sHotel & "," & sDept & "," & sPLU & "," & sTots & "," & sValue & "," & sEndDate)
End If
End If
End If
End Select
End If
Loop
'dbAdapter.Update(dsTX.Tables(0))
'Close input/output csv files
'rstx.Rows.Add("303030", "Another Test", False)
dbAdapter.UpdateCommand = cmdbuilder.GetUpdateCommand(True)
dbAdapter.InsertCommand = cmdbuilder.GetInsertCommand(True)
dbAdapter.DeleteCommand = cmdbuilder.GetDeleteCommand()
changes = rstx.GetChanges()
If changes IsNot Nothing Then dbAdapter.Update(changes)
InFile.Close()
OutFile.Close()
con.Close()
我相信有很多庫將PDF轉換爲文本..一個簡單的谷歌搜索給了我很少..例如:http://www.squarepdf.net/how-to-convert-pdf-to-text -in-net-vb你有沒有試過任何庫? –
我還沒有嘗試過任何圖書館,因爲我正試圖查看它是否可以在沒有它們的情況下完成。我會看看你詳細介紹的這個鏈接。謝謝 – elmonko