結果相當複雜,我不能說我的解決方案是否適用於任何文檔。主要問題如問題所示,RegexMatch.FirstIndex不能用於確定實際匹配是否在MS Word文檔中。這是由於正則表達式匹配是在range.Text屬性(字符串)上完成的,並且該字符串只包含與範圍對象不同的字符數量,因此索引不匹配。
所以我的解決方案是爲每場比賽,我做了一個查找整個文件的比賽。 find方法提供了一個Range對象,從中可以確定正確的頁面。
在我的特例中,一場比賽可能是同樣的事情,也是不同的價值。例如:343
在我的情況下將與Prefix-343
相同。第二個問題是,比賽必須在324
之前排序,例如123
,而不管哪一個在文檔中首先出現。
如果需要排序功能,還需要以下爲 「模塊」:
SortDictionary功能:
http://www.cpearson.com/excel/CollectionsAndDictionaries.htm
模塊 「modQSortInPlace」:
http://www.cpearson.com/Zips/modQSortInPlace.zip
如果不需要排序,則不需要它們,但需要刪除該排序根據函數調用SortDictionary Dict, True
從我的代碼。
現在我的代碼。您可以刪除Soem部分,尤其是格式化的部分。這是特定於我的情況。此外,如果你的比賽是「獨特的」,例如。不是前綴,所以你也可以簡化代碼。您需要參考「Microsoft腳本庫」。
Option Explicit
Sub ExtractRNumbers()
Dim Dict As Scripting.Dictionary
Set Dict = CreateObject("Scripting.dictionary")
Dim regExp, Match, Matches
Dim rNumber As String
Dim range As range
Set regExp = CreateObject("VBScript.RegExp")
regExp.Pattern = "\b(R-)?\d{2}-\d{4,5}(-\d)?\b"
regExp.IgnoreCase = False
regExp.Global = True
' determine main section, only extract R-Numbers from main section
' and not the Table of contents as example
' main section = section with most characters
Dim section As section
Dim maxSectionSize As Long
Dim sectionSize As Long
Dim sectionIndex As Integer
Dim currentIndex As Integer
maxSectionSize = 0
currentIndex = 1
For Each section In ActiveDocument.Sections
sectionSize = Len(section.range.text)
If sectionSize > maxSectionSize Then
maxSectionSize = sectionSize
sectionIndex = currentIndex
End If
currentIndex = currentIndex + 1
Next
Set Matches = regExp.Execute(ActiveDocument.Sections(sectionIndex).range.text)
For Each Match In Matches
' If the Document contains Tables, ActiveDocument.range.Text will contain
' BEL charachters (chr(7)) that probably define the table structure. The issue
' is that then Match.FirstIndex does not point to the actual first charachter
' of a Match in the Document.
' Also there are other things (unknwon) that lead to the same issue, eg.
' Match.FirstIndex can not be used to find the actual "matching word" within the
' document. Because of that below commented apporach does not work on a generic document
' Set range = ActiveDocument.range(Match.FirstIndex, Match.FirstIndex + Len(Match.Value))
' page = range.Information(wdActiveEndAdjustedPageNumber)
' Maybe there is a simpler solution but this works more or less
' the exception beign tables again. see http://support.microsoft.com/kb/274003
' After a match is found the whole document is searched using the find method.
' For each find result the page number is put into an array (if it is not in the array yet)
' Then the match is formatted properly.
' After formatting, it is checked if the match was previously already found
'
' If not, we add a new entry to the dictionary (key = formatted match, value = array of page numbers)
'
' If match was already found before (but potentially in a different format! eg R-87-1000 vs 87-1000 as example),
' all additional pages are added to the already found pages.
Set range = ActiveDocument.Sections(sectionIndex).range
With range.Find
.text = Match.Value
.MatchWholeWord = True
.MatchCase = True
.Wrap = wdFindStop
End With
Dim page As Variant
Dim pages() As Integer
Dim index As Integer
index = 0
ReDim pages(0)
Do While range.Find.Execute() = True
page = range.Information(wdActiveEndAdjustedPageNumber)
If Not IsInArray(page, pages) Then
ReDim Preserve pages(index)
pages(index) = page
index = index + 1
End If
Loop
' FORMAT TO PROPER R-NUMBER: This is specific to my case
rNumber = Match.Value
If Not rNumber Like "R-*" Then
rNumber = "R-" & rNumber
End If
' remove possible batch number as r-number
If Len(rNumber) > 11 Then
rNumber = Left(rNumber, Len(rNumber) - 2)
End If
' END FORMAT
If Not Dict.Exists(rNumber) Then
Dict.Add rNumber, pages
Else
Dim existingPages() As Integer
existingPages = Dict(rNumber)
For Each page In pages
If Not IsInArray(page, existingPages) Then
' add additonal pages. this means that the previous match
' was formatted different, eg R-87-1000 vs 87-1000 as example
ReDim Preserve existingPages(UBound(existingPages) + 1)
existingPages(UBound(existingPages)) = page
Dict(rNumber) = existingPages
End If
Next
End If
Next
'sort dictionary by key (R-Number)
SortDictionary Dict, True
Dim fso
Set fso = CreateObject("Scripting.FileSystemObject")
Dim stream
' Create a TextStream.
Set stream = fso.CreateTextFile(ActiveDocument.Path & "\" & ActiveDocument.Name & "-rNumbers.txt", True)
Dim key As Variant
Dim output As String
Dim i As Integer
For Each key In Dict.Keys()
output = key & vbTab
pages = Dict(key)
For i = LBound(pages) To UBound(pages)
output = output & pages(i) & ", "
Next
output = Left(output, Len(output) - 2)
stream.WriteLine output
Next
Set Dict = Nothing
stream.Close
End Sub
Private Function IsInArray(page As Variant, pages As Variant) As Boolean
Dim i As Integer
IsInArray = False
For i = LBound(pages) To UBound(pages)
If pages(i) = page Then
IsInArray = True
Exit For
End If
Next
End Function
爲什麼你需要一個正則表達式來匹配頁碼?你能多解釋一下嗎?你可以找到頁碼... http://stackoverflow.com/questions/13327813/vba-return-page-number-from-selection-find-using-text-from-array – bonCodigo
@bonCodigo OP需要匹配*東西*,然後檢索相應的頁碼。 – Tomalak
@Tomalak然後它是有道理的...否則我想知道..然後OP必須告訴我們什麼想匹配....:$ – bonCodigo