Public Function GetAlphaNumericWords(ByVal InputText As String) As Collection
' This function splits the rich text input into unique alpha-numeric only strings
Dim words() As String
Dim characters() As Byte
Dim text As Variant
Dim i As Long
Set GetAlphaNumericWords = New Collection
text = Trim(PlainText(InputText))
If Len(text) > 0 Then
' Replace any non alphanumeric characters with a space
characters = StrConv(text, vbFromUnicode)
For i = LBound(characters) To UBound(characters)
If Not (Chr(characters(i)) Like "[A-Za-z0-9 ]") Then
characters(i) = 32 ' Space character
End If
Next
' Merge the byte array back to a string and then split on spaces
words = VBA.Split(StrConv(characters, vbUnicode))
' Add each unique word to the output collection
On Error Resume Next
For Each text In words
If (text <> vbNullString) Then GetAlphaNumericWords.Add CStr(text), CStr(text)
If Err Then Err.Clear
Next
End If
End Function
這個函數返回當前輸出的例子當你可以使用正則表達式時,這似乎很多工作。請參閱here和here以獲取良好的起點。
如果你添加一個引用到「微軟的VBScript正則表達式5.5」,並增加了以下功能(我已經包含超過必要了幾個功能,如果它是有用的其他地方):
Public Function RegEx(strInput As String, strRegEx As String, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As Boolean
Dim RegExp As VBScript_RegExp_55.RegExp
Set RegExp = New VBScript_RegExp_55.RegExp
With RegExp
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = strRegEx
End With
RegEx = RegExp.test(strInput)
Set RegExp = Nothing
End Function
Public Function RegExMatch(strInput As String, strRegEx As String, Optional MatchNo As Long = 0, Optional FirstIDX As Long, Optional Lgth As Long, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As String
Dim RegExp As VBScript_RegExp_55.RegExp, Matches As VBScript_RegExp_55.MatchCollection
Set RegExp = New VBScript_RegExp_55.RegExp
With RegExp
.Global = True
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = strRegEx
End With
If RegExp.test(strInput) Then
Set Matches = RegExp.Execute(strInput)
If MatchNo > Matches.Count - 1 Then
RegExMatch = ""
Else
RegExMatch = Matches(MatchNo).value
FirstIDX = Matches(MatchNo).FirstIndex
Lgth = Matches(MatchNo).Length
End If
Else
RegExMatch = ""
End If
Set RegExp = Nothing
End Function
Public Function RegexMatches(strInput As String, strRegEx As String, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As VBScript_RegExp_55.MatchCollection
Dim RegExp As VBScript_RegExp_55.RegExp
Set RegExp = New VBScript_RegExp_55.RegExp
With RegExp
.Global = True
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = strRegEx
End With
Set RegexMatches = RegExp.Execute(strInput)
Set RegExp = Nothing
End Function
Public Function RegExReplace(strInput As String, strRegEx As String, strReplace As String, Optional bGlobal As Boolean = True, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As String
Dim RegExp As VBScript_RegExp_55.RegExp
Set RegExp = New VBScript_RegExp_55.RegExp
With RegExp
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = strRegEx
.Global = bGlobal
End With
RegExReplace = RegExp.Replace(strInput, strReplace)
Set RegExp = Nothing
End Function
你應該能夠使用它們來製作更有用和更優雅的解決方案。
你應該考慮類似於正則表達式如下:
\b(\w+)\b
和類似下面的代碼 - 使用RegexMatches
每場比賽&子匹配,嘗試CDec
並在其上一個CDate
,如果拒絕你不得到一個錯誤(沒有錯誤將表明一個合法的日期或編號):
Dim Matches As VBScript_RegExp_55.MatchCollection
...
Set Matches = RegexMatches(InputText , "\b(\w+)\b")
If Matches.Count > 0 Then
For CtrA = 0 To Matches.Count - 1
For CtrB = 0 To Matches(CtrA).SubMatches.Count - 1
On Error Resume Next
TestVariant = Null
TestVariant = CDec(Matches(CtrA).Submatches(CtrB))
TestVariant = CDate(Matches(CtrA).Submatches(CtrB))
On Error Goto 0
If IsNull(TestVariant) Then
' Do further processing to check if the submatch can be split on non-alphanumeric characters...
Else
GetAlphaNumericWords.Add Matches(CtrA).Submatches(CtrB), Matches(CtrA).Submatches(CtrB)
End If
Next
Next
End If
我已經考慮創建一個緩衝區塔t增加一個字符。如果緩衝區的內容是數字,則繼續添加下一個字符。如果在添加下一個字符後緩衝區不再是數字,則將緩衝區作爲單詞添加到集合中。這將捕獲數字,但日期仍不會被添加。也許第二個日期特定緩衝區忽略數字文本之間的兩個特殊字符?然後,如果該值可以轉換爲日期,則整個字符串將作爲單個單詞添加。看起來似乎合理,但我相信有更好的辦法。 – Constablebrew