我有下面的代碼,需要去掉所有非字母數字字符。它不是在德爾福2009年工作德爾福2009年 - 去掉字符串中的非字母數字
unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
Type
TExplodeArray = Array Of String;
TForm2 = class(TForm)
Memo1: TMemo;
ListBox1: TListBox;
Button1: TButton;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
Function Explode (Const cSeparator, vString : String) : TExplodeArray;
Function Implode (Const cSeparator : String; Const cArray : TExplodeArray) : String;
Function StripHTML (S : String) : String;
function allwords(data:string):integer;
end;
var
Form2: TForm2;
allword, allphrase: TExplodeArray;
implementation
{$R *.dfm}
Function TForm2.StripHTML (S : String) : String;
Var
TagBegin, TagEnd, TagLength : Integer;
Begin
TagBegin := Pos ('<', S); // search position of first <
While (TagBegin > 0) Do
Begin // while there is a < in S
TagEnd := Pos ('>', S); // find the matching >
TagLength := TagEnd - TagBegin + 1;
Delete (S, TagBegin, TagLength); // delete the tag
TagBegin := Pos ('<', S); // search for next <
End;
Result := S; // give the result
End;
Function TForm2.Implode (Const cSeparator : String; Const cArray : TExplodeArray) : String;
Var
i : Integer;
Begin
Result := '';
For i := 0 To Length (cArray) - 1 Do
Begin
Result := Result + cSeparator + cArray [i];
End;
System.Delete (Result, 1, Length (cSeparator));
End;
Function TForm2.Explode (Const cSeparator, vString : String) : TExplodeArray;
Var
i : Integer;
S : String;
Begin
S := vString;
SetLength (Result, 0);
i := 0;
While Pos (cSeparator, S) > 0 Do
Begin
SetLength (Result, Length (Result) + 1);
Result[i] := Copy (S, 1, Pos (cSeparator, S) - 1);
Inc (i);
S := Copy (S, Pos (cSeparator, S) + Length (cSeparator), Length (S));
End;
SetLength (Result, Length (Result) + 1);
Result[i] := Copy (S, 1, Length (S));
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
Source, Dest: PChar;
begin
SetLength(Result, Length(S));
UniqueString(Result);
Source := PChar(S);
Dest := PChar(Result);
while (Source <> nil) and (Source^ <> #0) do
begin
if Source^ in Chars then
begin
Dest^ := Source^;
Inc(Dest);
end;
Inc(Source);
end;
SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> {greater than less than} #0 do begin
if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
DestPtr[0] := '\';
DestPtr[1] := 't';
Inc(SrcPtr);
Inc(DestPtr);
end else
DestPtr[0] := SrcPtr[0];
Inc(SrcPtr);
Inc(DestPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> #0 do begin
if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
DestPtr[0] := SrcPtr[0];
Inc(DestPtr);
end;
Inc(SrcPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
listbox1.Items.add(data);
data:= StripHTML (data);
listbox1.Items.add(data);
//////////////////////////////////////////////////////////////
data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
// Strips out everything data comes back blank in Delphi 2009
//////////////////////////////////////////////////////////////
listbox1.Items.add(data);
data := stringreplace(data,' ',' ', [rfReplaceAll, rfIgnoreCase]);
//Replace two spaces with one.
listbox1.Items.add(data);
allword:= explode(' ',data);
{ // Converting the following PHP code to Delphi
$text = ereg_replace("[^[:alnum:]]", " ", $text);
while(strpos($text,' ')!==false) $text = ereg_replace(" ", " ", $text);
$text=$string=strtolower($text);
$text=explode(" ",$text);
return count($text);
}
for I := 0 to Length(allword) - 1 do
listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]
allwords(memo1.Text);
end;
end.
我還會怎麼做呢?
是的,我打算使用RegEx [^ [:alnum:]]和TPerlRegEx,但並不真正知道如何正確使用它。 [^ [:alnum:]]可以很好地工作。 – Brad
好吧,我用RegEx的建議,並在數小時後算出來! – Brad