Delphi 2009-从字符串中删除非字母数字
我有以下代码,需要去掉所有非字母数字字符。它在 Delphi 2009 中不起作用。 Delphi 2009-从字符串中删除非字母数字,delphi,delphi-2009,delphi-2010,Delphi,Delphi 2009,Delphi 2010,我有以下代码,需要去掉所有非字母数字字符。它在 Delphi 2009 中不起作用。 unit Unit2; //Used information from // http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in interface uses Windows, Messages, SysUtils, Var
unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
Type
// Dynamic array of strings; produced by Explode and consumed by Implode.
TExplodeArray = Array Of String;
// Form class of this unit. Button1Click passes the text of Memo1 to allwords,
// which writes its intermediate and final results to ListBox1.
TForm2 = class(TForm)
Memo1: TMemo;
ListBox1: TListBox;
Button1: TButton;
// OnClick handler: runs allwords on Memo1.Text.
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
// Splits vString at every occurrence of cSeparator.
Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
// Joins the elements of cArray, placing cSeparator between them.
Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
// Returns S with every '<' ... '>' span deleted.
Function StripHTML ( S : String ) : String;
// Strips markup and unwanted characters from data, splits it on spaces and
// lists each step in ListBox1. Declared to return an Integer.
function allwords(data:string):integer;
end;
var
// Unit-level variable for the form instance.
Form2: TForm2;
// allword receives the result of Explode inside TForm2.allwords.
// allphrase is not referenced anywhere in the visible part of this unit.
allword, allphrase: TExplodeArray;
implementation
{$R *.dfm}
{ Removes every '<' ... '>' span from S and returns what is left.

  S      - text that may contain markup tags.
  Result - S without the tags.

  The closing '>' is searched for only AFTER the opening '<'. A '>' that
  appears earlier in the text (e.g. "1 > 0 <b>x</b>") is therefore left alone,
  and an opening '<' with no closing '>' ends the scan with the remaining text
  kept unchanged. Searching from the start of the string instead would yield a
  non-positive delete length in both situations and loop forever. }
Function TForm2.StripHTML ( S : String ) : String;
Var
  TagBegin, TagLength : Integer;
Begin
  TagBegin := Pos ( '<', S ); // position of the first '<'
  While TagBegin > 0 Do
  Begin
    // Position of '>' relative to the '<' equals the length of the tag,
    // because the copied tail starts exactly at the '<'.
    TagLength := Pos ( '>', Copy ( S, TagBegin, MaxInt ) );
    If TagLength = 0 Then
      Break; // unterminated tag: nothing more can be removed safely
    Delete ( S, TagBegin, TagLength ); // delete the tag
    TagBegin := Pos ( '<', S ); // search for the next '<'
  End;
  Result := S;
End;
{ Concatenates the elements of cArray into one string, inserting cSeparator
  between consecutive elements. An empty array yields an empty string. }
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
  Idx : Integer;
Begin
  Result := '';
  For Idx := Low ( cArray ) To High ( cArray ) Do
    If Idx = Low ( cArray ) Then
      Result := cArray [Idx] // first element: no leading separator
    Else
      Result := Result + cSeparator + cArray [Idx];
End;
{ Splits vString at every occurrence of cSeparator and returns the pieces in
  order. The result always has at least one element: the text after the last
  separator (the whole input when the separator never occurs). Adjacent
  separators produce empty elements. }
Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
  Remaining : String;
  SepPos, Count : Integer;
Begin
  Result := Nil;
  Remaining := vString;
  Count := 0;
  SepPos := Pos ( cSeparator, Remaining );
  While SepPos > 0 Do
  Begin
    // Append the text in front of the separator as a new element.
    Inc ( Count );
    SetLength ( Result, Count );
    Result [Count - 1] := Copy ( Remaining, 1, SepPos - 1 );
    // Drop that element together with the separator that followed it.
    System.Delete ( Remaining, 1, SepPos + Length ( cSeparator ) - 1 );
    SepPos := Pos ( cSeparator, Remaining );
  End;
  // Whatever is left after the last separator is the final element.
  SetLength ( Result, Count + 1 );
  Result [Count] := Remaining;
End;
//Copied from JclStrings
{ Returns a copy of S that contains only the characters that are members of
  Chars, in their original order.

  S      - single-byte (ANSI) input string.
  Chars  - set of characters to keep.
  Result - filtered copy of S; empty when nothing matches.

  The string is walked with PAnsiChar. From Delphi 2009 on PChar is PWideChar,
  so casting an AnsiString to PChar reads the bytes two at a time and yields
  garbage or an empty result. The loop is bounded by Length(S), so characters
  after an embedded #0 are processed as well. }
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
  Source, Dest, DestStart: PAnsiChar;
  Remaining: Integer;
begin
  Remaining := Length(S);
  // Worst case every character is kept; SetLength also makes Result unique,
  // so writing through a pointer below is safe.
  SetLength(Result, Remaining);
  if Remaining = 0 then
    Exit;
  Source := PAnsiChar(S);
  DestStart := PAnsiChar(Result);
  Dest := DestStart;
  while Remaining > 0 do
  begin
    if Source^ in Chars then
    begin
      Dest^ := Source^;
      Inc(Dest);
    end;
    Inc(Source);
    Dec(Remaining);
  end;
  // Trim to the number of characters actually written.
  SetLength(Result, Dest - DestStart);
end;
{ Returns a copy of AValue in which every CR+LF pair (#13#10) is replaced by
  the two characters '\' and 't'. All other characters are copied unchanged.

  The replacement has the same length as the pair it replaces, so the result
  buffer is allocated with the length of the input. Scanning stops at the
  first #0 character. }
function ReplaceNewlines(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  // Compare the character the pointer refers to, not the pointer itself.
  while SrcPtr^ <> #0 do
  begin
    // SrcPtr[1] is always readable: at worst it is the terminating #0.
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then
    begin
      DestPtr[0] := '\';
      DestPtr[1] := 't';
      // Skip the extra character of the pair on both sides.
      Inc(SrcPtr);
      Inc(DestPtr);
    end
    else
      DestPtr[0] := SrcPtr[0];
    Inc(SrcPtr);
    Inc(DestPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
{ Returns a copy of AValue that contains only the characters 'a'..'z',
  'A'..'Z' and '0'..'9', in their original order.

  The membership test is written as a case statement instead of "in [set]":
  with a Unicode Char the set test is reduced to the low byte of the
  character, so code points at or above U+0100 could be kept by mistake.
  Scanning stops at the first #0 character. }
function StripNonAlphaNumeric(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  // Worst case every character is kept.
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  // Compare the character the pointer refers to, not the pointer itself.
  while SrcPtr^ <> #0 do
  begin
    case SrcPtr^ of
      'a'..'z', 'A'..'Z', '0'..'9':
        begin
          DestPtr^ := SrcPtr^;
          Inc(DestPtr);
        end;
    end;
    Inc(SrcPtr);
  end;
  // Trim to the number of characters actually written.
  SetLength(Result, DestPtr - PChar(Result));
end;
{ Splits data into words and shows every processing step in ListBox1.

  Steps: remove markup with StripHTML, keep only letters, digits and spaces,
  collapse runs of spaces into one space, then split on the space character
  into the unit-level array allword.

  data   - raw input text.
  Result - number of elements stored in allword.

  Reference PHP code this routine is being ported from:
    $text = ereg_replace("[^[:alnum:]]", " ", $text);
    while(strpos($text,'  ')!==false) $text = ereg_replace("  ", " ", $text);
    $text=$string=strtolower($text);
    $text=explode(" ",$text);
    return count($text);
  NOTE(review): the PHP version also lower-cases the text and turns unwanted
  characters into spaces rather than dropping them; neither is done here. }
function TForm2.allwords(data:string):integer;
var
  i: Integer;
begin
  ListBox1.Items.Add(data);
  data := StripHTML(data);
  ListBox1.Items.Add(data);
  // StrKeepChars takes and returns AnsiString, so convert explicitly in both
  // directions. The space is kept as well; otherwise there would be nothing
  // left to split the words on.
  data := string(StrKeepChars(AnsiString(data),
    ['A'..'Z', 'a'..'z', '0'..'9', ' ']));
  ListBox1.Items.Add(data);
  // Collapse runs of spaces. One pass only halves a run, hence the loop.
  while Pos('  ', data) > 0 do
    data := StringReplace(data, '  ', ' ', [rfReplaceAll]);
  ListBox1.Items.Add(data);
  allword := Explode(' ', data);
  for i := 0 to Length(allword) - 1 do
    ListBox1.Items.Add(allword[i]);
  // Word count, matching "return count($text)" in the PHP reference.
  Result := Length(allword);
end;
{ OnClick handler of Button1: feeds the current content of Memo1 to allwords.
  The value returned by allwords is not used. }
procedure TForm2.Button1Click(Sender: TObject);
var
  MemoText: string;
begin
  MemoText := Memo1.Text;
  allwords(MemoText);
end;
end.
单元2;
//使用的信息来自
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
接口
使用
窗口、消息、系统工具、变体、类、图形、控件、窗体、,
对话框,stdctrl;
类型
Texplodearay=字符串数组;
TForm2=类别(TForm)
备忘录1:TMemo;
ListBox1:TListBox;
按钮1:t按钮;
程序按钮1点击(发送方:ToObject);
私有的
{私有声明}
公众的
{公开声明}
函数分解(constcsepator,vString:String):TExplodeArray;
函数内爆(constcsepator:String;constcarray:TExplodeArray):String;
函数StripHTML(S:String):字符串;
函数allwords(数据:字符串):整数;
结束;
变量
表2:TForm2;
所有单词,所有短语:Texplodearay;
实施
{$R*.dfm}
函数TForm2.StripHTML(S:String):String;
变量
TagBegin、TagEnd、TagLength:整数;
开始
TagBegin:=位置('',S);//查找匹配项>
TagLength:=TagEnd-TagBegin+1;
删除(S,TagBegin,TagLength);//删除标记
TagBegin:=Pos(“想到的最简单的解决方案是定义一个正则表达式,它返回输入字符串减去其中的任何非字母字符。我已经有一段时间没有在Delphi上做太多工作了-版本5是我的游乐场
Delphi 2009 的主要特性之一是:字符串现在默认使用 Unicode。
这对任何试图逐个字符处理字符串的操作都有影响。这可能是您问题的根源吗?是的,我本来打算将正则表达式 [^[:alnum:]] 与 TPerlRegEx 一起使用,但不知道如何正确使用它。[^[:alnum:]] 会非常有效。好吧,我采用了您使用正则表达式的建议,并在几个小时后解决了问题!
Uses StrUtils; //StuffString
var
Regex: TPerlRegEx;
I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
repeat
data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
until not Regex.MatchAgain;
end;