Delphi 2009-从字符串中删除非字母数字

Delphi 2009-从字符串中删除非字母数字,delphi,delphi-2009,delphi-2010,Delphi,Delphi 2009,Delphi 2010,我有以下代码,需要去掉所有非字母数字字符。它在德尔福2009中不起作用 unit Unit2; //Used information from // http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in interface uses Windows, Messages, SysUtils, Var

我有以下代码,需要去掉字符串中所有非字母数字的字符。它在 Delphi 2009 中不起作用:

unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;
Type
     // Dynamic array of strings: the result type of Explode and the input type of Implode.
     TExplodeArray = Array Of String;

  // Main form: hosts the input memo, the output list box and the trigger button,
  // and declares the string helper methods implemented in this unit.
  TForm2 = class(TForm)
    Memo1: TMemo;        // text passed to allwords by Button1Click
    ListBox1: TListBox;  // allwords appends its intermediate and final results here
    Button1: TButton;
    // OnClick handler; runs allwords on the memo text.
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
    // Splits vString at every occurrence of cSeparator.
    Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
    // Joins the elements of cArray, placing cSeparator between them.
    Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
    // Returns S with '<...>' spans deleted.
    Function StripHTML ( S : String ) : String;
    // Cleans up data, splits it on spaces into the global allword array and lists each step.
    function allwords(data:string):integer;
  end;

var
  Form2: TForm2;
  allword, allphrase: TExplodeArray;

implementation
{$R *.dfm}
{ Removes every '<...>' span from S and returns what is left.

  The closing '>' is searched for only AFTER the opening '<'. Previously the
  first '>' anywhere in the string was used, so input such as 'a > b <i>' or an
  unterminated '<' produced a non-positive delete length and the loop never
  terminated.

  An unterminated '<' (no '>' after it) is left in place and ends the scan. }
Function TForm2.StripHTML ( S : String ) : String;
Var
     TagBegin, RelEnd : Integer;
Begin
     TagBegin := Pos ( '<', S );      // position of the first '<'

     While ( TagBegin > 0 ) Do
          Begin
          // RelEnd is the offset of '>' counted from the character after '<'
          RelEnd := Pos ( '>', Copy ( S, TagBegin + 1, MaxInt ) );
          If RelEnd = 0 Then
               Break;                 // no closing '>': nothing more to strip

          // '<' itself plus RelEnd characters up to and including '>'
          System.Delete ( S, TagBegin, RelEnd + 1 );
          TagBegin := Pos ( '<', S ); // look for the next tag
          End;

     Result := S;
End;
{ Concatenates the elements of cArray into one string with cSeparator placed
  between consecutive elements. An empty array yields an empty string. }
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
     Idx : Integer;
Begin
     Result := '';
     For Idx := Low ( cArray ) To High ( cArray ) Do
          Begin
          // The separator goes in front of every element except the first
          If Idx > Low ( cArray ) Then
               Result := Result + cSeparator;
          Result := Result + cArray [Idx];
          End;
End;

{ Splits vString at each occurrence of cSeparator and returns the pieces in
  order. The result always has at least one element: the text after the last
  separator (the whole string when the separator does not occur). }
Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
     Remaining : String;
     SepPos, Count : Integer;
Begin
     Remaining := vString;
     Count := 0;
     SetLength ( Result, 0 );

     SepPos := Pos ( cSeparator, Remaining );
     While SepPos > 0 Do
          Begin
          Inc ( Count );
          SetLength ( Result, Count );
          Result[Count - 1] := Copy ( Remaining, 1, SepPos - 1 );
          // Drop the piece just stored together with the separator that followed it
          System.Delete ( Remaining, 1, SepPos + Length ( cSeparator ) - 1 );
          SepPos := Pos ( cSeparator, Remaining );
          End;

     // Whatever is left after the last separator is the final element
     SetLength ( Result, Count + 1 );
     Result[Count] := Remaining;
End;
// Adapted from JclStrings.
// Returns a copy of S containing only the characters that are members of Chars,
// in their original order. Scanning stops at the first #0 in S.
//
// The pointers are PAnsiChar on purpose: from Delphi 2009 on, PChar is
// PWideChar, and casting an AnsiString to PChar makes the loop read pairs of
// bytes as UTF-16 code units, so nothing matches Chars and the result is empty.
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
  Source, Dest, DestStart: PAnsiChar;
begin
  if S = '' then
  begin
    Result := '';
    Exit;
  end;

  // SetLength gives Result its own buffer, so writing through Dest is safe
  SetLength(Result, Length(S));
  Source := PAnsiChar(S);
  DestStart := PAnsiChar(Result);
  Dest := DestStart;

  while Source^ <> #0 do
  begin
    if Source^ in Chars then
    begin
      Dest^ := Source^;
      Inc(Dest);
    end;
    Inc(Source);
  end;

  // Number of characters actually written
  SetLength(Result, Dest - DestStart);
end;
// Returns AValue with every CR+LF pair (#13#10) replaced by the two characters
// backslash and 't'. A lone #13 or #10 is copied unchanged.
//
// The replacement is the same length as the pair it replaces, so the output
// never outgrows the buffer sized from the input.
function ReplaceNewlines(const AValue: string): string;
var
  SrcPtr, DestPtr, DestStart: PChar;
begin
  SetLength(Result, Length(AValue));
  SrcPtr := PChar(AValue);
  DestStart := PChar(Result);
  DestPtr := DestStart;

  // Test the character under the pointer; comparing the pointer itself with #0
  // never detects the terminator.
  while SrcPtr^ <> #0 do begin
    // When SrcPtr[0] is the last character, SrcPtr[1] is the terminator, so
    // the look-ahead stays inside the string.
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
      DestPtr[0] := '\';
      DestPtr[1] := 't';
      Inc(SrcPtr, 2);
      Inc(DestPtr, 2);
    end else begin
      DestPtr^ := SrcPtr^;
      Inc(SrcPtr);
      Inc(DestPtr);
    end;
  end;

  SetLength(Result, DestPtr - DestStart);
end;
// Returns AValue with every character other than 'a'..'z', 'A'..'Z' and
// '0'..'9' removed. Scanning stops at the first #0.
//
// A case statement is used for the range test instead of "in [...]": from
// Delphi 2009 on Char is WideChar, and a set membership test reduces it to a
// byte, whereas case ranges compare the full character in every Delphi version.
function StripNonAlphaNumeric(const AValue: string): string;
var
  SrcPtr, DestPtr, DestStart: PChar;
begin
  SetLength(Result, Length(AValue));
  SrcPtr := PChar(AValue);
  DestStart := PChar(Result);
  DestPtr := DestStart;

  // Test the character under the pointer; comparing the pointer itself with #0
  // never detects the terminator.
  while SrcPtr^ <> #0 do begin
    case SrcPtr^ of
      'a'..'z', 'A'..'Z', '0'..'9':
        begin
          DestPtr^ := SrcPtr^;
          Inc(DestPtr);
        end;
    end;
    Inc(SrcPtr);
  end;

  SetLength(Result, DestPtr - DestStart);
end;
// Cleans up data step by step, splits it on single spaces into the global
// allword array, and appends every intermediate value and every resulting
// element to ListBox1.
// Returns the number of elements stored in allword.
function TForm2.allwords(data:string):integer;
var i:integer;
begin
  listbox1.Items.add(data);
  data:= StripHTML ( data );
  listbox1.Items.add(data);
  //////////////////////////////////////////////////////////////
  // StrKeepChars works on AnsiString; the conversions are spelled out so the
  // string <-> AnsiString round trip is explicit.
  data := string(StrKeepChars(AnsiString(data), ['A'..'Z', 'a'..'z', '0'..'9']));
  // NOTE(review): this removes spaces too, whereas the PHP below replaces
  // non-alphanumerics WITH a space - confirm which behaviour is wanted.
  //////////////////////////////////////////////////////////////
  listbox1.Items.add(data);
  data := stringreplace(data,'  ',' ', [rfReplaceAll, rfIgnoreCase] );
  //Replace two spaces with one.
  listbox1.Items.add(data);
  allword:= explode(' ',data);
 { // Converting the following PHP code to Delphi
    $text = ereg_replace("[^[:alnum:]]", " ", $text);
    while(strpos($text,'  ')!==false) $text = ereg_replace("  ", " ", $text);
    $text=$string=strtolower($text);
    $text=explode(" ",$text);
    return count($text);
}
  for I := 0 to Length(allword) - 1 do
    listbox1.Items.Add(allword[i]);
  // The function result was previously never assigned (undefined value);
  // return the element count, as the PHP original does with count($text).
  Result := Length(allword);
end;
// Click handler for Button1: feeds the current memo text through allwords,
// which reports its work in the list box. The returned count is not used here.
// (Pattern being emulated: [^[:alnum:]])
procedure TForm2.Button1Click(Sender: TObject);
begin
  Self.allwords(Self.Memo1.Text);
end;

end.
单元2;
//使用的信息来自
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
接口
使用
窗口、消息、系统工具、变体、类、图形、控件、窗体、,
对话框,stdctrl;
类型
Texplodearay=字符串数组;
TForm2=类别(TForm)
备忘录1:TMemo;
ListBox1:TListBox;
按钮1:t按钮;
程序按钮1点击(发送方:ToObject);
私有的
{私有声明}
公众的
{公开声明}
函数分解(constcsepator,vString:String):TExplodeArray;
函数内爆(constcsepator:String;constcarray:TExplodeArray):String;
函数StripHTML(S:String):字符串;
函数allwords(数据:字符串):整数;
结束;
变量
表2:TForm2;
所有单词,所有短语:Texplodearay;
实施
{$R*.dfm}
函数TForm2.StripHTML(S:String):String;
变量
TagBegin、TagEnd、TagLength:整数;
开始
TagBegin:=位置('',S);//查找匹配项>
TagLength:=TagEnd-TagBegin+1;
删除(S,TagBegin,TagLength);//删除标记

TagBegin:=Pos(“

想到的最简单的解决方案是定义一个正则表达式,用它返回去掉所有非字母数字字符之后的输入字符串。

我已经有一段时间没有在Delphi上做太多工作了-版本5是我的游乐场

Delphi 2009 的主要特性之一,不就是现在默认使用 Unicode 吗?


这对任何试图逐个字符处理的操作都有影响。这可能是您问题的根源吗?

是的,我本来打算将正则表达式 [^[:alnum:]] 与 TPerlRegEx 一起使用,但不知道如何正确使用它。[^[:alnum:]] 应该会非常有效。

好吧,我采用了您关于正则表达式的建议,几个小时后终于解决了!

Uses StrUtils; //StuffString

var
    Regex: TPerlRegEx;
  I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
    repeat
    data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
    until not Regex.MatchAgain;
end;