Multithreading 如何使用线程进行搜索

Multithreading 如何使用线程进行搜索,multithreading,delphi,search,Multithreading,Delphi,Search,我有一个Tstringlist,它包含一个很长的文件列表,我使用磁盘上的搜索来填充这些文件。 该列表包含具有不同扩展名的文件-.docx.xlsx等 这个列表的填充是通过一次搜索一个扩展来完成的,因此需要相当长的时间 我想做的是使它能够启动多个搜索,用文件名填充同一个TStringList。 我有一个想法,它应该做一些线程,但这是一张白纸为我 有什么我应该研究的提示或样本吗 下面的代码就是我今天使用的代码 function TFiles.Search(aList: TstringList; aP

我有一个Tstringlist,它包含一个很长的文件列表,我使用磁盘上的搜索来填充这些文件。 该列表包含具有不同扩展名的文件-.docx.xlsx等 这个列表的填充是通过一次搜索一个扩展来完成的,因此需要相当长的时间 我想做的是使它能够启动多个搜索,用文件名填充同一个TStringList。 我有一个想法,它应该做一些线程,但这是一张白纸为我

有什么我应该研究的提示或样本吗

下面的代码就是我今天使用的代码

function TFiles.Search(aList: TstringList; aPathname: string; const aFile: string = '*.*'; const aSubdirs: boolean = True): integer;
var
  Rec: TSearchRec;
begin
  Folders.Validate(aPathName, False);
  if FindFirst(aPathname + aFile, faAnyFile - faDirectory, Rec) = 0 then
    try
      repeat
        aList.Add(aPathname + Rec.Name);
      until FindNext(Rec) <> 0;
    finally
      FindClose(Rec);
    end;
  Result := aList.Count;
  if not aSubdirs then Exit;
  if FindFirst(aPathname + '*.*', faDirectory, Rec) = 0 then
    try
      repeat
        if ((Rec.Attr and faDirectory) <> 0)  and (Rec.Name<>'.') and (Rec.Name<>'..') then
          Files.Search(aList, aPathname + Rec.Name, aFile, True);
        until FindNext(Rec) <> 0;
    finally
      FindClose(Rec);
    end;
  Result := aList.Count;
end;
函数TFiles.Search(aList:TstringList;aPathname:string;const-aFile:string='*.*.';const-aSubdirs:boolean=True):整数;
变量
Rec:TSearchRec;
开始
文件夹。验证(aPathName,False);
如果FindFirst(aPathname+aFile,faAnyFile-faDirectory,Rec)=0,则
尝试
重复
aList.Add(aPathname+Rec.Name);
直到FindNext(Rec)0;
最后
FindClose(Rec);
结束;
结果:=列表计数;
如果不是aSubdirs,则退出;
如果FindFirst(aPathname+'*.*',faDirectory,Rec)=0,则
尝试
重复
如果((Rec.Attr和faDirectory)0)和(Rec.Name'.'和(Rec.Name'.'),则
文件搜索(列表、名称+记录名、文件、真);
直到FindNext(Rec)0;
最后
FindClose(Rec);
结束;
结果:=列表计数;
结束;

根据陆路的建议进行建设

仅遍历磁盘一次
一次搜索所有文件。这样,您只需遍历目录一次,节省了大量I/O时间

见:

多线程处理非磁盘部分
获取文件后,可以使用线程一次搜索所有文件

像这样的

type
  TSearchThread = class(TThread)
  private
    FFilenames: TStringList;
    FExtensionToSearchFor: string;
    FResultList: TStringList;
  protected
    procedure Execute; override;
  public
    constructor Create(AFilelist: TStringlist; Extension: string);
    property Filenames: TStringList read FFilenames;
    property ExtensionToSearchFor: string read FExtensionToSearchFor;
    property ResultList: TStringList read FResultList;
  end;

  TForm1 = class(TForm)
  private
    FFilenames: TStringList;
    FWorkerBees: array of TSearchThread;
    FNumberOfBeesStillWorking: cardinal;
    procedure WorkerBeeTerminate(Sender: TObject);
  public
    procedure LetsWork;
    procedure AllDone;
  end;

implementation  

constructor TSearchThread.Create(AFilelist: TStringList; Extension: string);
const
  WaitABit = true;
begin
  inherited Create(WaitABit);
  FResultList:= TStringList.Create;
  FExtensionToSearchFor:= Extension;
  FFilenames:= AFilelist;
  //Self.FreeOnTerminate:= false;
end;

procedure TSearchThread.Execute;
var
  FilenameI: string;
begin
  for i:= 0 to FFilenames.Count -1 do begin
    FileNameI:= FFilenames[i];
    if (ExtractFileExtension(FilenameI) = FExtensionToSearchFor) then begin
      FResultList.Add(FilenameI);
    end;   
  end; {for i} 
end;

procedure TForm1.LetsWork;
begin
  FileSearch(PathName, Extensions, FFilenames);
  SetLength(FWorkerBees, NumberOfExtensions);
  FNumberOfBeesStillWorking:= NumberOfExtensions;
  for i:= 0 to NumberOfExtensions - 1 do begin
    FWorkerBees[i]:= TSearchThread.Create(FFilenames, GetExtensionI(Extensions,i));
    FWorkerBees[i].OnTerminate:= WorkerBeeTerminate;
    FWorkerBees[i].Start;
  end; {for i}
end;

procedure TForm1.WorkerBeeTerminate(Sender: TObject);
begin
  Dec(FNumberOfWorkerBeesStillWorking);
  if FNumberOfWorkerBeesStillWorking = 0 then AllDone;
end;

procedure TForm1.AllDone;
begin
  //process the ResultLists for all the threads...
  //Free the threads when done
为代码计时
但在你经历这些麻烦之前

为代码计时,请参阅:

只需为每个组件编写一个普通的单线程版本和时间。
仅当某个区段占用了显著的%运行时间时才对其进行优化

探查器
为此,我喜欢使用的一个很酷的工具是:
GPProfiler
请参见:


它支持Delphi,至少支持XE3,甚至更高版本

正如其他人提到的,我认为瓶颈是磁盘IO。所以我提出了一个解决方案,它在两个线程中运行。在第一步中,我进行文件搜索,在第二步中,文件将被过滤。因此,搜索和分析是同时进行的

但是:给代码计时,找出瓶颈所在

TSearchFilterThread = class(TThread)
  private
    fFileQueue: TStringList;
    fExtensionList: TStringList;
    fCriticalSection: TCriticalSection;
    fResultList: TStringList;
    fNewDataInList: TSimpleEvent;
    function getNextFileToProcess: string;
    function matchFilter(const filename: string): boolean;
protected
    procedure execute; override;
public
    constructor create(searchForExtension: TStringList); reintroduce;
    destructor destroy; override;
    procedure appendFile(const filename: string);
    procedure waitForEnd;
    property Results: TStringlist read fResultList;
end;

procedure TSearchFilterThread.appendFile(const filename: string);
begin
  fCriticalSection.Enter;
  try
    fFileQueue.Add(filename);
    fNewDataInList.SetEvent;
  finally
    fCriticalSection.Leave;
  end;
end;

constructor TSearchFilterThread.create(searchForExtension: TStringList);
begin
  inherited create(true);
  //To protected acces to the TStringList fFileQueue
  fCriticalSection := TCriticalSection.Create;

  fExtensionList := searchForExtension;
  fExtensionList.Sorted := true;
  fExtensionList.CaseSensitive := false;

  fFileQueue := TStringList.Create;

  //Event to notify workerthread, that new data available
  fNewDataInList := TSimpleEvent.Create;
  fNewDataInList.ResetEvent;

  fResultList := TStringList.Create;

  resume;
end;

destructor TSearchFilterThread.destroy;
begin
  terminate;
  fNewDataInList.SetEvent;
  waitFor;

  fResultList.Free;
  fCriticalSection.Free;
  fFileQueue.Free;
  inherited;
end;

function TSearchFilterThread.getNextFileToProcess: string;
begin
  fCriticalSection.Enter;
  try
    if fFileQueue.Count > 0 then begin
      result := fFileQueue[0];
      fFileQueue.Delete(0);
    end
    else
      result := '';
  finally
    fCriticalSection.Leave;
  end;
end;

function TSearchFilterThread.matchFilter(const filename: string): boolean;
var
  extension: string;
begin
  extension := ExtractFileExt(filename);
  result := fExtensionList.IndexOf(extension) > -1;
end;

procedure TSearchFilterThread.execute;
const
  INFINITE: longword = $FFFFFFFF;
var
 fileName: string;
begin
  while true do begin
    fileName := getNextFileToProcess;
    if fileName <> '' then begin
      if matchFilter(filename) then
        fResultList.Add(fileName);
    end
    else if not terminated then begin
      fNewDataInList.WaitFor(INFINITE);
      fNewDataInList.resetEvent;
    end
    else if terminated then
      break;
  end;
end;


procedure TSearchFilterThread.waitForEnd;
begin
  Terminate;
  fNewDataInList.SetEvent;
  waitFor;
end;

它可能有点过大,但我想编码一点。

我怀疑这是否能提高性能。您将有多个进程同时遍历磁盘,但查找不同的文件类型。这将导致严重的磁盘损坏。最好让FindFirst查找所有文件,然后将具有所需扩展名的文件名存储在TStringList中。磁盘I/O是这里的瓶颈。最好搜索所有文件,并将适当的文件添加到列表中。另一个选项是查找文件夹中的所有文件并将其存储在内存中,然后进行检查,假设它们不会在您下面发生更改。对我来说,做类似的事情时速度要快得多,即使存储它们需要花费内存。我想还有两个人遇到了同样的问题:)另外,作为一个小小的改进,你可以离开递归,使用深度优先搜索和堆栈(或字符串列表作为堆栈)。我非常怀疑这会减少一些东西。这样做很可能会降低性能。您正在主线程中执行递归搜索,在此之前,启动一个工作线程,将通过一个锁找到的每个文件排队,该锁甚至没有线程的旋转时间。不要过多地使用关键部分。并避免在工作线程中执行这种懒惰的任务。[没有投票否决]我必须给所有的建议一些想法,看看我是否可以改进我的代码,如果不是很多,那么至少是一些。但是在本周晚些时候之前我没有时间了,因为我要去荷兰旅行。但我会回来的。。。。
TSearchFilterThread = class(TThread)
  private
    fFileQueue: TStringList;
    fExtensionList: TStringList;
    fCriticalSection: TCriticalSection;
    fResultList: TStringList;
    fNewDataInList: TSimpleEvent;
    function getNextFileToProcess: string;
    function matchFilter(const filename: string): boolean;
protected
    procedure execute; override;
public
    constructor create(searchForExtension: TStringList); reintroduce;
    destructor destroy; override;
    procedure appendFile(const filename: string);
    procedure waitForEnd;
    property Results: TStringlist read fResultList;
end;

procedure TSearchFilterThread.appendFile(const filename: string);
begin
  fCriticalSection.Enter;
  try
    fFileQueue.Add(filename);
    fNewDataInList.SetEvent;
  finally
    fCriticalSection.Leave;
  end;
end;

constructor TSearchFilterThread.create(searchForExtension: TStringList);
begin
  inherited create(true);
  //To protected acces to the TStringList fFileQueue
  fCriticalSection := TCriticalSection.Create;

  fExtensionList := searchForExtension;
  fExtensionList.Sorted := true;
  fExtensionList.CaseSensitive := false;

  fFileQueue := TStringList.Create;

  //Event to notify workerthread, that new data available
  fNewDataInList := TSimpleEvent.Create;
  fNewDataInList.ResetEvent;

  fResultList := TStringList.Create;

  resume;
end;

destructor TSearchFilterThread.destroy;
begin
  terminate;
  fNewDataInList.SetEvent;
  waitFor;

  fResultList.Free;
  fCriticalSection.Free;
  fFileQueue.Free;
  inherited;
end;

function TSearchFilterThread.getNextFileToProcess: string;
begin
  fCriticalSection.Enter;
  try
    if fFileQueue.Count > 0 then begin
      result := fFileQueue[0];
      fFileQueue.Delete(0);
    end
    else
      result := '';
  finally
    fCriticalSection.Leave;
  end;
end;

function TSearchFilterThread.matchFilter(const filename: string): boolean;
var
  extension: string;
begin
  extension := ExtractFileExt(filename);
  result := fExtensionList.IndexOf(extension) > -1;
end;

procedure TSearchFilterThread.execute;
const
  INFINITE: longword = $FFFFFFFF;
var
 fileName: string;
begin
  while true do begin
    fileName := getNextFileToProcess;
    if fileName <> '' then begin
      if matchFilter(filename) then
        fResultList.Add(fileName);
    end
    else if not terminated then begin
      fNewDataInList.WaitFor(INFINITE);
      fNewDataInList.resetEvent;
    end
    else if terminated then
      break;
  end;
end;


procedure TSearchFilterThread.waitForEnd;
begin
  Terminate;
  fNewDataInList.SetEvent;
  waitFor;
end;
procedure FileSearch(const pathName: string; filter: TSearchFilterThread);
const
  FileMask = '*.*';
var
  Rec: TSearchRec;
  Path: string;
begin
  Path := IncludeTrailingPathDelimiter(pathName);
  if FindFirst(Path + FileMask, faAnyFile - faDirectory, Rec) = 0 then
    try
      repeat
        filter.appendFile(Path + rec.Name);
      until FindNext(Rec) <> 0;
    finally
      SysUtils.FindClose(Rec);
    end;

  if FindFirst(Path + '*.*', faDirectory, Rec) = 0 then
    try
      repeat
        if ((Rec.Attr and faDirectory) <> 0) and (Rec.Name <> '.') and
          (Rec.Name <> '..') then
          FileSearch(Path + Rec.Name, filter);
      until FindNext(Rec) <> 0;
    finally
      FindClose(Rec);
    end;
end;
procedure TForm1.startButtonClick(Sender: TObject);
var
  searchFilter: TSearchFilterThread;
  searchExtensions: TStringList;
  path: string;
begin
  path := 'c:\windows';

  searchExtensions := TStringList.Create;
  searchExtensions.Add('.doc');
  searchExtensions.Add('.docx');
  searchExtensions.Add('.ini');

  searchFilter := TSearchFilterThread.create(searchExtensions);
  try
    FileSearch(path, searchFilter);
    searchFilter.waitForEnd;

    fileMemo.Lines := searchFilter.Results;
  finally
    searchFilter.Free;
    searchExtensions.Free;
  end;
end;