如何使用Delphi读取文本文件中的最后一行

如何使用Delphi读取文本文件中的最后一行,delphi,ascii,delphi-xe2,pascal,Delphi,Ascii,Delphi Xe2,Pascal,我需要读取一些非常大的文本文件中的最后一行(从数据中获取时间戳)。TStringlist是一种简单的方法,但它返回内存不足错误。我试图使用seek和blockread,但是缓冲区中的字符都是无意义的。这与unicode有关吗 Function TForm1.ReadLastLine2(FileName: String): String; var FileHandle: File; s,line: string; ok: 0..1;

我需要读取一些非常大的文本文件中的最后一行(从数据中获取时间戳)。TStringlist是一种简单的方法,但它返回内存不足错误。我试图使用seek和blockread,但是缓冲区中的字符都是无意义的。这与unicode有关吗

    // Returns the last line of a single-byte (ANSI/ASCII) text file without
    // loading the whole file into memory.
    //
    // The file is read backwards from its end in fixed-size chunks until a line
    // terminator that precedes the last line is found. One trailing terminator
    // (CR/LF, lone LF or lone CR) at the very end of the file is ignored, so a
    // file that ends with an end-of-line still yields its last data line.
    //
    //   FileName - path of the file to read.
    //   Result   - the last line without its terminator; '' for an empty file.
    //
    // Raises the usual stream exceptions (EFOpenError, EReadError) if the file
    // cannot be opened or read.
    Function TForm1.ReadLastLine2(FileName: String): String;
    const
      ChunkSize = 4096;
    var
      Stream: TFileStream;
      Chunk: array[0..ChunkSize - 1] of AnsiChar;  // AnsiChar: one byte per file character
      Tail, Piece: AnsiString;
      Remaining: Int64;                            // bytes of the file not yet read
      ToRead, LineEnd, i: Integer;
      Found: Boolean;
    begin
      Result := '';
      Tail := '';
      LineEnd := 0;
      Found := False;
      Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
      try
        Remaining := Stream.Size;
        while (Remaining > 0) and not Found do begin
          // Read the next chunk towards the start of the file and prepend it.
          if Remaining < ChunkSize then
            ToRead := Integer(Remaining)
          else
            ToRead := ChunkSize;
          Dec(Remaining, ToRead);
          Stream.Position := Remaining;
          Stream.ReadBuffer(Chunk[0], ToRead);
          SetString(Piece, PAnsiChar(@Chunk[0]), ToRead);
          Tail := Piece + Tail;

          // Ignore a single terminator at the very end of the file.
          LineEnd := Length(Tail);
          if (LineEnd > 0) and (Tail[LineEnd] = #10) then Dec(LineEnd);
          if (LineEnd > 0) and (Tail[LineEnd] = #13) then Dec(LineEnd);

          // Look backwards for the terminator of the previous line.
          i := LineEnd;
          while (i > 0) and not (Tail[i] in [#10, #13]) do
            Dec(i);
          if i > 0 then begin
            Result := String(Copy(Tail, i + 1, LineEnd - i));
            Found := True;
          end;
        end;

        // No terminator before the last line: the file is a single line.
        if not Found then
          Result := String(Copy(Tail, 1, LineEnd));
      finally
        Stream.Free;
      end;
    end;

只是想到了一个新的解决方案

同样,可能会有更好的,但这是我想到的最好的

// Returns the last line of the text file at textFilePath, or '' when the file
// contains no lines.
//
// NOTE: the whole file is loaded into a TStringList, so this is only suitable
// for files that fit comfortably in memory.
function GetLastLine(textFilePath: string): string;
var
  lines: TStringList;
begin
  Result := '';
  lines := TStringList.Create;
  try
    lines.LoadFromFile(textFilePath);
    // Guard against an empty file: indexing lines[-1] would raise a
    // "list index out of bounds" error.
    if lines.Count > 0 then
      Result := lines[lines.Count - 1];
  finally
    lines.Free;
  end;
end;

您的字符类型是两个字节,所以缓冲区是16个字节。然后使用blockread将sizeof(buffer)-1字节读入其中,并检查前2字节字符是否等于#13

sizeof(buffer)-1很可疑(这个-1是从哪里来的?)。其余部分可能是对的,但前提是您的输入文件是utf16编码

同时,每次读取8(或16)个字符,但只比较一个字符,然后再次进行搜索。这也不是很合乎逻辑


如果您的编码不是utf16,我建议您将缓冲区元素的类型更改为ansichar,并删除-1

作为对kopiks建议的回应,我找到了使用TFileStream的方法。它可以处理简单的测试文件,不过用于各种csv文件时可能还会遇到一些问题。另外,我也不认为这是最高效的方法

    // Click handler: lets the user pick a file, reads its last line through
    // ReadLastLineOfTextFile, reports the estimated row count in a message box
    // and appends the line to ListBox1. Does nothing if the dialog is cancelled.
    procedure TForm1.Button6Click(Sender: TObject);
    var
      LastLine: String;
      TotalBytes, RowEstimate: Integer;
    begin
      if not OpenDialog1.Execute then
        Exit;

      LastLine := ReadLastLineOfTextFile(OpenDialog1.FileName, TotalBytes, RowEstimate);
      ShowMessage('approximately ' + IntToStr(RowEstimate) + ' Rows');
      ListBox1.Items.Add(LastLine);
    end;

      // Reads the last line of a single-byte (ANSI) text file by inspecting only
      // the final MAXLINELENGTH bytes of the file.
      //
      //   FileName      - path of the file to read.
      //   StreamSize    - out: file size in bytes (clamped to High(Integer)).
      //   ApproxNumRows - out: file size divided by the byte length of the last
      //                   line (including its terminator); 0 for an empty file.
      //   Result        - the last line without any line terminator. If the last
      //                   line is longer than MAXLINELENGTH bytes, only its final
      //                   MAXLINELENGTH bytes are returned.
      //
      // One trailing terminator (CR/LF, lone LF or lone CR) is ignored, so the
      // function works whether or not the file ends with an end-of-line.
      Function TForm1.ReadLastLineOfTextFile(const FileName: String; var StreamSize, ApproxNumRows : Integer): String;
      const
        MAXLINELENGTH = 256;
      var
        Stream: TFileStream;
        FileBytes: Int64;
        BlockSize, LineStart, LineEnd, LineBytes: Integer;
        Buffer: array [0..MAXLINELENGTH] of AnsiChar;
        Line: AnsiString;
      begin
        Result := '';
        StreamSize := 0;
        ApproxNumRows := 0;

        Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
        try
          FileBytes := Stream.Size;
          // Stream.Size is Int64; avoid silent wrap-around for files over 2 GB.
          if FileBytes > High(Integer) then
            StreamSize := High(Integer)
          else
            StreamSize := Integer(FileBytes);

          if FileBytes = 0 then
            Exit;                       // empty file: nothing to read

          if FileBytes < MAXLINELENGTH then
            BlockSize := Integer(FileBytes)
          else
            BlockSize := MAXLINELENGTH;

          // Fetch the tail of the file with a single read.
          Stream.Position := FileBytes - BlockSize;
          Stream.ReadBuffer(Buffer[0], BlockSize);

          // LineEnd is the exclusive end of the line text: skip one trailing EOL.
          LineEnd := BlockSize;
          if (LineEnd > 0) and (Buffer[LineEnd - 1] = #10) then Dec(LineEnd);
          if (LineEnd > 0) and (Buffer[LineEnd - 1] = #13) then Dec(LineEnd);

          // Walk back to the terminator of the previous line (or the block start).
          LineStart := LineEnd;
          while (LineStart > 0) and not (Buffer[LineStart - 1] in [#10, #13]) do
            Dec(LineStart);

          if LineEnd > LineStart then begin
            SetString(Line, PAnsiChar(@Buffer[LineStart]), LineEnd - LineStart);
            Result := String(Line);
          end;

          // Estimate the row count from the length of this one line.
          LineBytes := BlockSize - LineStart;
          if LineBytes > 0 then
            ApproxNumRows := Round(StreamSize / LineBytes);
        finally
          Stream.Free;                  // the original never released the stream
        end;

        // Kept from the original implementation: shows the line that was read.
        ShowMessage(Result);
      end;
程序TForm1.按钮6单击(发送方:TObject);
变量
StreamSize,ApproxNumRows:整数;
TempStr:字符串;
开始
如果是OpenDialog1.Execute,则开始
TempStr:=ReadLastLineOfTextFile(OpenDialog1.FileName、StreamSize、ApproxNumRows);
//TempStr:=ReadFileStream('c:\temp\CSVTestFile.csv');
ShowMessage('大约'+IntToStr(近似numrows)+'行');
ListBox1.Items.Add(TempStr);
结束;
结束;
函数TForm1.ReadLastLineOfTextFile(常量文件名:String;var StreamSize,ApproxNumRows:Integer):String;
常数
MAXLINELENGTH=256;
变量
流:TFileStream;
块大小,字符数:整数;
Hash13Found:布尔型;
缓冲区:AnsiChar的数组[0..MAXLINELENGTH];
开始
Hash13Found:=False;
结果:='';
Stream:=TFileStream.Create(文件名、fmOpenRead或fmShareDenyWrite);
StreamSize:=Stream.size;
如果StreamSize
你确实需要从文件尾部向头部、以较大的块来读取文件。因为文件太大,无法装入内存,所以从头到尾逐行读取会非常慢;如果使用 ReadLn,还会再慢一倍。

您还必须准备好,最后一行可能以EOL结尾,也可能不以EOL结尾

就个人而言,我还将说明三种可能的EOL序列:

  • CR/LF,即 #13#10 = ^M^J —— DOS/Windows 风格
  • 只有 CR、没有 LF,即 #13 = ^M —— 经典 MacOS 文件
  • 只有 LF、没有 CR,即 #10 = ^J —— UNIX 风格,包括 MacOS 版本 10
如果您确定您的CSV文件将仅由本机Windows程序生成,则可以安全地假定使用了完整的CR/LF。但如果还有其他Java程序、非Windows平台、移动程序,我就不那么肯定了。当然,没有LF的纯CR是最不可能的情况

uses System.IOUtils, System.Math, System.Classes;

// Character and string types used for the raw file contents. Exactly one of the
// two "type" lines below should be active; the routines in this file refer only
// to FileChar / FileString / FileCharSize.
type FileChar = AnsiChar; FileString = AnsiString; // for non-Unicode (single-byte) files
// type FileChar = WideChar; FileString = UnicodeString; // for UTF-16 and UCS-2 files
// Size in bytes of one character as stored in the file.
const FileCharSize = SizeOf(FileChar);
// Suggested sanity check to add somewhere later in the code:
//   Assert(FileCharSize = SizeOf(FileString[1]));

// Forward declaration of the file-name overload, so it is known before its
// implementation further down.
function ReadLastLine(const FileName: String): FileString; overload; forward;

// Byte size used both as the "small file" threshold and as the initial size of
// the tail buffer in ReadLastLine(FileName).
const PageSize = 4*1024; 
// Author's rationale (not verifiable from this file): 4 KB is described as the
// minimal read unit of most modern HDDs and the memory allocation unit of Win32.
// Lines longer than 4 KB are expected to be rare, so a larger value was not chosen.

// Picks the last line out of an already-split array of lines.
// Returns '' for an empty array. If the final element is empty (for example a
// "ghost" line produced by a trailing line terminator), the element before it
// is returned instead, when there is one.
function ReadLastLine(const Lines: TStringDynArray): FileString; overload;
var
  last: Integer;
begin
  Result := '';
  last := High(Lines);
  if last >= Low(Lines) then begin
    Result := Lines[last];
    // Fall back to the previous element only when the last one is empty.
    if (Result = '') and (last > Low(Lines)) then
      Result := Lines[last - 1];
  end;
end;

// Searches the not-yet-scanned front part of Buffer for the line terminator
// that precedes the last line.
//
//   buffer    - tail of the file; its last OldRead characters were already
//               scanned by an earlier call.
//   OldRead   - number of characters at the end of buffer to skip (0 on the
//               first call).
//   LastChunk - True when buffer starts at the beginning of the file.
//   Line      - receives the last line, without its trailing terminator.
//
// Returns True when Line was produced: either a terminator was found, or
// LastChunk is set and everything before the trailing terminator is the line.
// Returns False when the caller has to supply more of the file.
function FindLastLine(buffer: TArray<FileChar>; const OldRead : Integer; 
     const LastChunk: Boolean; out Line: FileString): boolean;
var
  last, trailing, idx: Integer;
begin
  Result := False;
  if Length(buffer) = 0 then
    Exit;

  last := High(buffer);

  // Count the terminator characters at the very end: an optional LF,
  // optionally preceded by a CR (covers CR/LF, lone LF and lone CR).
  trailing := 0;
  if buffer[last] = #10 then
    trailing := 1;
  if (last - trailing >= Low(buffer)) and (buffer[last - trailing] = #13) then
    Inc(trailing);

  // First index to examine: skip whichever is larger, the already-scanned
  // part or the trailing terminator.
  idx := last - Max(OldRead, trailing);
  if idx < Low(buffer) then
    Exit;                               // nothing new to look at

  // Start one character later when continuing a previous scan, in case a
  // CR/LF pair straddles the boundary between old and new data.
  if OldRead > 0 then
    Inc(idx);

  while idx >= Low(buffer) do begin
    if (buffer[idx] = #10) or (buffer[idx] = #13) then begin
      // Everything after this terminator, minus the trailing one, is the line.
      SetString(Line, @buffer[idx + 1], last - trailing - idx);
      Result := True;
      Exit;
    end;
    Dec(idx);
  end;

  // No terminator in front of the last line. If the buffer already starts at
  // the beginning of the file, the whole content is that line.
  if LastChunk then begin
    SetString(Line, @buffer[Low(buffer)], Length(buffer) - trailing);
    Result := True;
  end;
end;


// Returns the last line of the file FileName without reading the whole file.
//
// Files of at most PageSize bytes are simply split into lines with
// TFile.ReadAllLines. Larger files are read from the end: a tail buffer of
// PageSize bytes is scanned with FindLastLine and doubled (reading only the
// newly covered part of the file) until the start of the last line is found
// or the buffer covers the whole file.
//
// The result does not include the trailing line terminator. Stream exceptions
// from opening or reading the file propagate to the caller; growing the buffer
// may raise EOutOfMemory for extremely long lines.
function ReadLastLine(const FileName: String): FileString; overload;
var Buffer, tmp: TArray<FileChar>;
    // dynamic arrays - ease memory management and protect from stack corruption
    FS: TFileStream;
    FSize, TotalChars, NewPos, Wanted: Int64;
    OldRead: Integer; EndOfFile: boolean;
begin
  Result := '';
  FS := TFile.OpenRead(FileName);
  try
    FSize := FS.Size;
    if FSize <= PageSize then begin // small file, we can be lazy!
       FreeAndNil(FS);  // free the handle and avoid double-free in finally
       Result := ReadLastLine( TFile.ReadAllLines( FileName, TEncoding.ANSI ));
          // or TEncoding.UTF16
          // warning - TFile is not share-aware, if the file is being written to by another app
       exit;
    end;

    // Work in whole characters only. A trailing partial character (file size
    // not a multiple of FileCharSize) is ignored; without this the start of the
    // file was never reached for such files and the loop never terminated.
    TotalChars := FSize div FileCharSize;

    SetLength( Buffer, PageSize div FileCharSize);
    OldRead := 0;
    repeat
      // The buffer always covers the last Length(Buffer) whole characters.
      NewPos := (TotalChars - Length(Buffer)) * FileCharSize;
      EndOfFile := NewPos <= 0;
      if NewPos < 0 then NewPos := 0;
      FS.Position := NewPos;

      // Only the front part is new; the last OldRead characters were copied
      // over from the previous, smaller buffer.
      FS.ReadBuffer( Buffer[Low(Buffer)], (Length(Buffer) - OldRead)*FileCharSize);

      if FindLastLine(Buffer, OldRead, EndOfFile, Result) then
         exit; // done !

      if EndOfFile then
         Break; // whole file scanned - nothing more to read

      // Double the buffer, but never beyond the file or the Integer range
      // (2*Length in Integer arithmetic could overflow).
      Wanted := Int64(Length(Buffer)) * 2;
      if Wanted > TotalChars then Wanted := TotalChars;
      if Wanted > High(Integer) then Wanted := High(Integer);
      if Wanted <= Length(Buffer) then
         Break; // cannot grow any further

      tmp := Buffer; Buffer := nil; // keep the old data while a bigger buffer is allocated
      OldRead := Length(tmp); // no need to re-scan the tail again when expanding the range

      SetLength(Buffer, Integer(Wanted)); // this may trigger EOutOfMemory...
      Move( tmp[Low(tmp)], Buffer[High(Buffer)-OldRead+1], OldRead*FileCharSize);
      tmp := nil; // free old buffer
    until False;
  finally
    FS.Free;
  end;
end;
使用System.IOUtils、System.Math、System.class;
类型FileChar=AnsiChar;FileString=AnsiString;//对于非Unicode文件
//类型FileChar=WideChar;FileString=UnicodeString;//对于UTF16和UCS-2文件
const FileCharSize=SizeOf(FileChar);
//在代码后面的某个地方添加:Assert(FileCharSize=SizeOf(FileString[1]);
函数ReadLastLine(constfilename:String):FileString;重载;转发;
常量页面大小=4*1024;
//最现代HDD的最小读取原子和Win32的内存分配原子
//因为你的文件有超过4Kb的行的可能性非常小,我不会把它增加到几个原子。
函数ReadLastLine(常量行:TStringDynArray):FileString;重载;
varⅠ:整数;
开始
结果:='';
i:=高(行);
如果i'',则退出;//我们得到了行
Dec(i);//跳过空的重影行,以防最后一行被CRLF终止
如果i=低(缓冲区))和(缓冲区[i]=^M),则单独或在LF之前开始//CR
公司(tailCRLF);
结束;
i:=高(缓冲)-最大(OldRead,tailCRLF);
如果i-Low(Buffer)<0,则退出;//没有要读取的新数据-结果将与以前类似
如果OldRead>0,则Inc(i);//CR/LF对可以在新缓冲区和以前的缓冲区之间切片-因此需要稍微开始一点
uses System.IOUtils, System.Math, System.Classes;

// Character and string types used for the raw file contents. Exactly one of the
// two "type" lines below should be active; the routines in this file refer only
// to FileChar / FileString / FileCharSize.
type FileChar = AnsiChar; FileString = AnsiString; // for non-Unicode (single-byte) files
// type FileChar = WideChar; FileString = UnicodeString; // for UTF-16 and UCS-2 files
// Size in bytes of one character as stored in the file.
const FileCharSize = SizeOf(FileChar);
// Suggested sanity check to add somewhere later in the code:
//   Assert(FileCharSize = SizeOf(FileString[1]));

// Forward declaration of the file-name overload, so it is known before its
// implementation further down.
function ReadLastLine(const FileName: String): FileString; overload; forward;

// Byte size used both as the "small file" threshold and as the initial size of
// the tail buffer in ReadLastLine(FileName).
const PageSize = 4*1024; 
// Author's rationale (not verifiable from this file): 4 KB is described as the
// minimal read unit of most modern HDDs and the memory allocation unit of Win32.
// Lines longer than 4 KB are expected to be rare, so a larger value was not chosen.

// Picks the last line out of an already-split array of lines.
// Returns '' for an empty array. If the final element is empty (for example a
// "ghost" line produced by a trailing line terminator), the element before it
// is returned instead, when there is one.
function ReadLastLine(const Lines: TStringDynArray): FileString; overload;
var
  last: Integer;
begin
  Result := '';
  last := High(Lines);
  if last >= Low(Lines) then begin
    Result := Lines[last];
    // Fall back to the previous element only when the last one is empty.
    if (Result = '') and (last > Low(Lines)) then
      Result := Lines[last - 1];
  end;
end;

// Searches the not-yet-scanned front part of Buffer for the line terminator
// that precedes the last line.
//
//   buffer    - tail of the file; its last OldRead characters were already
//               scanned by an earlier call.
//   OldRead   - number of characters at the end of buffer to skip (0 on the
//               first call).
//   LastChunk - True when buffer starts at the beginning of the file.
//   Line      - receives the last line, without its trailing terminator.
//
// Returns True when Line was produced: either a terminator was found, or
// LastChunk is set and everything before the trailing terminator is the line.
// Returns False when the caller has to supply more of the file.
function FindLastLine(buffer: TArray<FileChar>; const OldRead : Integer; 
     const LastChunk: Boolean; out Line: FileString): boolean;
var
  last, trailing, idx: Integer;
begin
  Result := False;
  if Length(buffer) = 0 then
    Exit;

  last := High(buffer);

  // Count the terminator characters at the very end: an optional LF,
  // optionally preceded by a CR (covers CR/LF, lone LF and lone CR).
  trailing := 0;
  if buffer[last] = #10 then
    trailing := 1;
  if (last - trailing >= Low(buffer)) and (buffer[last - trailing] = #13) then
    Inc(trailing);

  // First index to examine: skip whichever is larger, the already-scanned
  // part or the trailing terminator.
  idx := last - Max(OldRead, trailing);
  if idx < Low(buffer) then
    Exit;                               // nothing new to look at

  // Start one character later when continuing a previous scan, in case a
  // CR/LF pair straddles the boundary between old and new data.
  if OldRead > 0 then
    Inc(idx);

  while idx >= Low(buffer) do begin
    if (buffer[idx] = #10) or (buffer[idx] = #13) then begin
      // Everything after this terminator, minus the trailing one, is the line.
      SetString(Line, @buffer[idx + 1], last - trailing - idx);
      Result := True;
      Exit;
    end;
    Dec(idx);
  end;

  // No terminator in front of the last line. If the buffer already starts at
  // the beginning of the file, the whole content is that line.
  if LastChunk then begin
    SetString(Line, @buffer[Low(buffer)], Length(buffer) - trailing);
    Result := True;
  end;
end;


// Returns the last line of the file FileName without reading the whole file.
//
// Files of at most PageSize bytes are simply split into lines with
// TFile.ReadAllLines. Larger files are read from the end: a tail buffer of
// PageSize bytes is scanned with FindLastLine and doubled (reading only the
// newly covered part of the file) until the start of the last line is found
// or the buffer covers the whole file.
//
// The result does not include the trailing line terminator. Stream exceptions
// from opening or reading the file propagate to the caller; growing the buffer
// may raise EOutOfMemory for extremely long lines.
function ReadLastLine(const FileName: String): FileString; overload;
var Buffer, tmp: TArray<FileChar>;
    // dynamic arrays - ease memory management and protect from stack corruption
    FS: TFileStream;
    FSize, TotalChars, NewPos, Wanted: Int64;
    OldRead: Integer; EndOfFile: boolean;
begin
  Result := '';
  FS := TFile.OpenRead(FileName);
  try
    FSize := FS.Size;
    if FSize <= PageSize then begin // small file, we can be lazy!
       FreeAndNil(FS);  // free the handle and avoid double-free in finally
       Result := ReadLastLine( TFile.ReadAllLines( FileName, TEncoding.ANSI ));
          // or TEncoding.UTF16
          // warning - TFile is not share-aware, if the file is being written to by another app
       exit;
    end;

    // Work in whole characters only. A trailing partial character (file size
    // not a multiple of FileCharSize) is ignored; without this the start of the
    // file was never reached for such files and the loop never terminated.
    TotalChars := FSize div FileCharSize;

    SetLength( Buffer, PageSize div FileCharSize);
    OldRead := 0;
    repeat
      // The buffer always covers the last Length(Buffer) whole characters.
      NewPos := (TotalChars - Length(Buffer)) * FileCharSize;
      EndOfFile := NewPos <= 0;
      if NewPos < 0 then NewPos := 0;
      FS.Position := NewPos;

      // Only the front part is new; the last OldRead characters were copied
      // over from the previous, smaller buffer.
      FS.ReadBuffer( Buffer[Low(Buffer)], (Length(Buffer) - OldRead)*FileCharSize);

      if FindLastLine(Buffer, OldRead, EndOfFile, Result) then
         exit; // done !

      if EndOfFile then
         Break; // whole file scanned - nothing more to read

      // Double the buffer, but never beyond the file or the Integer range
      // (2*Length in Integer arithmetic could overflow).
      Wanted := Int64(Length(Buffer)) * 2;
      if Wanted > TotalChars then Wanted := TotalChars;
      if Wanted > High(Integer) then Wanted := High(Integer);
      if Wanted <= Length(Buffer) then
         Break; // cannot grow any further

      tmp := Buffer; Buffer := nil; // keep the old data while a bigger buffer is allocated
      OldRead := Length(tmp); // no need to re-scan the tail again when expanding the range

      SetLength(Buffer, Integer(Wanted)); // this may trigger EOutOfMemory...
      Move( tmp[Low(tmp)], Buffer[High(Buffer)-OldRead+1], OldRead*FileCharSize);
      tmp := nil; // free old buffer
    until False;
  finally
    FS.Free;
  end;
end;