.net 读取大文本文件，直到某个字符串_.net_String_File_Large Files

.net 读取大文本文件，直到某个字符串

.net string file

.net 读取大文本文件，直到某个字符串,.net,string,file,large-files,.net,String,File,Large Files,我有一个大的字符串分隔文本文件（不是单字符分隔的），如下所示：第一个数据[字符串分隔符]第二个数据[字符串分隔符] 我不想在内存中加载整个文件，因为它的大小（~250MB）。如果我用System.IO.file.ReadAllText读取整个文件，我会得到一个OutOfMemoryException 因此，我希望读取该文件，直到第一次出现[STRING-separator]，然后继续下一个字符串。它就像是从文件中“取出”第一个数据，处理它，然后继续处理第二个数据，这是文件的第一个数据 Syst

我有一个大的字符串分隔文本文件（不是单字符分隔的），如下所示：

第一个数据[字符串分隔符]第二个数据[字符串分隔符]

我不想在内存中加载整个文件，因为它的大小（~250MB）。如果我用

System.IO.file.ReadAllText

读取整个文件，我会得到一个

OutOfMemoryException

因此，我希望读取该文件，直到第一次出现

[STRING-separator]

，然后继续读取下一个字符串。这就像是从文件中“取出”第一个数据并处理它，然后继续处理第二个数据——此时它已成为文件中的第一个数据。

System.IO.StreamReader.ReadLine（）

对我没有帮助，因为文件的内容只有一行

你知道如何读取文件直到.NET中的某个字符串吗

我希望有一些想法，谢谢。

文本文件也可以逐字符读取（例如使用 StreamReader.Read）。要搜索某个字符串，需要自己实现匹配逻辑：根据逐个读入的字符判断目标字符串是否出现，这可以用状态机来完成。

StreamReader.Read有一些重载可能会对您有所帮助。试试这个：

// Sketch: scan a file in fixed-size character chunks without loading it all into memory.
int index = 0;    // offset in the buffer where the next read starts storing characters
int count = 200;  // buffer capacity, or whatever number you think is better
char[] buffer = new char[count];

// Dispose the reader so the file handle is released even if processing throws.
using (System.IO.StreamReader sr = new System.IO.StreamReader("Path here"))
{
    int read;
    // Request only the free space after `index`; asking for `count` characters at a
    // non-zero offset would overrun the buffer and throw ArgumentException.
    // Note: if `index` ever reaches `count`, zero characters are requested and the loop ends.
    while ((read = sr.Read(buffer, index, count - index)) > 0)
    {
        /*
        Only buffer[0 .. index + read) holds valid characters at this point.
        Check if the buffer contains your string separator, or at least some part of it.
        If it contains only a part of it, check the rest of the stream to make sure it is a real separator.
        Do your stuff, then move any unprocessed characters to the front of the buffer and
        set index to the number of characters kept (0 if nothing is carried over).
        */
    }
}

这应该对你有帮助

/// <summary>
/// Lazily reads a file in chunks of at most <paramref name="chunkSize"/> bytes and yields
/// the text decoded from each chunk using <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="chunkSize">Maximum number of bytes read from the file per iteration.</param>
/// <param name="filePath">Path of the file to read.</param>
/// <returns>
/// The decoded text pieces in file order. Concatenating them gives the decoded file content.
/// A chunk that contains only part of a multi-byte character yields nothing; the character
/// is emitted together with the chunk that completes it.
/// </returns>
private IEnumerable<string> ReadCharsByChunks(int chunkSize, string filePath)
{
    // Request read access only: the two-argument constructor asks for read/write access,
    // which fails on read-only files.
    using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read))
    {
        Encoding encoding = Encoding.Default;
        // A Decoder keeps the trailing bytes of an incomplete character between calls, so a
        // character split across two chunks is decoded correctly instead of being corrupted.
        Decoder decoder = encoding.GetDecoder();
        byte[] buffer = new byte[chunkSize];
        char[] chars = new char[encoding.GetMaxCharCount(chunkSize)];
        int currentRead;
        while ((currentRead = fs.Read(buffer, 0, chunkSize)) > 0)
        {
            int charCount = decoder.GetChars(buffer, 0, currentRead, chars, 0, false);
            if (charCount > 0)
            {
                yield return new string(chars, 0, charCount);
            }
        }

        // Flush the decoder so bytes of a truncated trailing character are not silently lost.
        int tailCount = decoder.GetChars(buffer, 0, 0, chars, 0, true);
        if (tailCount > 0)
        {
            yield return new string(chars, 0, tailCount);
        }
    }
}

/// <summary>
/// Streams "sample.txt" through <c>ReadCharsByChunks</c> and shows a "Found" message box
/// for every non-overlapping occurrence of <paramref name="searchWord"/>, including
/// occurrences that span chunk boundaries.
/// </summary>
/// <param name="searchWord">The text to look for; must not be null or empty.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="searchWord"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="searchWord"/> is empty.</exception>
private void SearchWord(string searchWord)
{
    if (searchWord == null)
    {
        throw new System.ArgumentNullException("searchWord");
    }
    if (searchWord.Length == 0)
    {
        // An empty word "matches" at every position and would report a hit for every chunk.
        throw new System.ArgumentException("The search word must not be empty.", "searchWord");
    }

    StringBuilder pending = new StringBuilder();
    foreach (string chunk in ReadCharsByChunks(2, "sample.txt"))//Can be any number
    {
        pending.Append(chunk);
        string window = pending.ToString();

        // Report every occurrence in the window, not just the first one, so that matches
        // still sitting in the buffer when the file ends are not missed.
        int searchFrom = 0;
        int foundIndex;
        while ((foundIndex = window.IndexOf(searchWord, searchFrom, System.StringComparison.Ordinal)) >= 0)
        {
            //Found
            MessageBox.Show("Found");
            searchFrom = foundIndex + searchWord.Length;
        }

        // The unmatched remainder contains no full occurrence, so only its last
        // (searchWord.Length - 1) characters can still be the start of a match that
        // completes in a later chunk. Dropping everything else keeps the buffer bounded.
        int keep = System.Math.Min(searchWord.Length - 1, window.Length - searchFrom);
        pending.Remove(0, window.Length - keep);
    }
}

private IEnumerable<string> ReadCharsByChunks(int chunkSize, string filePath)
{
    using (FileStream fs = new FileStream(filePath, FileMode.Open))
    {
        byte[] buffer = new byte[chunkSize];
        int currentRead;
        while ((currentRead = fs.Read(buffer, 0, chunkSize)) > 0)
        {
            yield return Encoding.Default.GetString(buffer, 0, currentRead);
        }
    }
}

private void SearchWord(string searchWord)
{
    StringBuilder builder = new StringBuilder();
    foreach (var chars in ReadCharsByChunks(2, "sample.txt"))//Can be any number
    {
        builder.Append(chars);

        var existing = builder.ToString();
        int foundIndex = -1;
        if ((foundIndex = existing.IndexOf(searchWord)) >= 0)
        {
            //Found
            MessageBox.Show("Found");

            builder.Remove(0, foundIndex + searchWord.Length);
        }
        else if (!existing.Contains(searchWord.First()))
        {
            builder.Clear();
        }
    }
}

感谢您的回复。下面是我在VB.NET中编写的函数：

Public Function ReadUntil(Stream As System.IO.FileStream, UntilText As String) As String
    Dim builder As New System.Text.StringBuilder()
    Dim returnTextBuilder As New System.Text.StringBuilder()
    Dim returnText As String = String.Empty
    Dim size As Integer = CInt(UntilText.Length / 2) - 1
    Dim buffer(size) As Byte
    Dim currentRead As Integer = -1

    Do Until currentRead = 0
        Dim collected As String = Nothing
        Dim chars As String = Nothing
        Dim foundIndex As Integer = -1

        currentRead = Stream.Read(buffer, 0, buffer.Length)
        chars = System.Text.Encoding.Default.GetString(buffer, 0, currentRead)

        builder.Append(chars)
        returnTextBuilder.Append(chars)

        collected = builder.ToString()
        foundIndex = collected.IndexOf(UntilText)

        If (foundIndex >= 0) Then
            returnText = returnTextBuilder.ToString()

            Dim indexOfSep As Integer = returnText.IndexOf(UntilText)
            Dim cutLength As Integer = returnText.Length - indexOfSep

            returnText = returnText.Remove(indexOfSep, cutLength)

            builder.Remove(0, foundIndex + UntilText.Length)

            If (cutLength > UntilText.Length) Then
                Stream.Position = Stream.Position - (cutLength - UntilText.Length)
            End If

            Return returnText
        ElseIf (Not collected.Contains(UntilText.First())) Then
            builder.Length = 0
        End If
    Loop

    Return String.Empty
End Function

[STRING-separator]

是单个字符还是字符串？它是一个字符串。

[STRING-separator]

有多长？每个连续的

[STRING-separator]

之间可以有多长？

[STRING-separator]

是一个GUID。分隔符之间可以有大约100个字符。它是否总是相同的

Guid

？这看起来不错，但如何从特定位置开始？

/// <summary>
/// Reads from the current position of <paramref name="Stream"/> up to the next occurrence
/// of <paramref name="UntilText"/> and returns the text in front of it, decoded with
/// <see cref="System.Text.Encoding.Default"/>. The stream is left positioned immediately
/// after the separator, so repeated calls return consecutive records.
/// </summary>
/// <param name="Stream">A seekable file stream positioned at the start of a record.</param>
/// <param name="UntilText">The separator text to read up to.</param>
/// <returns>
/// The text before the separator (without the separator itself). If the end of the file is
/// reached without finding the separator, the remaining text is returned instead of being
/// discarded; this is an empty string when the stream is already at its end. An empty
/// <paramref name="UntilText"/> returns an empty string without consuming anything.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="Stream"/> or <paramref name="UntilText"/> is null.
/// </exception>
public static string ReadUntil(System.IO.FileStream Stream, string UntilText)
{
    if (Stream == null)
    {
        throw new System.ArgumentNullException("Stream");
    }
    if (UntilText == null)
    {
        throw new System.ArgumentNullException("UntilText");
    }
    if (UntilText.Length == 0)
    {
        return string.Empty;
    }

    System.Text.Encoding encoding = System.Text.Encoding.Default;

    // Search on raw bytes rather than on decoded text: decoding arbitrary byte slices
    // corrupts multi-byte characters, and the stream position is a byte offset, so a
    // rewind distance measured in characters would be wrong for multi-byte encodings.
    byte[] separator = encoding.GetBytes(UntilText);
    long start = Stream.Position;
    byte[] buffer = new byte[4096];

    using (System.IO.MemoryStream record = new System.IO.MemoryStream())
    {
        int searchFrom = 0;
        int currentRead;
        while ((currentRead = Stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            record.Write(buffer, 0, currentRead);

            byte[] data = record.GetBuffer();
            int length = (int)record.Length;
            int foundIndex = IndexOfBytes(data, length, separator, searchFrom);
            if (foundIndex >= 0)
            {
                // More than the record may have been read; rewind to just past the separator.
                Stream.Position = start + foundIndex + separator.Length;
                return encoding.GetString(data, 0, foundIndex);
            }

            // Everything before the last (separator.Length - 1) bytes is known not to start
            // a match, so the next scan resumes there instead of rescanning from the beginning.
            searchFrom = System.Math.Max(0, length - separator.Length + 1);
        }

        // End of file without a separator: return the trailing data instead of dropping it.
        return encoding.GetString(record.GetBuffer(), 0, (int)record.Length);
    }
}

// Returns the index of the first occurrence of needle within the first haystackLength bytes
// of haystack, starting the search at startIndex, or -1 when there is none.
private static int IndexOfBytes(byte[] haystack, int haystackLength, byte[] needle, int startIndex)
{
    int lastStart = haystackLength - needle.Length;
    for (int i = startIndex; i <= lastStart; i++)
    {
        int matched = 0;
        while (matched < needle.Length && haystack[i + matched] == needle[matched])
        {
            matched++;
        }
        if (matched == needle.Length)
        {
            return i;
        }
    }
    return -1;
}