Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/shell/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何使用C#遍历多个约200 MB的日志/文本文件?并应用正则表达式_C#_.net_Regex - Fatal编程技术网

如何使用C#遍历多个约200 MB的日志/文本文件并应用正则表达式?

如何使用C#遍历多个约200 MB的日志/文本文件?并应用正则表达式,c#,.net,regex,C#,.net,Regex,我必须开发一个实用程序,它接受包含多个日志/文本文件(每个文件大约200 MB)的文件夹路径,然后遍历所有文件,从存在的行中选择四个元素 我已经尝试了多种解决方案,所有解决方案对于较小的文件都非常有效但是当我加载较大的文件时,Windows窗体会挂起或显示“OutOfMemory异常”。请帮忙 解决方案1: string textFile; string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0

我必须开发一个实用程序,它接受包含多个日志/文本文件(每个文件大约200 MB)的文件夹路径,然后遍历所有文件,从存在的行中选择四个元素

我已经尝试了多种解决方案,所有解决方案对于较小的文件都非常有效,但是当我加载较大的文件时,Windows窗体会挂起或显示“OutOfMemory异常”。请帮忙

解决方案1:

// Attempt 1: load every file in the chosen folder completely into one string,
// then run the timestamp regex over that whole string.
// NOTE(review): `Path` looks like a UI control declared outside this excerpt — confirm.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned here but never read in this excerpt; only SelectedPath is checked.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
    string[] files = Directory.GetFiles(fbd.SelectedPath);

    System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
    foreach (string fileName in files)
    {
        // The entire file content is materialized as a single string before matching.
        textFile = File.ReadAllText(fileName); 

        MatchCollection mc = Regex.Matches(textFile, re1);
        foreach (Match m in mc)
        {
            string a = m.ToString();
            // Every += replaces Text with a new, longer string (old text + the match).
            Path.Text += a; //Temporary, Just to check the output
            Path.Text += Environment.NewLine;
        }

    }

}
// Attempt 2: stream each file line by line through a StreamReader and apply the
// timestamp regex to one line at a time.
// NOTE(review): the brace that closes the outer foreach is missing from this excerpt.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned but never read; SelectedPath is used below without any check.
DialogResult result = fbd.ShowDialog();
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{

    // Buffer size passed to the StreamReader constructor below.
    const Int32 BufferSize = 512;
    using (var fileStream = File.OpenRead(file))
    // Reader decodes as UTF-8; the `true` argument enables byte-order-mark detection.
    using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))


    {
        String line;
        // Loop ends when ReadLine yields null.
        while ((line = streamReader.ReadLine()) != null)
        {
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }
       }  
}
// Attempt 3: read one file line by line until EndOfStream, matching the timestamp
// regex per line; any exception is reported in a message box.
// NOTE(review): `file` is not declared anywhere in this excerpt, and neither `fbd`
// nor `result` is used after ShowDialog() — presumably a loop over the folder was cut off.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
using (StreamReader r = new StreamReader(file))
{

    try
    {
        string line = String.Empty;

        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }

        }
    }
    // Catches every exception type and only displays its message.
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
解决方案2:

// Attempt 1: load every file in the chosen folder completely into one string,
// then run the timestamp regex over that whole string.
// NOTE(review): `Path` looks like a UI control declared outside this excerpt — confirm.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned here but never read in this excerpt; only SelectedPath is checked.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
    string[] files = Directory.GetFiles(fbd.SelectedPath);

    System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
    foreach (string fileName in files)
    {
        // The entire file content is materialized as a single string before matching.
        textFile = File.ReadAllText(fileName); 

        MatchCollection mc = Regex.Matches(textFile, re1);
        foreach (Match m in mc)
        {
            string a = m.ToString();
            // Every += replaces Text with a new, longer string (old text + the match).
            Path.Text += a; //Temporary, Just to check the output
            Path.Text += Environment.NewLine;
        }

    }

}
// Attempt 2: stream each file line by line through a StreamReader and apply the
// timestamp regex to one line at a time.
// NOTE(review): the brace that closes the outer foreach is missing from this excerpt.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned but never read; SelectedPath is used below without any check.
DialogResult result = fbd.ShowDialog();
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{

    // Buffer size passed to the StreamReader constructor below.
    const Int32 BufferSize = 512;
    using (var fileStream = File.OpenRead(file))
    // Reader decodes as UTF-8; the `true` argument enables byte-order-mark detection.
    using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))


    {
        String line;
        // Loop ends when ReadLine yields null.
        while ((line = streamReader.ReadLine()) != null)
        {
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }
       }  
}
// Attempt 3: read one file line by line until EndOfStream, matching the timestamp
// regex per line; any exception is reported in a message box.
// NOTE(review): `file` is not declared anywhere in this excerpt, and neither `fbd`
// nor `result` is used after ShowDialog() — presumably a loop over the folder was cut off.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
using (StreamReader r = new StreamReader(file))
{

    try
    {
        string line = String.Empty;

        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }

        }
    }
    // Catches every exception type and only displays its message.
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
解决方案3:

// Attempt 1: load every file in the chosen folder completely into one string,
// then run the timestamp regex over that whole string.
// NOTE(review): `Path` looks like a UI control declared outside this excerpt — confirm.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned here but never read in this excerpt; only SelectedPath is checked.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
    string[] files = Directory.GetFiles(fbd.SelectedPath);

    System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
    foreach (string fileName in files)
    {
        // The entire file content is materialized as a single string before matching.
        textFile = File.ReadAllText(fileName); 

        MatchCollection mc = Regex.Matches(textFile, re1);
        foreach (Match m in mc)
        {
            string a = m.ToString();
            // Every += replaces Text with a new, longer string (old text + the match).
            Path.Text += a; //Temporary, Just to check the output
            Path.Text += Environment.NewLine;
        }

    }

}
// Attempt 2: stream each file line by line through a StreamReader and apply the
// timestamp regex to one line at a time.
// NOTE(review): the brace that closes the outer foreach is missing from this excerpt.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// `result` is assigned but never read; SelectedPath is used below without any check.
DialogResult result = fbd.ShowDialog();
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{

    // Buffer size passed to the StreamReader constructor below.
    const Int32 BufferSize = 512;
    using (var fileStream = File.OpenRead(file))
    // Reader decodes as UTF-8; the `true` argument enables byte-order-mark detection.
    using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))


    {
        String line;
        // Loop ends when ReadLine yields null.
        while ((line = streamReader.ReadLine()) != null)
        {
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }
       }  
}
// Attempt 3: read one file line by line until EndOfStream, matching the timestamp
// regex per line; any exception is reported in a message box.
// NOTE(review): `file` is not declared anywhere in this excerpt, and neither `fbd`
// nor `result` is used after ShowDialog() — presumably a loop over the folder was cut off.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
using (StreamReader r = new StreamReader(file))
{

    try
    {
        string line = String.Empty;

        while (!r.EndOfStream)
        {
            line = r.ReadLine();
            MatchCollection mc = Regex.Matches(line, re1);
            foreach (Match m in mc)
            {
                string a = m.ToString();
                // Every += replaces Text with a new, longer string (old text + the match).
                Path.Text += a; //Temporary, Just to check the output
                Path.Text += Environment.NewLine;
            }

        }
    }
    // Catches every exception type and only displays its message.
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}

有几点需要注意

  • 不要附加到字符串
    Path.Text+=…
    。我假设这只是一个测试代码,希望它能被扔掉
  • 您只需使用简单的
    File.ReadLines
    调用,在您的案例中,文件读取速度没有实际差异
  • 你应该编译你的正则表达式
  • 您可以尝试简化正则表达式
  • 在进行正则表达式匹配之前,可以添加简单的基于字符串的预检查
  • 下面是实现上述准则的示例代码

    // Sample: scan every file in a user-selected folder line by line and collect all
    // timestamp matches in a list (instead of appending them to a UI string).
    // Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
    string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
    var buf = new List<string>();
    // Built once and reused for every line of every file.
    var re2 = new Regex(re1, RegexOptions.Compiled);
    // The pattern accepts '-' or '/' between date parts, so the cheap pre-check must accept both.
    char[] dateSeparators = { '-', '/' };

    FolderBrowserDialog fbd = new FolderBrowserDialog();
    DialogResult result = fbd.ShowDialog();
    // Only proceed when a folder was actually chosen; a cancelled dialog leaves
    // SelectedPath empty and Directory.GetFiles would throw on it.
    if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
        foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {

            // File.ReadLines enumerates lazily, so the whole file is never held in memory.
            foreach (var line in File.ReadLines(file)) {
                // Pre-check: a match needs a date separator followed later by ':'.
                // Lines failing this cannot match, so the regex is skipped for them.
                int indx = line.IndexOfAny(dateSeparators);
                if (indx == -1 || line.IndexOf(':', indx + 1) == -1) {
                    continue;
                }

                foreach (Match m in re2.Matches(line)) {
                    buf.Add(m.Value + Environment.NewLine); //Temporary, Just to check the output
                }
            }
        }
    }
    
    // Sample: scan every file in a user-selected folder line by line and collect all
    // timestamp matches in a list (instead of appending them to a UI string).
    // Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
    string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
    var buf = new List<string>();
    // Built once and reused for every line of every file.
    var re2 = new Regex(re1, RegexOptions.Compiled);
    // The pattern accepts '-' or '/' between date parts, so the cheap pre-check must accept both.
    char[] dateSeparators = { '-', '/' };

    FolderBrowserDialog fbd = new FolderBrowserDialog();
    DialogResult result = fbd.ShowDialog();
    // Only proceed when a folder was actually chosen; a cancelled dialog leaves
    // SelectedPath empty and Directory.GetFiles would throw on it.
    if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
        foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {

            // File.ReadLines enumerates lazily, so the whole file is never held in memory.
            foreach (var line in File.ReadLines(file)) {
                // Pre-check: a match needs a date separator followed later by ':'.
                // Lines failing this cannot match, so the regex is skipped for them.
                int indx = line.IndexOfAny(dateSeparators);
                if (indx == -1 || line.IndexOf(':', indx + 1) == -1) {
                    continue;
                }

                foreach (Match m in re2.Matches(line)) {
                    buf.Add(m.Value + Environment.NewLine); //Temporary, Just to check the output
                }
            }
        }
    }
    
    有几点需要注意

  • 不要附加到字符串
    Path.Text+=…
    。我假设这只是一个测试代码,希望它能被扔掉
  • 您只需使用简单的
    File.ReadLines
    调用,在您的案例中,文件读取速度没有实际差异
  • 你应该编译你的正则表达式
  • 您可以尝试简化正则表达式
  • 在进行正则表达式匹配之前,可以添加简单的基于字符串的预检查
  • 下面是实现上述准则的示例代码

    // Sample: scan every file in a user-selected folder line by line and collect all
    // timestamp matches in a list (instead of appending them to a UI string).
    // Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
    string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
    var buf = new List<string>();
    // Built once and reused for every line of every file.
    var re2 = new Regex(re1, RegexOptions.Compiled);
    // The pattern accepts '-' or '/' between date parts, so the cheap pre-check must accept both.
    char[] dateSeparators = { '-', '/' };

    FolderBrowserDialog fbd = new FolderBrowserDialog();
    DialogResult result = fbd.ShowDialog();
    // Only proceed when a folder was actually chosen; a cancelled dialog leaves
    // SelectedPath empty and Directory.GetFiles would throw on it.
    if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
        foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {

            // File.ReadLines enumerates lazily, so the whole file is never held in memory.
            foreach (var line in File.ReadLines(file)) {
                // Pre-check: a match needs a date separator followed later by ':'.
                // Lines failing this cannot match, so the regex is skipped for them.
                int indx = line.IndexOfAny(dateSeparators);
                if (indx == -1 || line.IndexOf(':', indx + 1) == -1) {
                    continue;
                }

                foreach (Match m in re2.Matches(line)) {
                    buf.Add(m.Value + Environment.NewLine); //Temporary, Just to check the output
                }
            }
        }
    }
    
    // Sample: scan every file in a user-selected folder line by line and collect all
    // timestamp matches in a list (instead of appending them to a UI string).
    // Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or a whitespace character, then HH:mm:ss.
    string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
    var buf = new List<string>();
    // Built once and reused for every line of every file.
    var re2 = new Regex(re1, RegexOptions.Compiled);
    // The pattern accepts '-' or '/' between date parts, so the cheap pre-check must accept both.
    char[] dateSeparators = { '-', '/' };

    FolderBrowserDialog fbd = new FolderBrowserDialog();
    DialogResult result = fbd.ShowDialog();
    // Only proceed when a folder was actually chosen; a cancelled dialog leaves
    // SelectedPath empty and Directory.GetFiles would throw on it.
    if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
        foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {

            // File.ReadLines enumerates lazily, so the whole file is never held in memory.
            foreach (var line in File.ReadLines(file)) {
                // Pre-check: a match needs a date separator followed later by ':'.
                // Lines failing this cannot match, so the regex is skipped for them.
                int indx = line.IndexOfAny(dateSeparators);
                if (indx == -1 || line.IndexOf(':', indx + 1) == -1) {
                    continue;
                }

                foreach (Match m in re2.Matches(line)) {
                    buf.Add(m.Value + Environment.NewLine); //Temporary, Just to check the output
                }
            }
        }
    }
    
    您用于调试的 Path.Text 可能拼接了大量字符串。请把它改为使用 StringBuilder,而不是用 += 拼接,看看这是否就是内存问题的原因

    您是否已经找到了另一种方法?

    您用于调试的 Path.Text 可能拼接了大量字符串。请把它改为使用 StringBuilder,而不是用 += 拼接,看看这是否就是内存问题的原因


    你有没有考虑过另一种方法?

    您使用的是哪种 Windows(Vista/7/8/10)、哪个版本(32/64 位),内存有多大?
    测试环境:Windows 10 64 位、4 GB 内存、Core i5。
    也许(Maybe)您可以在这篇文章中找到解决方案:(原文链接缺失)。
    @Michal 我也试过了,但结果相同。
    .NET Framework 对单个对象的大小有 2 GB 的硬限制(还要减去框架本身消耗的开销)。
    是否有可能把这个大日志文件拆分为几个较小的日志文件?