如何使用C#遍历多个约200 MB的日志/文本文件并应用正则表达式?
我必须开发一个实用程序,它接受包含多个日志/文本文件(每个文件大约200 MB)的文件夹路径,然后遍历所有文件,从存在的行中选择四个元素 我已经尝试了多种解决方案,所有解决方案对于较小的文件都非常有效但是当我加载较大的文件时,Windows窗体会挂起或显示“OutOfMemory异常”。请帮忙 解决方案1:如何使用C#遍历多个约200 MB的日志/文本文件?并应用正则表达式,c#,.net,regex,C#,.net,Regex,我必须开发一个实用程序,它接受包含多个日志/文本文件(每个文件大约200 MB)的文件夹路径,然后遍历所有文件,从存在的行中选择四个元素 我已经尝试了多种解决方案,所有解决方案对于较小的文件都非常有效但是当我加载较大的文件时,Windows窗体会挂起或显示“OutOfMemory异常”。请帮忙 解决方案1: string textFile; string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0
// Attempt using File.ReadAllText: for every file in the selected folder, load the
// whole file into one string, run the timestamp regex over it, and append each
// match to Path.Text.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE(review): result is never inspected; only SelectedPath is checked below.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
string[] files = Directory.GetFiles(fbd.SelectedPath);
System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
foreach (string fileName in files)
{
// NOTE(review): ReadAllText returns the entire file as a single string, so a
// ~200 MB log is held in memory at once before any matching starts.
textFile = File.ReadAllText(fileName);
MatchCollection mc = Regex.Matches(textFile, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control with a Text property (not shown
// here); every += builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// Attempt using a StreamReader over File.OpenRead: read each file in the selected
// folder line by line and append every timestamp match to Path.Text.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): SelectedPath is used without checking result or for an empty path.
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{
const Int32 BufferSize = 512;
using (var fileStream = File.OpenRead(file))
// UTF-8 with byte-order-mark detection enabled (third argument); BufferSize is
// passed as the reader's buffer size.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))
{
String line;
// ReadLine returns null at the end of the stream, which ends the loop.
while ((line = streamReader.ReadLine()) != null)
{
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// NOTE(review): the closing brace of the outer foreach is not present in this excerpt.
// Attempt using StreamReader(file) with EndOfStream: read one file line by line,
// append every timestamp match to Path.Text, and show any exception in a message box.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): `file` is not declared in this excerpt, and fbd/result are not used below.
using (StreamReader r = new StreamReader(file))
{
try
{
string line = String.Empty;
while (!r.EndOfStream)
{
line = r.ReadLine();
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
catch (Exception ex)
{
// Any exception raised inside the try block is shown to the user and not rethrown.
MessageBox.Show(ex.Message);
}
}
解决方案2:
// Attempt using File.ReadAllText: for every file in the selected folder, load the
// whole file into one string, run the timestamp regex over it, and append each
// match to Path.Text.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE(review): result is never inspected; only SelectedPath is checked below.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
string[] files = Directory.GetFiles(fbd.SelectedPath);
System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
foreach (string fileName in files)
{
// NOTE(review): ReadAllText returns the entire file as a single string, so a
// ~200 MB log is held in memory at once before any matching starts.
textFile = File.ReadAllText(fileName);
MatchCollection mc = Regex.Matches(textFile, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control with a Text property (not shown
// here); every += builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// Attempt using a StreamReader over File.OpenRead: read each file in the selected
// folder line by line and append every timestamp match to Path.Text.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): SelectedPath is used without checking result or for an empty path.
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{
const Int32 BufferSize = 512;
using (var fileStream = File.OpenRead(file))
// UTF-8 with byte-order-mark detection enabled (third argument); BufferSize is
// passed as the reader's buffer size.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))
{
String line;
// ReadLine returns null at the end of the stream, which ends the loop.
while ((line = streamReader.ReadLine()) != null)
{
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// NOTE(review): the closing brace of the outer foreach is not present in this excerpt.
// Attempt using StreamReader(file) with EndOfStream: read one file line by line,
// append every timestamp match to Path.Text, and show any exception in a message box.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): `file` is not declared in this excerpt, and fbd/result are not used below.
using (StreamReader r = new StreamReader(file))
{
try
{
string line = String.Empty;
while (!r.EndOfStream)
{
line = r.ReadLine();
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
catch (Exception ex)
{
// Any exception raised inside the try block is shown to the user and not rethrown.
MessageBox.Show(ex.Message);
}
}
解决方案3:
// Attempt using File.ReadAllText: for every file in the selected folder, load the
// whole file into one string, run the timestamp regex over it, and append each
// match to Path.Text.
string textFile;
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
// NOTE(review): result is never inspected; only SelectedPath is checked below.
DialogResult result = fbd.ShowDialog();
if (!string.IsNullOrWhiteSpace(fbd.SelectedPath))
{
string[] files = Directory.GetFiles(fbd.SelectedPath);
System.Windows.Forms.MessageBox.Show("Files found: " + files.Length.ToString(), "Message");
foreach (string fileName in files)
{
// NOTE(review): ReadAllText returns the entire file as a single string, so a
// ~200 MB log is held in memory at once before any matching starts.
textFile = File.ReadAllText(fileName);
MatchCollection mc = Regex.Matches(textFile, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control with a Text property (not shown
// here); every += builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// Attempt using a StreamReader over File.OpenRead: read each file in the selected
// folder line by line and append every timestamp match to Path.Text.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): SelectedPath is used without checking result or for an empty path.
foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath))
{
const Int32 BufferSize = 512;
using (var fileStream = File.OpenRead(file))
// UTF-8 with byte-order-mark detection enabled (third argument); BufferSize is
// passed as the reader's buffer size.
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))
{
String line;
// ReadLine returns null at the end of the stream, which ends the loop.
while ((line = streamReader.ReadLine()) != null)
{
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
// NOTE(review): the closing brace of the outer foreach is not present in this excerpt.
// Attempt using StreamReader(file) with EndOfStream: read one file line by line,
// append every timestamp match to Path.Text, and show any exception in a message box.
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// NOTE(review): `file` is not declared in this excerpt, and fbd/result are not used below.
using (StreamReader r = new StreamReader(file))
{
try
{
string line = String.Empty;
while (!r.EndOfStream)
{
line = r.ReadLine();
MatchCollection mc = Regex.Matches(line, re1);
foreach (Match m in mc)
{
string a = m.ToString();
// NOTE(review): Path is presumably a form control (not shown here); every +=
// builds a new string from the old text plus the match.
Path.Text += a; //Temporary, Just to check the output
Path.Text += Environment.NewLine;
}
}
}
catch (Exception ex)
{
// Any exception raised inside the try block is shown to the user and not rethrown.
MessageBox.Show(ex.Message);
}
}
有几点需要注意:
- Path.Text += … ——我假设这只是测试代码,希望它最终会被丢弃。
- File.ReadLines 调用——在您的情况下,文件读取速度没有实际差异。
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
// Scans every file in the user-selected folder line by line and collects each
// timestamp match of re1 into buf (one entry per match, newline-terminated).
var buf = new List<string>();
// Build the regex once; it is reused for every line of every file.
var re2 = new Regex(re1, RegexOptions.Compiled);
// The pattern accepts '-' or '/' between the date parts, so the cheap pre-filter
// below must look for either one, otherwise slash-separated dates are skipped.
char[] dateSeparators = { '-', '/' };
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// Do nothing when the dialog was cancelled or no folder was chosen; an empty
// path would make Directory.GetFiles throw.
if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
    foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {
        // File.ReadLines enumerates lazily, so the whole file is never held in memory.
        foreach (var line in File.ReadLines(file)) {
            // Pre-filter: a timestamp needs a date separator followed later by ':'.
            // Lines that fail this cannot match, so the regex is not run on them.
            int indx = line.IndexOfAny(dateSeparators);
            if (indx == -1 || line.IndexOf(':', indx + 1) == -1)
                continue;
            foreach (Match m in re2.Matches(line)) {
                string a = m.ToString();
                buf.Add(a + Environment.NewLine); //Temporary, Just to check the output
            }
        }
    }
}
// Scans every file in the user-selected folder line by line and collects each
// timestamp match into buf (one entry per match, newline-terminated).
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
var buf = new List<string>();
// Build the regex once; it is reused for every line of every file.
var re2 = new Regex(re1, RegexOptions.Compiled);
// The pattern accepts '-' or '/' between the date parts, so the cheap pre-filter
// below must look for either one, otherwise slash-separated dates are skipped.
char[] dateSeparators = { '-', '/' };
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// Do nothing when the dialog was cancelled or no folder was chosen; an empty
// path would make Directory.GetFiles throw.
if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
    foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {
        // File.ReadLines enumerates lazily, so the whole file is never held in memory.
        foreach (var line in File.ReadLines(file)) {
            // Pre-filter: a timestamp needs a date separator followed later by ':'.
            // Lines that fail this cannot match, so the regex is not run on them.
            int indx = line.IndexOfAny(dateSeparators);
            if (indx == -1 || line.IndexOf(':', indx + 1) == -1)
                continue;
            foreach (Match m in re2.Matches(line)) {
                string a = m.ToString();
                buf.Add(a + Environment.NewLine); // Temporary, just to check the output
            }
        }
    }
}
有几点需要注意:
- Path.Text += … ——我假设这只是测试代码,希望它最终会被丢弃。
- File.ReadLines 调用——在您的情况下,文件读取速度没有实际差异。
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
// Scans every file in the user-selected folder line by line and collects each
// timestamp match of re1 into buf (one entry per match, newline-terminated).
var buf = new List<string>();
// Build the regex once; it is reused for every line of every file.
var re2 = new Regex(re1, RegexOptions.Compiled);
// The pattern accepts '-' or '/' between the date parts, so the cheap pre-filter
// below must look for either one, otherwise slash-separated dates are skipped.
char[] dateSeparators = { '-', '/' };
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// Do nothing when the dialog was cancelled or no folder was chosen; an empty
// path would make Directory.GetFiles throw.
if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
    foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {
        // File.ReadLines enumerates lazily, so the whole file is never held in memory.
        foreach (var line in File.ReadLines(file)) {
            // Pre-filter: a timestamp needs a date separator followed later by ':'.
            // Lines that fail this cannot match, so the regex is not run on them.
            int indx = line.IndexOfAny(dateSeparators);
            if (indx == -1 || line.IndexOf(':', indx + 1) == -1)
                continue;
            foreach (Match m in re2.Matches(line)) {
                string a = m.ToString();
                buf.Add(a + Environment.NewLine); //Temporary, Just to check the output
            }
        }
    }
}
// Scans every file in the user-selected folder line by line and collects each
// timestamp match into buf (one entry per match, newline-terminated).
// Timestamp pattern: yyyy-MM-dd or yyyy/MM/dd, then 'T' or whitespace, then HH:mm:ss.
string re1 = "((?:2|1)\\d{3}(?:-|\\/)(?:(?:0[1-9])|(?:1[0-2]))(?:-|\\/)(?:(?:0[1-9])|(?:[1-2][0-9])|(?:3[0-1]))(?:T|\\s)(?:(?:[0-1][0-9])|(?:2[0-3])):(?:[0-5][0-9]):(?:[0-5][0-9]))";
var buf = new List<string>();
// Build the regex once; it is reused for every line of every file.
var re2 = new Regex(re1, RegexOptions.Compiled);
// The pattern accepts '-' or '/' between the date parts, so the cheap pre-filter
// below must look for either one, otherwise slash-separated dates are skipped.
char[] dateSeparators = { '-', '/' };
FolderBrowserDialog fbd = new FolderBrowserDialog();
DialogResult result = fbd.ShowDialog();
// Do nothing when the dialog was cancelled or no folder was chosen; an empty
// path would make Directory.GetFiles throw.
if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath)) {
    foreach (string file in System.IO.Directory.GetFiles(fbd.SelectedPath)) {
        // File.ReadLines enumerates lazily, so the whole file is never held in memory.
        foreach (var line in File.ReadLines(file)) {
            // Pre-filter: a timestamp needs a date separator followed later by ':'.
            // Lines that fail this cannot match, so the regex is not run on them.
            int indx = line.IndexOfAny(dateSeparators);
            if (indx == -1 || line.IndexOf(':', indx + 1) == -1)
                continue;
            foreach (Match m in re2.Matches(line)) {
                string a = m.ToString();
                buf.Add(a + Environment.NewLine); // Temporary, just to check the output
            }
        }
    }
}
您的“路径”调试可能连接了大量字符串。将其更改为StringBuilder而不是+=连接,以查看这是否是内存问题的原因
您是否已经找到了另一种方法?您的“路径”调试可能正在连接大量的字符串。将其更改为StringBuilder而不是+=连接,以查看这是否是内存问题的原因
你有没有考虑过另一种方法?您使用的是哪种Windows(Vista/7/8/10)、哪个版本(32/64位),RAM有多大?测试环境:Windows 10 64位、4 GB RAM、Core i5。也许你会在本文中找到解决方案:(链接缺失)。@Michal 也尝试过,但结果相同。.NET Framework对对象大小有2 GB的硬限制,减去框架本身消耗的开销。是否有可能将这个大日志文件拆分为几个较小的文件?您使用的是哪种Windows(Vista/7/8/10)、哪个版本(32/64位),RAM有多大?测试环境:Windows 10 64位、4 GB RAM、Core i5。也许您会在本文中找到解决方案:(链接缺失)。@Michal 也尝试过,但结果相同。.NET Framework对对象大小有2 GB的硬限制,减去框架本身消耗的开销。是否有可能将这个大日志文件拆分为几个小日志文件?