C# 如何有效地交叉引用2个文本文件?|改进我的代码
下面是我的代码的功能概述:C# 如何有效地交叉引用2个文本文件?|改进我的代码,c#,arrays,visual-studio-2010,list,text-files,C#,Arrays,Visual Studio 2010,List,Text Files,下面是我的代码的功能概述: 读取具有150k行的TextFileA 读取TextFileB,它有150k行,是TextFileA的交叉引用列表 。拆分两个文本文件并匹配指定的元素 最后,输出第三个文本文件,其中包含TextFileA和TextFileB的值 下面的代码运行良好,直到大约13000行输入,然后程序变得异常缓慢 有人能解释一下为什么程序会以指数级的速度变慢,以及我如何改进这段代码吗?谢谢 private void BT_Xref_Click(object sender, Even
- 读取具有150k行的TextFileA
- 读取TextFileB,它有150k行,是TextFileA的交叉引用列表
- 对两个文本文件的每一行调用 .Split 进行拆分,并匹配指定的元素
- 最后,输出第三个文本文件,其中包含TextFileA和TextFileB的值李>
// Click handler that cross-references the manifest file (path taken from TB_Manifest)
// against the file named in TB_Complete. For each manifest line it scans the second file
// for a line whose third '|' field equals the manifest's seventh ',' field with its trailing
// extension removed, and collects "<output line>|<manifest date>". Matches are flushed to
// numbered "test<N>.txt" chunk files in a "Final Index" folder next to the manifest and are
// finally concatenated into "Final Index\FinalIndex.txt".
// NOTE(review): the inner loop rescans the second file for every manifest line, so the work
// grows with (manifest lines x output lines).
private void BT_Xref_Click(object sender, EventArgs e)
{
//grabs file path from text box
string ManifestPath = TB_Manifest.Text;
//grabs parent directory from file path
string directoryName = Path.GetDirectoryName(ManifestPath);
//creates a new folder for the final output text file
string pathString = Path.Combine(directoryName, "Final Index");
Directory.CreateDirectory(pathString);
//list for matching text lines which will eventually be output to the final text file
List<string> NewData = new List<string>();
//initializes StreamReader for the first text file
//NOTE(review): the same file is also loaded in full by File.ReadAllLines on the next line,
//and this reader is never disposed inside the method.
StreamReader ManifestReader = new StreamReader(ManifestPath);
String[] ManifestArray = File.ReadAllLines(ManifestPath);
//copy of the manifest lines; matched lines are removed from it below, but the list is not
//read again anywhere in this method
List<string> RemoveManifest = new List<string>(ManifestArray);
//initializes StreamReader for the second text file (again read twice: reader + array)
StreamReader OutputReader = new StreamReader(TB_Complete.Text);
String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
//copy of the output lines; not used again in this method
List<string> RemoveOutput = new List<string>(OutputArray);
//counter used to number the chunk files (test1.txt, test2.txt, ...)
int shortcount = 0;
//one ReadLine on each reader up front so the first line of both text files is skipped
string ManifestLine = ManifestReader.ReadLine();
string OutputLine = OutputReader.ReadLine();
//mfile itself is never used; the array only drives the iteration count while the actual
//line comes from ManifestReader.
//NOTE(review): one line was already consumed above, so the reader holds one line fewer than
//ManifestArray; the final iteration presumably gets null from ReadLine and fails on Split - confirm.
foreach (string mfile in ManifestArray)
{
ManifestLine = ManifestReader.ReadLine();
string ManifestElement = ManifestLine.Split(',')[6];
string ManifestElement2 = ManifestLine.Split(',')[5];
//value to be retrieved and output to final text file
string ManifestElementDate = ManifestElement2.Replace("/", "-");
//value to be compared with the other text file (trailing ".ext" stripped)
string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"),"");
//rewinds the underlying stream of OutputReader so the scan starts over for this manifest line
//NOTE(review): StreamReader keeps its own buffer; moving BaseStream without calling
//DiscardBufferedData() may not restart reading at the first line - verify. After the rewind
//the first line is no longer skipped either.
OutputReader.BaseStream.Position = 0;
//counting the mfile position in the ManifestArray
//int removeIndex = Array.IndexOf(ManifestArray, mfile);
//remove by resizing the array
//Array.Resize(ref ManifestArray, ManifestArray.Length - 1);
//ofile is not used either; lines are pulled from OutputReader
foreach (string ofile in OutputArray)
{
OutputLine = OutputReader.ReadLine();
//value to be compared with other text file
string OutputElement = OutputLine.Split('|')[2];
//if values equal then add the specified line of text to the list.
if (ManifestNoExt.Equals(OutputElement))
{
NewData.Add(OutputLine + "|" + ManifestElementDate);
//scans the whole RemoveManifest list on every match
RemoveManifest.RemoveAll(item => item == ManifestLine);
if (NewData.Count == 1000)
{
//once 1000 matches are buffered, write them to a new chunk file and start over
shortcount = shortcount + 1;
File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
NewData.Clear();
}
//only the first matching output line is used for each manifest line
break;
}
}
}
//once all lines of text have been searched, write the remaining matches as the last chunk
shortcount = shortcount + 1;
File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
//collects every file under the output folder (all extensions, all subdirectories)
String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
{
foreach (var file in SplitTextFiles)
{
//append the chunk's bytes to FinalIndex.txt
using (var input = File.OpenRead(file))
{
input.CopyTo(FinalIndexFile);
}
//the chunk is deleted once it has been copied
File.Delete(file);
}
}
//File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
}
// Restored copy of the question's click handler: the machine translation had replaced C#
// keywords, identifiers, generic arguments and quotes with translated text, so the block no
// longer compiled. The logic below is the question's original, deliberately left unchanged
// (including its nested full rescan of the second file for every manifest line).
//
// For each manifest line it looks for the first line of the second file whose third '|'
// field equals the manifest's seventh ',' field without its trailing extension, collects
// "<output line>|<manifest date>", flushes matches to "test<N>.txt" chunks of 1000 lines and
// finally concatenates the chunks into "Final Index\FinalIndex.txt".
private void BT_Xref_Click(object sender, EventArgs e)
{
    // grabs file path from text box
    string ManifestPath = TB_Manifest.Text;
    // grabs parent directory from file path
    string directoryName = Path.GetDirectoryName(ManifestPath);
    // creates a new folder for the final output text file
    string pathString = Path.Combine(directoryName, "Final Index");
    Directory.CreateDirectory(pathString);
    // list for matching text lines which will eventually be output to the final text file
    List<string> NewData = new List<string>();
    // initializes StreamReader for the first text file
    // NOTE(review): the file is read a second time by File.ReadAllLines, and the reader is
    // never disposed.
    StreamReader ManifestReader = new StreamReader(ManifestPath);
    String[] ManifestArray = File.ReadAllLines(ManifestPath);
    List<string> RemoveManifest = new List<string>(ManifestArray);
    // initializes StreamReader for the second text file
    StreamReader OutputReader = new StreamReader(TB_Complete.Text);
    String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
    List<string> RemoveOutput = new List<string>(OutputArray);
    // initializes a count which decides at what point a text file should be created
    int shortcount = 0;
    // one ReadLine on each reader so the first line of both text files is ignored
    string ManifestLine = ManifestReader.ReadLine();
    string OutputLine = OutputReader.ReadLine();
    // NOTE(review): the reader is one line ahead of ManifestArray, so the last iteration
    // presumably receives null from ReadLine - confirm.
    foreach (string mfile in ManifestArray)
    {
        ManifestLine = ManifestReader.ReadLine();
        string ManifestElement = ManifestLine.Split(',')[6];
        string ManifestElement2 = ManifestLine.Split(',')[5];
        // value to be retrieved and output to final text file
        string ManifestElementDate = ManifestElement2.Replace("/", "-");
        // value to be compared with the other text file
        string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"), "");
        // rewinds the OutputReader stream so the scan starts over for this manifest line
        // NOTE(review): no DiscardBufferedData() call accompanies the rewind - verify.
        OutputReader.BaseStream.Position = 0;
        // counting the mfile position in the ManifestArray
        //int removeIndex = Array.IndexOf(ManifestArray, mfile);
        // remove by resizing the array
        //Array.Resize(ref ManifestArray, ManifestArray.Length - 1);
        foreach (string ofile in OutputArray)
        {
            OutputLine = OutputReader.ReadLine();
            // value to be compared with other text file
            string OutputElement = OutputLine.Split('|')[2];
            // if values equal then add the specified line of text to the list.
            if (ManifestNoExt.Equals(OutputElement))
            {
                NewData.Add(OutputLine + "|" + ManifestElementDate);
                RemoveManifest.RemoveAll(item => item == ManifestLine);
                if (NewData.Count == 1000)
                {
                    // once the count is reached, output the lines into a new text file
                    shortcount = shortcount + 1;
                    File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
                    NewData.Clear();
                }
                // only the first matching output line is used per manifest line
                break;
            }
        }
    }
    // once all lines of text have been searched, combine all text files in the directory
    shortcount = shortcount + 1;
    File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
    String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
    using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
    {
        foreach (var file in SplitTextFiles)
        {
            using (var input = File.OpenRead(file))
            {
                input.CopyTo(FinalIndexFile);
            }
            File.Delete(file);
        }
    }
    //File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
}
这里有一个O(nm)算法,假设n和m相同,它实际上是一个O(n^2)。这不是很好,这就是为什么它会慢得像爬行一样(对于每个文件各150k行的情况,内部循环最多要迭代约22,500,000,000次,即150,000 × 150,000)。我不完全确定您的代码想做什么,但根据条件 if (ManifestNoExt.Equals(OutputElement)),我认为您可以大幅降低复杂度,如下所示:
读入TextFileA,根据ManifestNoExt作为键和mFile作为值将值存储到字典中
接下来读入TextFileB并迭代B中的所有行,然后在构建的字典中进行查找
这将为您提供一个快速的算法,即O(n)+O(m)
另外,我不知道为什么要先把整个文件读入内存,然后又在循环中用StreamReader再读一遍(ManifestArray和OutputArray的内容与文件本身相同)。这无疑也是导致速度变慢的原因之一,因为每次读取最终都要访问文件系统。
这一想法的一个完全未经测试的版本:
// Click handler that cross-references the manifest file (path taken from TB_Manifest) with
// the file named in TB_Complete and writes every matching line of the second file, suffixed
// with "|" and the manifest date, to "Final Index\FinalIndex.txt" next to the manifest.
//
// The manifest is indexed once in a dictionary and the second file is then matched in a
// single pass, so the work is O(n) + O(m) instead of O(n * m).
//
// Behaviour notes:
//  - The first line of each file is skipped.
//  - Manifest key  = 7th ',' field with a trailing ".ext" removed.
//    Manifest date = 6th ',' field with '/' replaced by '-'.
//  - Output key    = 3rd '|' field.
//  - If several manifest lines share a key, the first one wins.
//  - Lines that do not have enough fields are skipped instead of throwing.
//  - Matches are written straight to FinalIndex.txt in the order of the second file; no
//    intermediate "test<N>.txt" chunk files are created, merged or deleted.
private void BT_Xref_Click(object sender, EventArgs e)
{
    // grabs file path from text box
    string manifestPath = TB_Manifest.Text;
    // grabs parent directory from file path
    string directoryName = Path.GetDirectoryName(manifestPath);
    // creates a new folder for the final output text file
    string pathString = Path.Combine(directoryName, "Final Index");
    Directory.CreateDirectory(pathString);

    // built once so the pattern is not re-parsed for every manifest line
    Regex extensionPattern = new Regex("(\\.\\w+$)");

    // key: manifest file name without extension, value: manifest date with '-' separators
    Dictionary<string, string> manifestDates = new Dictionary<string, string>();
    foreach (string manifestLine in File.ReadAllLines(manifestPath).Skip(1))
    {
        string[] manifestFields = manifestLine.Split(',');
        if (manifestFields.Length < 7)
        {
            // blank or malformed line: nothing to index
            continue;
        }

        string manifestNoExt = extensionPattern.Replace(manifestFields[6], "");
        // Dictionary.Add would throw on a repeated key, so keep the first occurrence only
        if (!manifestDates.ContainsKey(manifestNoExt))
        {
            manifestDates.Add(manifestNoExt, manifestFields[5].Replace("/", "-"));
        }
    }

    // list for matching text lines which will be output to the final text file
    List<string> newData = new List<string>();
    foreach (string outputLine in File.ReadAllLines(TB_Complete.Text).Skip(1))
    {
        string[] outputFields = outputLine.Split('|');
        if (outputFields.Length < 3)
        {
            // blank or malformed line: cannot be matched
            continue;
        }

        // single hash lookup replaces the former scan over every manifest line
        string manifestDate;
        if (manifestDates.TryGetValue(outputFields[2], out manifestDate))
        {
            newData.Add(outputLine + "|" + manifestDate);
        }
    }

    // one write in match order; avoids merging chunk files in directory-listing order and
    // re-reading a FinalIndex.txt left over from a previous run
    File.WriteAllLines(Path.Combine(pathString, "FinalIndex.txt"), newData);
}
private void BT\u Xref\u单击(对象发送方,事件参数e)
{
//从文本框中获取文件路径
字符串ManifestPath=TB_Manifest.Text;
//gr