C# 如何有效地交叉引用2个文本文件？|改进我的代码_C#_Arrays_Visual Studio 2010_List_Text Files

C# 如何有效地交叉引用2个文本文件？|改进我的代码

c# arrays visual-studio-2010 list

C# 如何有效地交叉引用2个文本文件？|改进我的代码,c#,arrays,visual-studio-2010,list,text-files,C#,Arrays,Visual Studio 2010,List,Text Files,下面是我的代码的功能概述：读取具有150k行的TextFileA 读取TextFileB，它有150k行，是TextFileA的交叉引用列表。拆分两个文本文件并匹配指定的元素最后，输出第三个文本文件，其中包含TextFileA和TextFileB的值下面的代码运行良好，直到大约13000行输入，然后程序变得异常缓慢有人能解释一下为什么程序会以指数级的速度变慢，以及我如何改进这段代码吗？谢谢 private void BT_Xref_Click(object sender, Even

下面是我的代码的功能概述：

读取具有150k行的TextFileA
读取TextFileB，它有150k行，是TextFileA的交叉引用列表
拆分两个文本文件并匹配指定的元素
最后，输出第三个文本文件，其中包含TextFileA和TextFileB的值

下面的代码运行良好，直到大约13000行输入，然后程序变得异常缓慢

有人能解释一下为什么程序会以指数级的速度变慢，以及我如何改进这段代码吗？谢谢

/// <summary>
/// Click handler that cross-references the manifest file (path taken from TB_Manifest) against
/// the file named in TB_Complete and writes the matched lines to "Final Index\FinalIndex.txt"
/// inside the manifest's parent directory.
/// </summary>
/// <remarks>
/// For every manifest line the inner loop walks the second file again from the start, so the
/// amount of work grows with (manifest lines x output lines). This nested scan is the slowdown
/// the question is asking about.
/// </remarks>
private void BT_Xref_Click(object sender, EventArgs e)
    {
        // Path of the manifest file, as typed into the text box.
        string ManifestPath = TB_Manifest.Text;
        // Parent directory of the manifest file.
        string directoryName = Path.GetDirectoryName(ManifestPath);
        // Output folder "Final Index" is created next to the manifest file.
        string pathString = Path.Combine(directoryName, "Final Index");
        Directory.CreateDirectory(pathString);
        // Buffer of matched lines; flushed to a numbered chunk file every 1000 entries.
        List<string> NewData = new List<string>();

        // The manifest is read twice: once through a StreamReader and once fully into memory.
        // NOTE(review): ManifestReader is never disposed in this method.
        StreamReader ManifestReader = new StreamReader(ManifestPath);
        String[] ManifestArray = File.ReadAllLines(ManifestPath);
        // Copy of the manifest lines from which matched lines are removed; not read afterwards here.
        List<string> RemoveManifest = new List<string>(ManifestArray);
        // Same double read for the second file.
        // NOTE(review): OutputReader is never disposed in this method.
        StreamReader OutputReader = new StreamReader(TB_Complete.Text);
        String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
        // Copy of the second file's lines; never used again in this method.
        List<string> RemoveOutput = new List<string>(OutputArray);

        // Number of chunk files written so far; used to build the chunk file names.
        int shortcount = 0;
        // Consume the first line of each reader (presumably a header row).
        string ManifestLine = ManifestReader.ReadLine();
        string OutputLine = OutputReader.ReadLine();

        // The array is used only to drive the iteration count; the data comes from the reader.
        // NOTE(review): the reader has already consumed one line above, so on the last
        // iteration ReadLine() should return null and the Split call below would throw - confirm.
        foreach (string mfile in ManifestArray)
        {
            ManifestLine = ManifestReader.ReadLine();
            // Comma-separated fields: index 6 is the value to match on, index 5 the date.
            string ManifestElement = ManifestLine.Split(',')[6];
            string ManifestElement2 = ManifestLine.Split(',')[5];
            // Date value appended to the output line, with '/' replaced by '-'.
            string ManifestElementDate = ManifestElement2.Replace("/", "-");
            // Match key: the field with a trailing ".ext" (dot followed by word characters) removed.
            string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"),"");
            // Rewind the underlying stream so the inner loop starts from the top of the file.
            // NOTE(review): the StreamReader's internal buffer is not discarded here
            // (no DiscardBufferedData call), so the reader may not actually restart - confirm.
            OutputReader.BaseStream.Position = 0;

            //counting the mfile position in the ManifestArray
            //int removeIndex = Array.IndexOf(ManifestArray, mfile);
            //remove by resising the array
            //Array.Resize(ref ManifestArray, ManifestArray.Length - 1);

            // Linear scan of the second file for every single manifest line.
            foreach (string ofile in OutputArray)
            {
                OutputLine = OutputReader.ReadLine();
                // Pipe-separated fields: index 2 is compared with the manifest key.
                string OutputElement = OutputLine.Split('|')[2];
                // On a match, record the output line with the manifest date appended.
                if (ManifestNoExt.Equals(OutputElement))
                {
                    NewData.Add(OutputLine + "|" + ManifestElementDate);
                    // Scans the whole RemoveManifest list on every match.
                    RemoveManifest.RemoveAll(item => item == ManifestLine);

                    if (NewData.Count == 1000)
                    {
                        // Flush the buffer to the next numbered chunk file and start over.
                        shortcount = shortcount + 1;
                        File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
                        NewData.Clear();
                    }
                    // Only the first matching output line is used for each manifest line.
                    break;
                }
            }
        }
        // Write whatever is left in the buffer as the final chunk (possibly an empty file).
        shortcount = shortcount + 1;
        File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
        // Collects every file under the output folder, not only the test*.txt chunks.
        // NOTE(review): Directory.GetFiles does not promise numeric chunk order - confirm ordering.
        String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
        using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
        {
            // Append each collected file to FinalIndex.txt, then delete it.
            foreach (var file in SplitTextFiles)
            {
                using (var input = File.OpenRead(file))
                {
                    input.CopyTo(FinalIndexFile);
                }
                File.Delete(file);
            }
        }
        //File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
    }

private void BT_Xref_Click(object sender, EventArgs e)
{
    //从文本框中获取文件路径
    string ManifestPath = TB_Manifest.Text;
    //从文件路径获取父目录
    string directoryName = Path.GetDirectoryName(ManifestPath);
    //为最终输出文本文件创建新文件夹
    string pathString = Path.Combine(directoryName, "Final Index");
    Directory.CreateDirectory(pathString);
    //最终将输出到最终文本文件的匹配文本行的列表
    List<string> NewData = new List<string>();
    //初始化第一个文本文件的StreamReader
    StreamReader ManifestReader = new StreamReader(ManifestPath);
    String[] ManifestArray = File.ReadAllLines(ManifestPath);
    List<string> RemoveManifest = new List<string>(ManifestArray);
    //初始化第二个文本文件的StreamReader
    StreamReader OutputReader = new StreamReader(TB_Complete.Text);
    String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
    List<string> RemoveOutput = new List<string>(OutputArray);
    //初始化一个计数，该计数决定应在什么点创建文本文件
    int shortcount = 0;
    //先调用一次ReadLine，以忽略两个文本文件中的第一行
    string ManifestLine = ManifestReader.ReadLine();
    string OutputLine = OutputReader.ReadLine();
    foreach (string mfile in ManifestArray)
    {
        ManifestLine = ManifestReader.ReadLine();
        string ManifestElement = ManifestLine.Split(',')[6];
        string ManifestElement2 = ManifestLine.Split(',')[5];
        //要检索并输出到最终文本文件的值
        string ManifestElementDate = ManifestElement2.Replace("/", "-");
        //要与另一个文本文件进行比较的值
        string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"),"");
        //重置OutputReader读取器以确保不跳过任何行
        OutputReader.BaseStream.Position = 0;
        //计算mfile在ManifestArray中的位置
        //int removeIndex = Array.IndexOf(ManifestArray, mfile);
        //通过调整数组的大小来删除
        //Array.Resize(ref ManifestArray, ManifestArray.Length - 1);
        foreach (string ofile in OutputArray)
        {
            OutputLine = OutputReader.ReadLine();
            //要与另一个文本文件进行比较的值
            string OutputElement = OutputLine.Split('|')[2];
            //如果值相等，则将指定的文本行添加到列表中。
            if (ManifestNoExt.Equals(OutputElement))
            {
                NewData.Add(OutputLine + "|" + ManifestElementDate);
                RemoveManifest.RemoveAll(item => item == ManifestLine);
                if (NewData.Count == 1000)
                {
                    //如果已达到计数，则将这些行输出到新的文本文件中
                    shortcount = shortcount + 1;
                    File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
                    NewData.Clear();
                }
                break;
            }
        }
    }
    //搜索完所有文本行后，合并目录中的所有文本文件
    shortcount = shortcount + 1;
    File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
    String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
    using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
    {
        foreach (var file in SplitTextFiles)
        {
            using (var input = File.OpenRead(file))
            {
                input.CopyTo(FinalIndexFile);
            }
            File.Delete(file);
        }
    }
    //File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
}

这里有一个O（nm）算法，假设n和m相同，它实际上就是O（n^2）。这并不理想，也正是程序慢得像在爬行的原因（每个文件有150k行时，内部循环将要执行22,500,000,000次迭代）。我不完全确定您的代码想要做什么，但根据条件

if (ManifestNoExt.Equals(OutputElement))

，我认为您可以大幅降低复杂性，如下所示：

读入TextFileA，根据ManifestNoExt作为键和mFile作为值将值存储到字典中

接下来读入TextFileB并迭代B中的所有行，然后在构建的字典中进行查找

这将为您提供一个快速的算法，即O（n）+O（m）

另外，我不明白为什么要先把整个文件读入内存，然后又在循环中用StreamReader重新读取它们（ManifestArray和OutputArray的内容与文件完全相同）。这当然也是导致速度减慢的原因，因为每一轮循环都要重新访问文件系统。

这一想法的一个完全未经测试的版本：

/// <summary>
/// Click handler that cross-references the manifest file (path in TB_Manifest) against the
/// completed-output file (path in TB_Complete) and writes every matched output line, with the
/// manifest date appended, to "Final Index\FinalIndex.txt" next to the manifest.
/// </summary>
/// <remarks>
/// The manifest is indexed once into a dictionary and the output file is then streamed once,
/// so the work is O(n) + O(m) instead of a nested O(n * m) scan.
/// The first line of each file is skipped (treated as a header). Lines that do not contain
/// the expected number of fields are ignored rather than aborting the whole run.
/// Results are written straight to FinalIndex.txt in input order; no intermediate chunk files
/// are created, so nothing else in the output folder is merged or deleted.
/// </remarks>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void BT_Xref_Click(object sender, EventArgs e)
{
    // Index of the '|'-separated field in the output file that is matched against the manifest.
    const int outputKeyIndex = 2;

    string manifestPath = TB_Manifest.Text;
    string completePath = TB_Complete.Text;

    // The result goes into a "Final Index" folder beside the manifest file.
    string directoryName = Path.GetDirectoryName(manifestPath);
    string pathString = Path.Combine(directoryName, "Final Index");
    Directory.CreateDirectory(pathString);

    Dictionary<string, string> manifestDates = BuildManifestDateMap(manifestPath);

    List<string> newData = new List<string>();
    foreach (string outputLine in File.ReadLines(completePath).Skip(1))
    {
        string[] fields = outputLine.Split('|');
        if (fields.Length <= outputKeyIndex)
        {
            // Blank or malformed line: nothing to match on.
            continue;
        }

        // Single hash lookup per output line; every matching line is kept.
        string manifestDate;
        if (manifestDates.TryGetValue(fields[outputKeyIndex], out manifestDate))
        {
            newData.Add(outputLine + "|" + manifestDate);
        }
    }

    File.WriteAllLines(Path.Combine(pathString, "FinalIndex.txt"), newData);
}

/// <summary>
/// Builds a lookup from manifest file name (trailing ".ext" removed) to its date with
/// '/' replaced by '-'.
/// </summary>
/// <param name="manifestPath">Path of the comma-separated manifest file.</param>
/// <returns>
/// A dictionary keyed by the extension-less name. When the same name occurs more than once,
/// the first occurrence wins.
/// </returns>
private static Dictionary<string, string> BuildManifestDateMap(string manifestPath)
{
    // Positions of the date and file-name columns in a manifest line.
    const int dateFieldIndex = 5;
    const int nameFieldIndex = 6;

    // Strips a trailing ".ext" (a dot followed by word characters at the end of the value).
    Regex extensionPattern = new Regex("(\\.\\w+$)");
    Dictionary<string, string> manifestDates = new Dictionary<string, string>();

    foreach (string manifestLine in File.ReadLines(manifestPath).Skip(1))
    {
        // Split once and reuse the fields instead of splitting per column.
        string[] fields = manifestLine.Split(',');
        if (fields.Length <= nameFieldIndex)
        {
            // Blank or malformed line: skip it instead of throwing on the index.
            continue;
        }

        string nameWithoutExtension = extensionPattern.Replace(fields[nameFieldIndex], "");
        // Dictionary.Add would throw on a repeated name; keep the first entry instead.
        if (!manifestDates.ContainsKey(nameWithoutExtension))
        {
            manifestDates.Add(nameWithoutExtension, fields[dateFieldIndex].Replace("/", "-"));
        }
    }

    return manifestDates;
}

private void BT\u Xref\u单击（对象发送方，事件参数e）
{
//从文本框中获取文件路径
字符串ManifestPath=TB_Manifest.Text；
//gr