Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/339.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 如何有效地交叉引用2个文本文件?|改进我的代码_C#_Arrays_Visual Studio 2010_List_Text Files - Fatal编程技术网

C# 如何有效地交叉引用2个文本文件?|改进我的代码

C# 如何有效地交叉引用2个文本文件?|改进我的代码,c#,arrays,visual-studio-2010,list,text-files,C#,Arrays,Visual Studio 2010,List,Text Files,下面是我的代码的功能概述: 读取具有150k行的TextFileA 读取TextFileB,它有150k行,是TextFileA的交叉引用列表 。拆分两个文本文件并匹配指定的元素 最后,输出第三个文本文件,其中包含TextFileA和TextFileB的值 下面的代码运行良好,直到大约13000行输入,然后程序变得异常缓慢 有人能解释一下为什么程序会以指数级的速度变慢,以及我如何改进这段代码吗?谢谢 private void BT_Xref_Click(object sender, Even

下面是我的代码的功能概述:

  • 读取具有150k行的TextFileA
  • 读取TextFileB,它有150k行,是TextFileA的交叉引用列表
  • 。拆分两个文本文件并匹配指定的元素
  • 最后,输出第三个文本文件,其中包含TextFileA和TextFileB的值
下面的代码运行良好,直到大约13000行输入,然后程序变得异常缓慢

有人能解释一下为什么程序会以指数级的速度变慢,以及我如何改进这段代码吗?谢谢

private void BT_Xref_Click(object sender, EventArgs e)
    {
        //grabs file path from text box
        string ManifestPath = TB_Manifest.Text;
        //grabs parent directory from file path
        string directoryName = Path.GetDirectoryName(ManifestPath);
        //creates a new folder for the final output text file
        string pathString = Path.Combine(directoryName, "Final Index");
        Directory.CreateDirectory(pathString);
        //list for matching text lines which will eventually be output to the final text file
        List<string> NewData = new List<string>();

        //initializes StreamReader for the first text file
        StreamReader ManifestReader = new StreamReader(ManifestPath);
        String[] ManifestArray = File.ReadAllLines(ManifestPath);
        List<string> RemoveManifest = new List<string>(ManifestArray);
        //initializes StreamReader for the second text file
        StreamReader OutputReader = new StreamReader(TB_Complete.Text);
        String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
        List<string> RemoveOutput = new List<string>(OutputArray);

        //initializes a count which decides at what point a text file should be created
        int shortcount = 0;
        //.ReadLine is initialized to ignore the first line in both text files
        string ManifestLine = ManifestReader.ReadLine();
        string OutputLine = OutputReader.ReadLine();

        foreach (string mfile in ManifestArray)
        {
            ManifestLine = ManifestReader.ReadLine();
            string ManifestElement = ManifestLine.Split(',')[6];
            string ManifestElement2 = ManifestLine.Split(',')[5];
            //value to be retreived and output to final text file
            string ManifestElementDate = ManifestElement2.Replace("/", "-");
            //value to be compared with the other text file
            string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"),"");
            //resets OutpuReader reader to ensure no lines are being skipped
            OutputReader.BaseStream.Position = 0;

            //counting the mfile position in the ManifestArray
            //int removeIndex = Array.IndexOf(ManifestArray, mfile);
            //remove by resising the array
            //Array.Resize(ref ManifestArray, ManifestArray.Length - 1);

            foreach (string ofile in OutputArray)
            {
                OutputLine = OutputReader.ReadLine();
                //value to be comapred with other text file
                string OutputElement = OutputLine.Split('|')[2];
                //if values equal then add the specified line of text to the list.
                if (ManifestNoExt.Equals(OutputElement))
                {
                    NewData.Add(OutputLine + "|" + ManifestElementDate);
                    RemoveManifest.RemoveAll(item => item == ManifestLine);

                    if (NewData.Count == 1000)
                    {
                        //if youve reached the count then output files into a new text file
                        shortcount = shortcount + 1;
                        File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
                        NewData.Clear();
                    }
                    break;
                }
            }
        }
        //once all line of text have been searched combine all text files in directory
        shortcount = shortcount + 1;
        File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
        String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
        using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
        {
            foreach (var file in SplitTextFiles)
            {
                using (var input = File.OpenRead(file))
                {
                    input.CopyTo(FinalIndexFile);
                }
                File.Delete(file);
            }
        }
        //File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
    }
private void BT\u Xref\u单击(对象发送方,事件参数e)
{
//从文本框中获取文件路径
字符串ManifestPath=TB_Manifest.Text;
//从文件路径获取父目录
字符串directoryName=Path.GetDirectoryName(ManifestPath);
//为最终输出文本文件创建新文件夹
string Path string=Path.Combine(directoryName,“最终索引”);
CreateDirectory(路径字符串);
//最终将输出到最终文本文件的匹配文本行的列表
List NewData=新列表();
//初始化第一个文本文件的StreamReader
StreamReader ManifestReader=新的StreamReader(ManifestPath);
字符串[]ManifestArray=File.ReadAllLines(ManifestPath);
List RemoveManifest=新列表(ManifestArray);
//初始化第二个文本文件的StreamReader
StreamReader OutputReader=新的StreamReader(TB_Complete.Text);
String[]OutputArray=File.ReadAllLines(TB_Complete.Text);
List RemoveOutput=新列表(OutputArray);
//初始化一个计数,该计数决定应在什么点创建文本文件
int shortcount=0;
//.ReadLine初始化为忽略两个文本文件中的第一行
字符串ManifestLine=ManifestReader.ReadLine();
字符串OutputLine=OutputReader.ReadLine();
foreach(ManifestArray中的字符串mfile)
{
ManifestLine=ManifestReader.ReadLine();
字符串ManifestElement=ManifestLine.Split(',')[6];
字符串ManifestElement2=ManifestLine.Split(',')[5];
//要检索并输出到最终文本文件的值
字符串ManifestElementDate=ManifestElement2.Replace(“/”,“-”);
//要与其他文本文件进行比较的值
字符串ManifestNoExt=Regex.Replace(ManifestElement,(“(\\.\\w+$)”)”)”;
//重置OutpureReader读卡器以确保不跳过任何行
OutputReader.BaseStream.Position=0;
//计算mfile在数组中的位置
//int removeIndex=Array.IndexOf(ManifestArray,mfile);
//通过调整阵列的大小来删除
//Resize(ref ManifestArray,ManifestArray.Length-1);
foreach(输出阵列中的文件字符串)
{
OutputLine=OutputReader.ReadLine();
//要与其他文本文件兼容的值
字符串OutputElement=OutputLine.Split(“|”)[2];
//如果值相等,则将指定的文本行添加到列表中。
if(ManifestNoExt.Equals(OutputElement))
{
添加(OutputLine+“|”+ManifestElementDate);
RemoveManifest.RemoveAll(item=>item==ManifestLine);
如果(NewData.Count==1000)
{
//如果已达到计数,则将文件输出到新的文本文件中
短计数=短计数+1;
File.writeAllines(路径字符串+“\\test”+shortcount+”.txt),NewData);
NewData.Clear();
}
打破
}
}
}
//搜索完所有文本行后,合并目录中的所有文本文件
短计数=短计数+1;
File.writeAllines(路径字符串+“\\test”+shortcount+”.txt),NewData);
String[]SplitTextFiles=Directory.GetFiles(路径字符串,“***”,SearchOption.AllDirectories);
使用(var FinalIndexFile=File.Create(pathString+“\\FinalIndex.txt”))
{
foreach(SplitTextFiles中的var文件)
{
使用(var input=File.OpenRead(File))
{
input.CopyTo(FinalIndexFile);
}
文件。删除(文件);
}
}
//File.writeAllines(“\\test.txt”,Directory.EnumerateFiles(pathString,@“*.txt”)。SelectMany(File=>File.ReadLines(File));
}
这里有一个O(nm)算法,假设n和m相同,它实际上是一个O(n^2)。这不是很好,这就是为什么它会慢到爬行的原因(对于每个文件中的150k行,您将看到内部循环的22500000次迭代。不完全确定您的代码试图做什么,但根据条件
if(ManifestNoExt.Equals(OutputElement))
,我认为您可以大幅降低复杂性,如下所示:

读入TextFileA,根据ManifestNoExt作为键和mFile作为值将值存储到字典中

接下来读入TextFileB并迭代B中的所有行,然后在构建的字典中进行查找

这将为您提供一个快速的算法,即O(n)+O(m)

另外,我不知道为什么要读取整个文件,然后在循环中再次读取它们(ManifestArray和OutputArray的内容与文件相同)。这当然也是导致速度减慢的原因,因为最终会影响文件系统

这一想法的一个完全未经测试的版本:

private void BT_Xref_Click(object sender, EventArgs e)
{
    //grabs file path from text box
    string ManifestPath = TB_Manifest.Text;
    //grabs parent directory from file path
    string directoryName = Path.GetDirectoryName(ManifestPath);
    //creates a new folder for the final output text file
    string pathString = Path.Combine(directoryName, "Final Index");
    Directory.CreateDirectory(pathString);
    //list for matching text lines which will eventually be output to the final text file
    List<string> NewData = new List<string>();

    String[] ManifestArray = File.ReadAllLines(ManifestPath);
    List<string> RemoveManifest = new List<string>(ManifestArray);
    String[] OutputArray = File.ReadAllLines(TB_Complete.Text);
    List<string> RemoveOutput = new List<string>(OutputArray);

    //initializes a count which decides at what point a text file should be created
    int shortcount = 0;
    //.ReadLine is initialized to ignore the first line in both text files
    string ManifestLine = ManifestReader.ReadLine();
    string OutputLine = OutputReader.ReadLine();

    Dictionary<string, Tuple<string, string>> ManifestMap = new Dictionary<string, Tuple<string, string>>();

    foreach (string mfile in ManifestArray.Skip(1))
    {
        string ManifestLine = mfile;
        string ManifestElement = ManifestLine.Split(',')[6];
        string ManifestElement2 = ManifestLine.Split(',')[5];
        //value to be retreived and output to final text file
        string ManifestElementDate = ManifestElement2.Replace("/", "-");
        //value to be compared with the other text file
        string ManifestNoExt = Regex.Replace(ManifestElement, ("(\\.\\w+$)"),"");

        ManifestMap.Add(ManifestNoExt, Tuple.Create(ManifestElementDate, ManifestLine));

        //counting the mfile position in the ManifestArray
        //int removeIndex = Array.IndexOf(ManifestArray, mfile);
        //remove by resising the array
        //Array.Resize(ref ManifestArray, ManifestArray.Length - 1);
    }

    foreach (string ofile in OutputArray.Skip(1))
    {
        //value to be compared with other text file
        string OutputElement = OutputLine.Split('|')[2];
        //if values equal then add the specified line of text to the list.
        if (ManifestMap.ContainsKey(OutputElement))
        {
            NewData.Add(OutputLine + "|" + ManifestMap[OutputElement].Item1);
            RemoveManifest.RemoveAll(item => item == ManifestMap[OutputElement].Item2);

            if (NewData.Count == 1000)
            {
                //if youve reached the count then output files into a new text file
                shortcount = shortcount + 1;
                File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
                NewData.Clear();
            }
            break;
        }
    }

    //once all line of text have been searched combine all text files in directory
    shortcount = shortcount + 1;
    File.WriteAllLines(pathString + "\\test" + shortcount + ".txt", NewData);
    String[] SplitTextFiles = Directory.GetFiles(pathString, "*.*", SearchOption.AllDirectories);
    using (var FinalIndexFile = File.Create(pathString + "\\FinalIndex.txt"))
    {
        foreach (var file in SplitTextFiles)
        {
            using (var input = File.OpenRead(file))
            {
                input.CopyTo(FinalIndexFile);
            }
            File.Delete(file);
        }
    }
    //File.WriteAllLines("\\test.txt", Directory.EnumerateFiles(pathString, @"*.txt").SelectMany(file => File.ReadLines(file)));
}
private void BT\u Xref\u单击(对象发送方,事件参数e)
{
//从文本框中获取文件路径
字符串ManifestPath=TB_Manifest.Text;
//gr