C# 寻找在c中快速读取和搜索文件的方法#_C#_File_Text_Io

C# 寻找在 C# 中快速读取和搜索文件的方法

c# file text io

C# 寻找在c中快速读取和搜索文件的方法#,c#,file,text,io,C#,File,Text,Io,我有100Mb的文本文件，我需要检查每一行的特殊单词。我正在寻找快速的方法来做这件事因此，我将文件分为10： public void ParseTheFile(BackgroundWorker bg) { Lines = File.ReadAllLines(FilePath); this.size = Lines.Length; chankSise=size/10; reports reportInst = new

我有100Mb的文本文件，我需要检查每一行的特殊单词。我正在寻找快速的方法来做这件事

因此，我将文件分成 10 块：

/// <summary>
/// Reads every line of <c>FilePath</c>, splits the lines into ten contiguous chunks,
/// hands each chunk to its own <c>ParserThread</c> running on a background thread,
/// and blocks until all ten threads have finished.
/// </summary>
/// <param name="bg">Worker passed to the <c>reports</c> instance shared by all parser threads.</param>
public void ParseTheFile(BackgroundWorker bg)
    {
        const int workerCount = 10;

        Lines = File.ReadAllLines(FilePath);
        this.size = Lines.Length;
        chankSise = size / workerCount;

        reports reportInst = new reports(bg, size);

        ParserThread[] ParserthreadArray = new ParserThread[workerCount];
        Thread[] workerThreads = new Thread[workerCount];

        for (int i = 0; i < ParserthreadArray.Length; i++)
        {
            int offset = i * chankSise;

            // Integer division truncates, so the last chunk also takes the
            // size % workerCount leftover lines; otherwise they would never be parsed
            // (and a file with fewer than workerCount lines would not be parsed at all).
            int length = (i == ParserthreadArray.Length - 1) ? size - offset : chankSise;

            ParserthreadArray[i] = new ParserThread(reportInst);
            ParserthreadArray[i].Init(SubArray(Lines, offset, length), OutputPath);

            workerThreads[i] = new Thread(ParserthreadArray[i].run);
            workerThreads[i].IsBackground = true;
        }

        // Start everything first, then wait, so the chunks are processed concurrently.
        foreach (Thread worker in workerThreads)
        {
            worker.Start();
        }

        foreach (Thread worker in workerThreads)
        {
            worker.Join();
        }
    }

这是 SubArray 方法：

/// <summary>
/// Copies a contiguous run of elements from <paramref name="data"/> into a new array.
/// </summary>
/// <param name="data">Array to copy from.</param>
/// <param name="index">Zero-based position in <paramref name="data"/> where copying starts.</param>
/// <param name="length">Number of elements to copy; also the size of the returned array.</param>
/// <returns>A newly allocated array holding the copied elements.</returns>
public string [] SubArray(string [] data, int index, int length)
    {
        string[] slice = new string[length];
        Array.Copy(data, index, slice, 0, slice.Length);
        return slice;
    }

每个线程都执行以下操作：

 /// <summary>
 /// Scans every entry of <c>Lines</c> and appends it to a per-engine text file
 /// under <c>OutputPath</c>.
 /// </summary>
 /// <remarks>
 /// A line whose words (from index 5 on) contain "Handler" or "Engine" followed by a
 /// token that starts with a digit selects that digit as the current engine and is
 /// appended to "&lt;digit&gt;.txt". Any other line is appended to the current engine's
 /// file, but only if <c>engineArry</c> marks that file as already created.
 /// </remarks>
 public void run()
    {
        if (!System.IO.Directory.Exists(OutputPath))
        {
            System.IO.Directory.CreateDirectory(OutputPath);
            DirectoryInfo dir = new DirectoryInfo(OutputPath);
            dir.Attributes |= FileAttributes.Hidden;
        }

        this.size = Lines.Length;
        foreach (string line in Lines)
        {
            // Progress is reported with the count of lines finished so far,
            // i.e. before this line is counted.
            bgReports.sendreport(allreadychecked);

            allreadychecked++;
            hadHandlerOrEngine = false;
            words = line.Split(' ');
            if (words.Length > 4)
            {
                // Stop one short of the end: a keyword in the last position has no
                // following token, and reading words[i + 1] would throw.
                for (int i = 5; i < words.Length - 1; i++)
                {
                    if (words[i] != "Handler" && words[i] != "Engine")
                    {
                        continue;
                    }

                    // Consecutive spaces give an empty token, and the token may not
                    // start with a digit; neither identifies an engine, so keep scanning
                    // instead of throwing from the indexer or from int.Parse.
                    string num = words[i + 1];
                    if (num.Length == 0 || num[0] < '0' || num[0] > '9')
                    {
                        continue;
                    }

                    hadHandlerOrEngine = true;
                    int realnum = num[0] - '0';
                    cuurentEngine = realnum;

                    string enginePath = OutputPath + "/" + realnum + ".txt";
                    if (engineArry[realnum] == false)
                    {
                        File.Create(enginePath).Close();
                        engineArry[realnum] = true;
                    }

                    // using guarantees the file handle is released even if the write throws.
                    using (TextWriter tw = new StreamWriter(enginePath, true))
                    {
                        tw.WriteLine(line);
                    }

                    break;
                }
            }

            if (hadHandlerOrEngine == false)
            {
                if (engineArry[cuurentEngine] == true)
                {
                    using (TextWriter tw = new StreamWriter(OutputPath + "/" + cuurentEngine + ".txt", true))
                    {
                        tw.WriteLine(line);
                    }
                }
            }
        }
    }

public void run()
{
if (!System.IO.Directory.Exists(OutputPath))
{
System.IO.Directory.CreateDirectory(OutputPath);
DirectoryInfo dir = new DirectoryInfo(OutputPath);
dir.Attributes |= FileAttributes.Hidden;
}
this.size = Lines.Length;
foreach (string line in Lines)
{
bgReports.sendreport(allreadychecked);
allreadychecked++;
hadHandlerOrEngine = false;
words = line.Split(' ');
if (words.Length>4)
{
for (int i = 5; i


我的问题是，有没有办法让它运行得更快？你可能是IO绑定的，所以我猜多个线程不会有多大帮助。（很可能你的程序大部分时间都花在这里：Lines=File.ReadAllLines（FilePath）；而实际解析的时间不多。不过，你应该衡量一下。）事实上，子数组拆分可能比将整个过程传递给单个解析器线程要慢
我会查看MemoryMappedFile（如果这是.NET4）不必复制所有源数据，这将对IO有所帮助。
您还没有展示您的 Init 方法，但目前看起来您的每个线程实际上都在检查所有行。此外，看起来所有这些线程可能都在尝试写入相同的文件，而且没有以异常安全的方式（使用 using 语句）这样做。
编辑：好的，现在我们可以看到Init
，但是我们看不到子数组
。大概它只是复制了数组的一块
如果你一开始就避免使用线程，那么这会有多慢？它肯定太慢了吗？你的性能目标是什么？不过，使用10个线程似乎不太可能有帮助，因为在这一点上，它完全是内存/CPU受限的。（您还应该尽量避免在启动所有线程时重复这么多代码——为什么不为此使用集合？）我想推荐一些可能有用的东西。正如有人说的，如果你分配多线程读取你的文件是没有意义的，因为这更多的是I/O
活动，在这种情况下，这些活动在OS FileManager
中排队。但是你肯定可以对任何可用的I/O请求异步I/O
要处理的完成线程

现在，在处理文件时，我建议您使用内存映射文件
。内存映射文件非常适合于需要重复/单独访问较大文件的任意块文件（视图）的场景。在您的场景中，如果块到达/处理顺序错误，内存映射文件可以帮助您拆分/组装文件。
目前我没有现成的示例，请参阅下面的文章。也可以使用两个线程：一个用于读取，一个用于写入。将 ReadAllLines 改为分小块循环读取，就可以不必等待文件完全读入便开始搜索。如您所见，我附上了 Init 方法，并且在这里为每个线程拆分了文件：`for (int i = 0; i < ParserthreadArray.Length; i++)`。@MoShe：现在有多快？需要达到多快？
 // Walks every entry of Lines and appends it to a "<digit>.txt" file under OutputPath.
 // A line containing "Handler" or "Engine" (at word index 5 or later) selects the engine
 // from the first character of the following word; other lines go to the current engine.
 public void run()
    {

        // Create the output directory on first use and flag it as hidden.
        if (!System.IO.Directory.Exists(OutputPath))
        {
            System.IO.Directory.CreateDirectory(OutputPath);
            DirectoryInfo dir = new DirectoryInfo(OutputPath);
            dir.Attributes |= FileAttributes.Hidden;
        }



        this.size = Lines.Length;
        foreach (string line in Lines)
        {



            // Reports the number of lines finished so far (the counter is bumped afterwards).
            bgReports.sendreport(allreadychecked);

            allreadychecked++;
            hadHandlerOrEngine = false;
            words = line.Split(' ');
            if (words.Length>4)
            {
                // Only words from index 5 onwards are examined.
                for (int i = 5; i < words.Length; i++)
                {
                    // NOTE(review): '|' is the non-short-circuit OR; '||' is presumably intended.
                    if (words[i] == "Handler" | words[i] == "Engine")
                    {

                        hadHandlerOrEngine = true;
                        // NOTE(review): throws IndexOutOfRangeException when the keyword is the last word.
                        string num = words[1 + i];
                        // Engine number is the first character of the next word.
                        // NOTE(review): throws if that word is empty or does not start with a digit.
                        int realnum = int.Parse(num[0].ToString());
                        cuurentEngine = (realnum);
                        if (engineArry[realnum] == false)
                        {
                            // First hit for this engine: File.Create makes (or overwrites) its file.
                            File.Create(OutputPath + "/" + realnum + ".txt").Close();
                            engineArry[realnum] = true;

                        }
                        // Second argument 'true' opens the file in append mode.
                        // NOTE(review): not wrapped in 'using', so the writer stays open if WriteLine throws.
                        TextWriter tw = new StreamWriter(OutputPath + "/" + realnum + ".txt", true);
                        tw.WriteLine(line);
                        tw.Close();

                        // Only the first keyword on a line is honoured.
                        break;
                    }
                }

            }

            // No keyword on this line: append it to the current engine's file,
            // but only if engineArry marks that file as already created.
            if (hadHandlerOrEngine == false)
            {
                if (engineArry[cuurentEngine] == true)
                {
                    TextWriter tw = new StreamWriter(OutputPath + "/" + cuurentEngine + ".txt", true);
                    tw.WriteLine(line);
                    tw.Close();
                }

            }

        }