C# 在C#中优化foreach循环，添加线程？_C#_Wpf_Multithreading_Math_Foreach

C# 在C#中优化foreach循环，添加线程？

c# wpf multithreading math

C# 在C中优化foreach循环，添加线程？,c#,wpf,multithreading,math,foreach,C#,Wpf,Multithreading,Math,Foreach,我们制作了一个邮件过滤程序，它在测试环境中运行正常。但是当我想在一个真正的Database集合上尝试它时，我等了一个小时，可能还要等10个小时才能得到结果这是我的循环： foreach (var word in mail) { foreach (var wordInSpam in countsWordOccurenceSpam) { fo

我们制作了一个邮件过滤程序，它在测试环境中运行正常。但是当我想在一个真实的数据库数据集上运行它时，我已经等了一个小时，而且可能还要再等10个小时才能得到结果。

这是我的循环：

// For every word of the checked mail, walks every (spam entry, ok entry) pair and runs one of four
// "math" branches. The innermost body therefore executes mail.Count * countsWordOccurenceSpam.Count *
// countsWordOccurenceOk.Count times, and not at all if either dictionary is empty.
// NOTE(review): the ContainsKey tests depend only on word.Key, so the two inner loops serve only to
// find the matching entries; direct lookups (e.g. TryGetValue) would avoid them, but would also change
// how many times each math branch runs (see the per-branch notes) — confirm before restructuring.
foreach (var word in mail)
            {               
                foreach (var wordInSpam in countsWordOccurenceSpam)
                {
                    foreach (var wordInOk in countsWordOccurenceOk)
                    {
                        // Word is known from both spam and ok mail.
                        if (countsWordOccurenceOk.ContainsKey(word.Key) && countsWordOccurenceSpam.ContainsKey(word.Key))
                        {
                            // True only for the single pair whose keys both equal the word,
                            // so this math runs once per word.
                            if (word.Key == wordInOk.Key && word.Key == wordInSpam.Key)
                            {
                             //math
                            }
                        }
                        // Word is known from ok mail only.
                        else if (countsWordOccurenceOk.ContainsKey(word.Key) && (!countsWordOccurenceSpam.ContainsKey(word.Key)))
                        {
                            // Matches one ok entry, but is reached again for every spam entry:
                            // this math runs countsWordOccurenceSpam.Count times per word.
                            if (word.Key == wordInOk.Key)
                            {
                             //math
                            }
                        }
                        // Word is known from spam mail only.
                        else if (countsWordOccurenceSpam.ContainsKey(word.Key) && (!countsWordOccurenceOk.ContainsKey(word.Key)))
                        {
                            // Matches one spam entry, but is reached again for every ok entry:
                            // this math runs countsWordOccurenceOk.Count times per word.
                            if (word.Key == wordInSpam.Key)
                            {
                            //math
                            }
                        }
                        // Word is in neither dictionary: this math runs for every (spam, ok) pair,
                        // i.e. countsWordOccurenceSpam.Count * countsWordOccurenceOk.Count times per word.
                        else
                        {
                            //math
                        }
                    }
                }
            }

mail是用于检查邮件的字典，其中包含单词和每个单词的计数器，countsWordOccurenceSpam/Ok是用于检查多封邮件的字典，其中包含单词及其计数器

看起来像这样：

   // Remember every file the user selected in the dialog (only when ShowDialog() returned true).
   if (openFileDialog.ShowDialog() == true)
    {
        foreach (string filename in openFileDialog.FileNames)
        {
            myOkMail.Add(filename);

        }
    }

    string[] okFiles = myOkMail.ToArray();


    // All lines of all selected files, flattened into one list.
    // NOTE(review): not used anywhere in this snippet, and it reads every file a second time — confirm it is needed.
    var logFile2 = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)).ToList();

     // Builds word -> count. Each file's lines are split into words and de-duplicated per file
     // (Distinct is applied inside the outer SelectMany), so after GroupBy the count is the number of
     // selected files that contain the word at least once, not the total number of occurrences.
     // The separator list contains '.' twice; the duplicate has no effect.
     countsWordOccurenceOk = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)
        .SelectMany(line => line.Split(new[] { ' ', ',', '.', '?', '!', '.' }, StringSplitOptions.RemoveEmptyEntries))
        .Distinct())
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());

                // Branch for a word that occurs in spam mail but not in ok mail.
                else if (countsWordOccurenceSpam.ContainsKey(word.Key) && (!countsWordOccurenceOk.ContainsKey(word.Key)))
                {
                    if (word.Key == wordInSpam.Key)
                    {
                        totals = wordInSpam.Value;

                        // Spam count of the word relative to ile_spam
                        // (presumably the total number of spam mails — TODO confirm).
                        fprob_spam = ((double)wordInSpam.Value) / ile_spam;

                        // Blend the assumed probability (scaled by weight) with the observed spam frequency;
                        // the ok side has no observations for this word, so only the assumed part remains.
                        sum_spam = (((weight * probability) + (totals * fprob_spam)) / (totals + weight));
                        sum_ok = ((weight * probability) / (totals + weight));

                        // One factor per occurrence of the word in the checked mail.
                        sum_spam = Math.Pow(sum_spam, word.Value);
                        sum_ok = Math.Pow(sum_ok, word.Value);

                        // Inside the nested loops this branch is reached once per entry of countsWordOccurenceOk,
                        // so the cos-th root is taken to make the repeated multiplications below amount to one factor.
                        cos = countsWordOccurenceOk.Count;
                        // 1.0 forces floating-point division: with an integer cos, 1 / cos truncates to 0
                        // and Math.Pow(x, 0) is always 1.
                        wp_spam = Math.Pow(sum_spam, 1.0 / cos);
                        last_o = Math.Pow(sum_ok, 1.0 / cos);

                        wp_spam_1 = wp_spam_1 * wp_spam;
                        last_o_1 = last_o_1 * last_o;

                    }
                }

// Invokes DoWork once for each element of mail; iterations may run concurrently on several threads.
// NOTE(review): if DoWork accumulates into shared variables (such as wp_spam_1 / last_o_1 in the math
// snippet), those updates need synchronization — confirm.
Parallel.ForEach(mail, this.DoWork);

当我用50封邮件测试时，这个程序运行得非常完美；但是当有5万封垃圾邮件（spam）和5万封正常邮件（ham）时……它就跑不动了。处理器的使用率只有10%左右。

另外，可能值得注意的是，在每个条件分支（即上面循环中的每种情况）里，数学部分几乎相同，如下所示：

   // Remember every file the user selected in the dialog (only when ShowDialog() returned true).
   if (openFileDialog.ShowDialog() == true)
    {
        foreach (string filename in openFileDialog.FileNames)
        {
            myOkMail.Add(filename);

        }
    }

    string[] okFiles = myOkMail.ToArray();


    // All lines of all selected files, flattened into one list.
    // NOTE(review): not used anywhere in this snippet, and it reads every file a second time — confirm it is needed.
    var logFile2 = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)).ToList();

     // Builds word -> count. Each file's lines are split into words and de-duplicated per file
     // (Distinct is applied inside the outer SelectMany), so after GroupBy the count is the number of
     // selected files that contain the word at least once, not the total number of occurrences.
     // The separator list contains '.' twice; the duplicate has no effect.
     countsWordOccurenceOk = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)
        .SelectMany(line => line.Split(new[] { ' ', ',', '.', '?', '!', '.' }, StringSplitOptions.RemoveEmptyEntries))
        .Distinct())
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());

                // Branch for a word that occurs in spam mail but not in ok mail.
                else if (countsWordOccurenceSpam.ContainsKey(word.Key) && (!countsWordOccurenceOk.ContainsKey(word.Key)))
                {
                    if (word.Key == wordInSpam.Key)
                    {
                        totals = wordInSpam.Value;

                        // Spam count of the word relative to ile_spam
                        // (presumably the total number of spam mails — TODO confirm).
                        fprob_spam = ((double)wordInSpam.Value) / ile_spam;

                        // Blend the assumed probability (scaled by weight) with the observed spam frequency;
                        // the ok side has no observations for this word, so only the assumed part remains.
                        sum_spam = (((weight * probability) + (totals * fprob_spam)) / (totals + weight));
                        sum_ok = ((weight * probability) / (totals + weight));

                        // One factor per occurrence of the word in the checked mail.
                        sum_spam = Math.Pow(sum_spam, word.Value);
                        sum_ok = Math.Pow(sum_ok, word.Value);

                        // Inside the nested loops this branch is reached once per entry of countsWordOccurenceOk,
                        // so the cos-th root is taken to make the repeated multiplications below amount to one factor.
                        cos = countsWordOccurenceOk.Count;
                        // 1.0 forces floating-point division: with an integer cos, 1 / cos truncates to 0
                        // and Math.Pow(x, 0) is always 1.
                        wp_spam = Math.Pow(sum_spam, 1.0 / cos);
                        last_o = Math.Pow(sum_ok, 1.0 / cos);

                        wp_spam_1 = wp_spam_1 * wp_spam;
                        last_o_1 = last_o_1 * last_o;

                    }
                }

// Invokes DoWork once for each element of mail; iterations may run concurrently on several threads.
// NOTE(review): if DoWork accumulates into shared variables (such as wp_spam_1 / last_o_1 in the math
// snippet), those updates need synchronization — confirm.
Parallel.ForEach(mail, this.DoWork);

是啊，看起来糟透了。还有一件事我还没有弄清楚，那就是我必须使用它来获得正确的结果：

                        cos = countsWordOccurenceOk.Count;
                        // 1.0 forces floating-point division: with an integer cos, 1 / cos truncates to 0
                        // and Math.Pow(x, 0) is always 1.
                        wp_spam = Math.Pow(sum_spam, 1.0 / cos);
                        last_o = Math.Pow(sum_ok, 1.0 / cos);

因为否则结果会被重复相乘，重复的次数等于数据库（字典）中的单词数量——内层循环每遍历一个单词，这段计算就会再执行一次。

感谢您的帮助，

Kenichi

您可以尝试的一种简单方法是使用Parallel.ForEach，它可以在不同的线程中运行循环的迭代

您可以尝试把最外层的 foreach 替换为 Parallel.ForEach，看看性能是否有明显改善。它应该是这样的：

   // Remember every file the user selected in the dialog (only when ShowDialog() returned true).
   if (openFileDialog.ShowDialog() == true)
    {
        foreach (string filename in openFileDialog.FileNames)
        {
            myOkMail.Add(filename);

        }
    }

    string[] okFiles = myOkMail.ToArray();


    // All lines of all selected files, flattened into one list.
    // NOTE(review): not used anywhere in this snippet, and it reads every file a second time — confirm it is needed.
    var logFile2 = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)).ToList();

     // Builds word -> count. Each file's lines are split into words and de-duplicated per file
     // (Distinct is applied inside the outer SelectMany), so after GroupBy the count is the number of
     // selected files that contain the word at least once, not the total number of occurrences.
     // The separator list contains '.' twice; the duplicate has no effect.
     countsWordOccurenceOk = okFiles
        .SelectMany(i => System.IO.File.ReadAllLines(i)
        .SelectMany(line => line.Split(new[] { ' ', ',', '.', '?', '!', '.' }, StringSplitOptions.RemoveEmptyEntries))
        .Distinct())
        .GroupBy(word => word)
        .ToDictionary(g => g.Key, g => g.Count());

                // Branch for a word that occurs in spam mail but not in ok mail.
                else if (countsWordOccurenceSpam.ContainsKey(word.Key) && (!countsWordOccurenceOk.ContainsKey(word.Key)))
                {
                    if (word.Key == wordInSpam.Key)
                    {
                        totals = wordInSpam.Value;

                        // Spam count of the word relative to ile_spam
                        // (presumably the total number of spam mails — TODO confirm).
                        fprob_spam = ((double)wordInSpam.Value) / ile_spam;

                        // Blend the assumed probability (scaled by weight) with the observed spam frequency;
                        // the ok side has no observations for this word, so only the assumed part remains.
                        sum_spam = (((weight * probability) + (totals * fprob_spam)) / (totals + weight));
                        sum_ok = ((weight * probability) / (totals + weight));

                        // One factor per occurrence of the word in the checked mail.
                        sum_spam = Math.Pow(sum_spam, word.Value);
                        sum_ok = Math.Pow(sum_ok, word.Value);

                        // Inside the nested loops this branch is reached once per entry of countsWordOccurenceOk,
                        // so the cos-th root is taken to make the repeated multiplications below amount to one factor.
                        cos = countsWordOccurenceOk.Count;
                        // 1.0 forces floating-point division: with an integer cos, 1 / cos truncates to 0
                        // and Math.Pow(x, 0) is always 1.
                        wp_spam = Math.Pow(sum_spam, 1.0 / cos);
                        last_o = Math.Pow(sum_ok, 1.0 / cos);

                        wp_spam_1 = wp_spam_1 * wp_spam;
                        last_o_1 = last_o_1 * last_o;

                    }
                }

// Invokes DoWork once for each element of mail; iterations may run concurrently on several threads.
// NOTE(review): if DoWork accumulates into shared variables (such as wp_spam_1 / last_o_1 in the math
// snippet), those updates need synchronization — confirm.
Parallel.ForEach(mail, this.DoWork);

然后可以调用DoWork方法中的下一个循环：

/// <summary>
/// Callback handed to <c>Parallel.ForEach(mail, this.DoWork)</c>: processes one element of
/// <c>mail</c> by looping over <c>countsWordOccurenceSpam</c>. The loop body is elided ("...")
/// in this snippet.
/// </summary>
/// <param name="word">The element of <c>mail</c> to process.</param>
// NOTE(review): the question's loop reads word.Key, which suggests mail yields key/value pairs rather
// than plain strings — the parameter type probably has to match the element type of mail; confirm.
public void DoWork(String word)
{
    // Former second-level loop of the nested foreach, now executed once per parallel iteration.
    foreach (var wordInSpam in countsWordOccurenceSpam)
    {
       ...
    }
}

System.Threading.Tasks.Parallel.ForEach 可以帮助您把循环拆分到多个并行线程中执行。 @Jibbow 这是正确的，但即使你有一台非常好的服务器，这也只能把几个小时的耗时缩短到大约1/8，因此你可能需要重新考虑你的算法；例如：既然你一直在检查单词是否出现在某个字典中，为什么还要遍历邮件中的所有单词以及两个字典中的所有条目——这两个内层循环其实都不需要…… @CarstenKönig，说得好，我需要检查一下它是如何工作的。用数字来说明：如果你有100个垃圾邮件单词和100个正常邮件单词，处理时间就增加了10000倍。是否仍要使用 word.Key？如果要使用 Parallel.ForEach 或类似方法，请确保使用线程安全的集合，例如 System.Collections.Concurrent 中提供的集合。