c#需要帮助优化字符串数组函数吗_C#_Arrays_String

c#需要帮助优化字符串数组函数吗

c# arrays string

c#需要帮助优化字符串数组函数吗,c#,arrays,string,C#,Arrays,String,下面的代码在一个小数据集上工作得非常好。然而，GetMatchCount和BuildMatchArray在大的结果上非常缓慢。有人能推荐不同的方法来节省处理时间吗？将数组写入文件是否更好？列表通常都很慢，不是最好的选择吗 using System.Collections.Generic; using System.Diagnostics; using System.Linq; public class Client { public int Id; public string

下面的代码在一个小数据集上工作得非常好。然而，GetMatchCount和BuildMatchArray在大的结果上非常缓慢。有人能推荐不同的方法来节省处理时间吗？将数组写入文件是否更好？列表通常都很慢，不是最好的选择吗

using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

public class Client
{
    public int Id;

    public string FirstName
    {
        get
        {
            var firstName = //<call to get from database via Id>

            return firstName;
        }
    }

    public string MiddleName
    {
        get
        {
            var middleName =  //<call to get from database via Id>

            return middleName;
        }
    }

    public string LastName
    {
        get
        {
            var lastName =  //<call to get from database via Id>

            return lastName;
        }
    }

    public string FullName
    {
        get
        {
            return FirstName + " " + MiddleName + " " + LastName;
        }
    }

    public int GetMatchCount(IEnumerable<string> clientFirstNames, IEnumerable<string> clientMiddleNames, IEnumerable<string> clientLastNames)
    {
        var clientFullNames = BuildMatchArray(clientFirstNames, clientMiddleNames, clientLastNames);
        return clientFullNames.Count(x => x == FullName);
    }


    public string[] BuildMatchArray(IEnumerable<string> clientFirstNames, IEnumerable<string> clientMiddleNames, IEnumerable<string> clientLastNames)
    {
        Debug.Assert(clientFirstNames.Count() == clientMiddleNames.Count() && clientMiddleNames.Count() == clientLastNames.Count());

        var clientFullNames = new List<string>();
        for (int i = 0; i < clientFirstNames.Count(); i++)
        {
            clientFullNames.Add(clientFirstNames.ElementAt(i) + " " + clientMiddleNames.ElementAt(i) + " " + clientLastNames.ElementAt(i));
        }
        return clientFullNames.ToArray();
    }
}

使用System.Collections.Generic；
使用系统诊断；
使用System.Linq；
公共类客户端
{
公共int Id；
公共字符串名
{
得到
{
变量名=//
返回名字；
}
}
公共字符串中间名
{
得到
{
变量名称=//
返回中间名；
}
}
公共字符串姓氏
{
得到
{
变量lastName=//
返回姓氏；
}
}
公共字符串全名
{
得到
{
返回FirstName+“”+MiddleName+“”+LastName；
}
}
public int GetMatchCount（IEnumerable clientFirstNames、IEnumerable clientMiddleNames、IEnumerable clientLastNames）
{
var clientFullNames=BuildMatchArray（clientFirstNames、clientMiddleNames、clientLastNames）；
返回clientFullNames.Count（x=>x==FullName）；
}
公共字符串[]BuildMatchArray（IEnumerable clientFirstNames、IEnumerable clientMiddleNames、IEnumerable clientLastNames）
{
Assert（clientFirstNames.Count（）==clientMiddleNames.Count（）&&clientMiddleNames.Count（）==clientLastNames.Count（））；
var clientFullNames=新列表（）；
对于（int i=0；i

您从哪里获得这些字符串？如果使用的是惰性序列，则每次调用

Count（）

时，都必须迭代整个序列，以计算序列中有多少对象。如果

IEnumerable

实际上是一个

T[]

或

列表

，那么

Count（）

被优化为只调用

Length

或

Count

属性，这并不昂贵。类似地，

ElementAt

也非常低效，会迭代集合。因此，对于内存中的惰性序列，性能会很差，但是如果您是从SQL或外部源流式传输结果，则性能会很差，甚至可能不正确

BuildMatchArray

的更高性能实现如下：

public IEnumerable<string> ZipNames(IEnumerable<string> firsts, 
    IEnumerable<string> middles, IEnumerable<string> lasts) 
{
    using(var e1 = firsts.GetEnumerator())
    using(var e2 = middles.GetEnumerator())
    using(var e3 = lasts.GetEnumerator())
    {
        var stop = false;

        while (!stop)
        {
            var hasNext1 = e1.MoveNext();
            var hasNext2 = e2.MoveNext();
            var hasNext3 = e3.MoveNext();

            if (hasNext1 && hasNext2 && hasNext3) 
            {
                yield return $"{e1.Current} {e2.Current} {e3.Current}";
            }
            else
            {
                stop = true;
                Debug.Assert(!(hasNext1 || hasNext2 || hasNext3));
            }
        }
    }
}

public IEnumerable ZipNames（IEnumerable firsts，
i可数中间，i可数最后）
{
使用（var e1=firsts.GetEnumerator（））
使用（var e2=middles.GetEnumerator（））
使用（var e3=lasts.GetEnumerator（））
{
var-stop=false；
当（！停止）
{
var hasNext1=e1.MoveNext（）；
var hasNext2=e2.MoveNext（）；
var hasNext3=e3.MoveNext（）；
如果（hasNext1&&hasNext2&&hasNext3）
{
产生返回$“{e1.Current}{e2.Current}{e3.Current}”；
}
其他的
{
停止=真；
Assert（！（hasNext1 | | hasNext2 | | hasNext3））；
}
}
}
}

这只需要对每个输入集合进行一次迭代，不需要将元素复制到新的

列表中。另一点需要注意的是，List
以4个元素的容量开始，当它填满时，它会将所有元素复制到一个新的列表中，并具有两倍的容量。所以如果你有一个大序列，你会复制很多次
此实现非常类似于System.Linq.Enumerable.Zip

在您的情况下，您也不应该对您的序列执行ToArray
。这将需要另一个拷贝，并且可能是一个巨大的数组。如果您只将该数组发送到.Count（x=>x==y）
，那么保持一个惰性IEnumerable
会更好，因为Count
对惰性序列进行惰性操作，并在其中流式传输数据，并按其所见对元素进行计数，而不需要将整个集合存储在内存中
请参见
您能告诉我们有关呼叫数据库的更多信息吗？出于某种原因，您的第一个中间名和姓氏是否存储在不同的表中？每个属性都有索引吗？实际上，即使获得一个全名，也需要在数据库中使用3条select语句。这可能会被删减吗？看起来它需要三重压缩，并且需要一个查找
是的，你也可以在那篇文章的一个答案中使用建议的ZipThree
方法firsts.ZipThree（中间，最后，（f，m，l）=>$“{f}{m}{l}”）
。这可能是一个比我的答案更好、更可重用的解决方案。不应该有某种循环吗？是的@ThomasHilbert，我的错误，它应该是，而不是，如果/，否则迭代器应该是Move（）
d在循环中：-）你不会想在循环中使用块打开，您需要在循环中调用MoveNext
和Current
，以便每次都得到不同的结果。目前迭代器的位置和hasNext[1,2,3]都不会改变，循环将永远旋转。