C# 是否有更好的方法删除列表中多个属性上的重复值？_C#_Performance_List_Morelinq

C# 是否有更好的方法删除列表中多个属性上的重复值？

c# performance list

C# 是否有更好的方法删除列表中多个属性上的重复值？,c#,performance,list,morelinq,C#,Performance,List,Morelinq,当第一个、第二个或两个属性相等（在列表中出现多次）时，尝试从重复的列表中删除重复项。使用MoreLINQ，下面的代码正在运行： var list = new List<LinqTest> // LinqTest: object containing 2 strings { // are ok new LinqTest { Str1 = "a1", Str2 = "b1"}, new LinqTest { Str1 = "a2", Str2 = "b2"},

我想从列表中删除重复项：只要某个元素的第一个属性、第二个属性或两个属性的值在列表中出现了不止一次，就应把该元素移除。使用 MoreLINQ，下面的代码可以正常工作：

// Sample data. LinqTest is an object containing two strings (Str1 and Str2).
var list = new List<LinqTest>
{
    // Expected to be kept: neither Str1 nor Str2 repeats elsewhere in the list.
    new LinqTest { Str1 = "a1", Str2 = "b1" },
    new LinqTest { Str1 = "a2", Str2 = "b2" },
    new LinqTest { Str1 = "a3", Str2 = "b3" },
    new LinqTest { Str1 = "a5", Str2 = "b5" },
    new LinqTest { Str1 = "a6", Str2 = "b6" },
    new LinqTest { Str1 = "x1", Str2 = "y1" },
    new LinqTest { Str1 = "y1", Str2 = "x1" },

    // Expected to be removed: Str1 and/or Str2 occurs more than once.
    new LinqTest { Str1 = "d1", Str2 = "b4" },
    new LinqTest { Str1 = "d1", Str2 = "d2" },
    new LinqTest { Str1 = "d1", Str2 = "d2" },
    new LinqTest { Str1 = "a4", Str2 = "d2" },
    new LinqTest { Str1 = "d3", Str2 = "b7" },
    new LinqTest { Str1 = "d3", Str2 = "b8" },
    new LinqTest { Str1 = "d3", Str2 = "b8" },
};

// All items whose Str1 value is shared by more than one item.
var duplicatesStr1 =
    (from item in list
     group item by item.Str1 into sameStr1
     where sameStr1.Count() > 1
     from member in sameStr1
     select member).ToList();

// All items whose Str2 value is shared by more than one item.
var duplicatesStr2 =
    (from item in list
     group item by item.Str2 into sameStr2
     where sameStr2.Count() > 1
     from member in sameStr2
     select member).ToList();

// Items to keep: drop everything matching a duplicated Str1 key, then everything
// matching a duplicated Str2 key. Both steps are lazy; the query runs when printed.
var withoutStr1Duplicates = list.ExceptBy(duplicatesStr1, x => x.Str1);
var res = withoutStr1Duplicates.ExceptBy(duplicatesStr2, x => x.Str2);

// Items to remove: both duplicate lists combined, collapsed to one entry per
// (Str1, Str2) pair.
var allDuplicates = duplicatesStr1.Union(duplicatesStr2);
var rem = allDuplicates
    .DistinctBy(x => new { x.Str1, x.Str2 })
    .ToList();

// Prints a separator line followed by one "<prefix><Str1>-<Str2>" line per item.
void PrintSection(string prefix, IEnumerable<LinqTest> items)
{
    Console.WriteLine("----------");
    foreach (var entry in items)
    {
        Console.WriteLine(string.Concat(prefix, entry.Str1, "-", entry.Str2));
    }
}

PrintSection("keep> ", res);
PrintSection("remove> ", rem);

var list=new list//LinqTest:包含2个字符串的对象
{
//你还好吗
新的LinqTest{Str1=“a1”，Str2=“b1”}，
新的LinqTest{Str1=“a2”，Str2=“b2”}，
新的LinqTest{Str1=“a3”，Str2=“b3”}，
新的LinqTest{Str1=“a5”，Str2=“b5”}，
新的LinqTest{Str1=“a6”，Str2=“b6”}，
新的LinqTest{Str1=“x1”，Str2=“y1”}，
新的LinqTest{Str1=“y1”，Str2=“x1”}，
//必须删除
新的LinqTest{Str1=“d1”，Str2=“b4”}，
新的LinqTest{Str1=“d1”，Str2=“d2”}，
新的LinqTest{Str1=“d1”，Str2=“d2”}，
新的LinqTest{Str1=“a4”，Str2=“d2”}，
新的LinqTest{Str1=“d3”，Str2=“b7”}，
新的LinqTest{Str1=“d3”，Str2=“b8”}，
新的LinqTest{Str1=“d3”，Str2=“b8”}，
};
var duplicatesStr1=列表
.GroupBy（x=>x.Str1）
.其中（x=>x.Count（）>1）
.SelectMany（x=>x）
.ToList（）；
var duplicatesStr2=列表
.GroupBy（x=>x.Str2）
.Where（x=>x.Count（）>1）
.SelectMany（x=>x）
.ToList（）；
var res=列表
.ExceptBy（duplicatesStr1，x=>x.Str1）
.ExceptBy（duplicatesStr2，x=>x.Str2）；
var rem=duplicatessstr1
.Union（重复SSTR2）
.DistinctBy（x=>new{x.Str1，x.Str2}）
.ToList（）；
Console.WriteLine（--------------”；
foreach（以res为单位的var linqTest）
{
Console.WriteLine（“keep>”+linqTest.Str1+“-”+linqTest.Str2）；
}
Console.WriteLine（--------------”；
foreach（var linqTest in rem）
{
Console.WriteLine（“删除>”+linqTest.Str1+“-”+linqTest.Str2）；
}

问题： 有没有更有效和/或更短的方法来实现这一点？

您可以使用 LINQ 的 Distinct 方法来实现这一点。您必须定义一个自定义的 IEqualityComparer 来决定何时将两个元素视为不同的。

/// <summary>
/// Equality comparer that treats two <see cref="LinqTest"/> instances as equal
/// when they share the same <c>Str1</c> or the same <c>Str2</c>.
/// </summary>
/// <remarks>
/// The relation is not transitive: A may equal B through <c>Str1</c> and B may equal C
/// through <c>Str2</c> while A and C share nothing. No non-constant hash can therefore
/// be consistent with <see cref="Equals(LinqTest, LinqTest)"/>.
/// </remarks>
public class MyComparer : IEqualityComparer<LinqTest>
{
    /// <summary>Determines whether two items share <c>Str1</c> or <c>Str2</c>.</summary>
    /// <param name="x">First item; may be null.</param>
    /// <param name="y">Second item; may be null.</param>
    /// <returns>
    /// <c>true</c> if both references are the same (including both null) or if either
    /// property matches; <c>false</c> if exactly one argument is null or nothing matches.
    /// </returns>
    public bool Equals(LinqTest x, LinqTest y)
    {
        // Same instance, or both null.
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        // Exactly one side is null: avoid dereferencing it.
        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return x.Str1 == y.Str1 || x.Str2 == y.Str2;
    }

    /// <summary>Returns the same hash code for every item.</summary>
    /// <param name="obj">The item to hash (ignored).</param>
    /// <returns>Always <c>0</c>.</returns>
    public int GetHashCode(LinqTest obj)
    {
        // Items considered equal must hash equally. Because equality here is an
        // "either property matches" relation, a constant is the only safe value.
        // The cost is that hash-based lookups fall back to calling Equals each time.
        return 0;
    }
}

公共类MyComparer:IEqualityComparer
{
公共布尔等于（LinqTest x，LinqTest y）
{
返回x.Str1==y.Str1 | | x.Str2==y.Str2；
}
public int GetHashCode（LinqTest obj）
{
返回0；
}
}

然后你可以写：

List<LinqTest> noDuplicates = originalList.Distinct(new MyComparer()).ToList();

List noDuplicates=originalList.Distinct（新的MyComparer（））.ToList（）；

棘手的部分是正确地实现 GetHashCode()（我第一次没有做到！）。

GetHashCode() 必须为被认为相等的两个对象返回相同的值。由于我们的相等概念是不可传递的，因此满足此要求的唯一方法是返回一个常量值。这是允许的，但违背了哈希码的初衷——哈希码本是一种加速相等性检查的手段：如果哈希码不同，则对象必定不同，无需再做可能更昂贵的“深度”比较。

因此，这段代码可以工作，但无法实现尽可能高的性能。

您可以使用 LINQ 的 Distinct 方法来实现这一点。您必须定义一个自定义的 IEqualityComparer 来决定何时将两个元素视为不同的。

/// <summary>
/// Equality comparer that treats two <see cref="LinqTest"/> instances as equal
/// when they share the same <c>Str1</c> or the same <c>Str2</c>.
/// </summary>
/// <remarks>
/// The relation is not transitive: A may equal B through <c>Str1</c> and B may equal C
/// through <c>Str2</c> while A and C share nothing. No non-constant hash can therefore
/// be consistent with <see cref="Equals(LinqTest, LinqTest)"/>.
/// </remarks>
public class MyComparer : IEqualityComparer<LinqTest>
{
    /// <summary>Determines whether two items share <c>Str1</c> or <c>Str2</c>.</summary>
    /// <param name="x">First item; may be null.</param>
    /// <param name="y">Second item; may be null.</param>
    /// <returns>
    /// <c>true</c> if both references are the same (including both null) or if either
    /// property matches; <c>false</c> if exactly one argument is null or nothing matches.
    /// </returns>
    public bool Equals(LinqTest x, LinqTest y)
    {
        // Same instance, or both null.
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        // Exactly one side is null: avoid dereferencing it.
        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return x.Str1 == y.Str1 || x.Str2 == y.Str2;
    }

    /// <summary>Returns the same hash code for every item.</summary>
    /// <param name="obj">The item to hash (ignored).</param>
    /// <returns>Always <c>0</c>.</returns>
    public int GetHashCode(LinqTest obj)
    {
        // Items considered equal must hash equally. Because equality here is an
        // "either property matches" relation, a constant is the only safe value.
        // The cost is that hash-based lookups fall back to calling Equals each time.
        return 0;
    }
}

公共类MyComparer:IEqualityComparer
{
公共布尔等于（LinqTest x，LinqTest y）
{
返回x.Str1==y.Str1 | | x.Str2==y.Str2；
}
public int GetHashCode（LinqTest obj）
{
返回0；
}
}

然后你可以写：

List<LinqTest> noDuplicates = originalList.Distinct(new MyComparer()).ToList();

List noDuplicates=originalList.Distinct（新的MyComparer（））.ToList（）；

棘手的部分是正确地实现 GetHashCode()（我第一次没有做到！）。

因此，这段代码可以工作，但无法实现尽可能高的性能。

您可以分两遍完成：第一遍找出第一个和第二个属性中出现多次的所有值，第二遍据此进行筛选。每个属性需要 2 个哈希集：第一个用于记录某个值是否已经出现过；如果某个值已经出现过，再次遇到时就把它加入第二个哈希集。这样，每个属性都会得到一个只包含重复值的哈希集。最后只需过滤掉属性值出现在这些哈希集中的元素即可。

// For each property: one set of values seen so far, one set of values seen more than once.
var seenStr1 = new HashSet<string>();
var repeatedStr1 = new HashSet<string>();
var seenStr2 = new HashSet<string>();
var repeatedStr2 = new HashSet<string>();

// Pass 1: HashSet.Add returns false when the value is already present,
// which is exactly the "seen before" signal that marks a repeated value.
foreach (var entry in list)
{
    bool firstStr1 = seenStr1.Add(entry.Str1);
    if (!firstStr1)
    {
        repeatedStr1.Add(entry.Str1);
    }

    bool firstStr2 = seenStr2.Add(entry.Str2);
    if (!firstStr2)
    {
        repeatedStr2.Add(entry.Str2);
    }
}

// Pass 2: keep only items whose Str1 and Str2 were each seen exactly once.
var unique = list
    .Where(x => !(repeatedStr1.Contains(x.Str1) || repeatedStr2.Contains(x.Str2)))
    .ToList();

HashSet hash1Once=new HashSet（）；
HashSet hash1More=新HashSet（）；
HashSet hash2Once=新的HashSet（）；
HashSet hash2More=新HashSet（）；
foreach（列表中的变量项）{
如果（！hash1Once.Add（item.Str1））
HASH1更多。添加（项目Str1）；
如果（！hash2Once.Add（item.Str2））
hash2更多。添加（项目Str2）；
}
var unique=list.Where（x=>！hash1More.Contains（x.Str1）和&！hash2More.Contains（x.Str2））
.ToList（）；

可以分两遍完成：第一遍找出第一个和第二个属性中出现多次的所有值，第二遍据此进行筛选。每个属性需要 2 个哈希集：第一个用于记录某个值是否已经出现过；如果某个值已经出现过，再次遇到时就把它加入第二个哈希集。这样，每个属性都会得到一个只包含重复值的哈希集。最后只需过滤掉属性值出现在这些哈希集中的元素即可。

// For each property: one set of values seen so far, one set of values seen more than once.
var seenStr1 = new HashSet<string>();
var repeatedStr1 = new HashSet<string>();
var seenStr2 = new HashSet<string>();
var repeatedStr2 = new HashSet<string>();

// Pass 1: HashSet.Add returns false when the value is already present,
// which is exactly the "seen before" signal that marks a repeated value.
foreach (var entry in list)
{
    bool firstStr1 = seenStr1.Add(entry.Str1);
    if (!firstStr1)
    {
        repeatedStr1.Add(entry.Str1);
    }

    bool firstStr2 = seenStr2.Add(entry.Str2);
    if (!firstStr2)
    {
        repeatedStr2.Add(entry.Str2);
    }
}

// Pass 2: keep only items whose Str1 and Str2 were each seen exactly once.
var unique = list
    .Where(x => !(repeatedStr1.Contains(x.Str1) || repeatedStr2.Contains(x.Str2)))
    .ToList();

HashSet hash1Once=new HashSet（）；
HashSet hash1More=新HashSet（）；
HashSet hash2Once=新的HashSet（）；
HashSet hash2More=新HashSet（）；
foreach（列表中的变量项）{
如果（！hash1Once.Add（item.Str1））
HASH1更多。添加（项目Str1）；
如果（！hash2Once.Add（item.Str2））
hash2更多。添加（项目Str2）；
}
var unique=list.Where（x=>！hash1More.Contains（x.Str1）和&！hash2More.Contains（x.Str2））
.ToList（）；

这里是另一种使用