C#LINQ在列表中查找重复项

C#LINQ在列表中查找重复项,linq,list,duplicate-removal,Linq,List,Duplicate Removal,使用LINQ,从列表中,如何检索包含重复多次的条目及其值的列表?解决此问题的最简单方法是根据元素的值对元素进行分组,然后如果组中有多个元素,则选择该组的代表。在LINQ中,这转化为: var query = lst.GroupBy(x => x) .Where(g => g.Count() > 1) .Select(y => y.Key) .ToList(); 如果要知道元素重复了多少次

使用LINQ,如何从一个 List 中检索出包含重复多次的条目及其值的列表?

解决此问题的最简单方法是根据元素的值对元素进行分组,然后如果组中有多个元素,则选择该组的代表。在LINQ中,这转化为:

// Group equal elements together and keep the key of every group with more than one member.
var query = (from x in lst
             group x by x into g
             where g.Count() > 1
             select g.Key).ToList();
如果要知道元素重复了多少次,可以使用:

// Same grouping, but project each repeated value together with its number of occurrences.
var query = (from x in lst
             group x by x into g
             where g.Count() > 1
             select new { Element = g.Key, Counter = g.Count() }).ToList();
这将返回一个匿名类型的 List,其中每个元素都有 Element 和 Counter 两个属性,可用于检索所需的信息。

最后,如果你要找的是字典,你可以用

// Map every repeated value (key) to the number of times it occurs (value).
var query = (from x in lst
             group x by x into g
             where g.Count() > 1
             select g).ToDictionary(g => g.Key, g => g.Count());
这将返回一个字典,其中元素为键,重复次数为值。

您可以执行以下操作:

// Sample input in which 1 and 2 each occur twice.
int[] list = { 1, 2, 3, 1, 4, 2 };
var duplicateItems = list.Duplicates();
使用这些扩展方法:

/// <summary>
/// LINQ helpers for picking out the elements of a sequence that occur more than once.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Returns every element of <paramref name="source"/> whose key is shared with at least
    /// one other element. All members of each repeated group are returned, including the first.
    /// </summary>
    /// <typeparam name="TSource">Element type of the sequence.</typeparam>
    /// <typeparam name="TKey">Type of the key used to decide whether two elements are equal.</typeparam>
    /// <param name="source">Sequence to inspect.</param>
    /// <param name="selector">Projects an element to the key it is grouped by.</param>
    /// <returns>The members of every group that contains two or more elements.</returns>
    public static IEnumerable<TSource> Duplicates<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> selector)
    {
        return source.GroupBy(selector)
                     .Where(group => group.IsMultiple())
                     .SelectMany(group => group);
    }

    /// <summary>
    /// Returns every element of <paramref name="source"/> that is equal to at least one other
    /// element, using the element itself as the grouping key.
    /// </summary>
    /// <typeparam name="TSource">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence to inspect.</param>
    /// <returns>The members of every group that contains two or more equal elements.</returns>
    public static IEnumerable<TSource> Duplicates<TSource>(this IEnumerable<TSource> source)
    {
        // Only TSource appears in the signature, so the compiler can infer it
        // and a plain list.Duplicates() call compiles.
        return source.Duplicates(item => item);
    }

    /// <summary>
    /// Legacy overload kept so existing calls that spell out both type arguments still compile.
    /// <typeparamref name="TKey"/> is not used and cannot be inferred; prefer the overload
    /// with a single type parameter.
    /// </summary>
    /// <typeparam name="TSource">Element type of the sequence.</typeparam>
    /// <typeparam name="TKey">Unused.</typeparam>
    /// <param name="source">Sequence to inspect.</param>
    /// <returns>The members of every group that contains two or more equal elements.</returns>
    public static IEnumerable<TSource> Duplicates<TSource, TKey>(this IEnumerable<TSource> source)
    {
        return source.Duplicates(item => item);
    }

    /// <summary>
    /// Tells whether <paramref name="source"/> contains at least two elements, advancing the
    /// enumerator at most twice instead of counting the whole sequence.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence to inspect.</param>
    /// <returns><c>true</c> when a second element exists; otherwise <c>false</c>.</returns>
    public static bool IsMultiple<T>(this IEnumerable<T> source)
    {
        // IEnumerator<T> is IDisposable; release it even though enumeration stops early.
        using (var enumerator = source.GetEnumerator())
        {
            return enumerator.MoveNext() && enumerator.MoveNext();
        }
    }
}
公共静态类扩展
{
公共静态IEnumerable重复项(此IEnumerable源,Func选择器)
{
var group=source.GroupBy(选择器);
var moreThan1=grouped.Where(i=>i.IsMultiple());
返回多于1。选择多个(i=>i);
}
公共静态IEnumerable副本(此IEnumerable源)
{
返回source.Duplicates(i=>i);
}
公共静态布尔值是多个(此IEnumerable源)
{
var枚举器=source.GetEnumerator();
返回枚举数.MoveNext()&&enumerator.MoveNext();
}
}

在Duplicates方法中使用IsMultiple()比Count()快,因为这不会迭代整个集合。

另一种方法是使用 HashSet:

// HashSet<T>.Add returns false when the value is already in the set, so the filter keeps
// the second and every later occurrence of a value.
var hash = new HashSet<int>();
// The predicate mutates `hash`, so the query is materialized once here. Enumerating a
// deferred query a second time would find every value already in the set and report
// all elements as duplicates.
var duplicates = list.Where(i => !hash.Add(i)).ToList();
var hash=new HashSet();
var duplicates=list.Where(i=>!hash.Add(i));
如果希望得到的重复项列表中每个值只出现一次:

// Sample data in which every value occurs at least twice.
List<int> mylist = new List<int> { 1, 1, 2, 2, 3, 3, 3, 4, 4, 4 };
// Values seen so far; Add returns false for a value that is already present.
HashSet<int> myhash = new HashSet<int>();
// Keep the repeated occurrences, then collapse them so each duplicated value appears once.
List<int> duplicates = mylist
    .Where(item => !myhash.Add(item))
    .Distinct()
    .ToList();
var myhash=newhashset();
var mylist=new List(){1,1,2,2,3,3,4,4};
var duplicates=mylist.Where(item=>!myhash.Add(item)).Distinct().ToList();
以下是与通用扩展方法相同的解决方案:

/// <summary>
/// HashSet-based helpers that return the repeated occurrences found in a sequence.
/// </summary>
public static class Extensions
{
  /// <summary>
  /// Returns the elements whose key was already produced by an earlier element, i.e. the
  /// second and later occurrences of each key. The result is built eagerly as a list.
  /// </summary>
  /// <typeparam name="TSource">Element type of the sequence.</typeparam>
  /// <typeparam name="TKey">Type of the key that is compared.</typeparam>
  /// <param name="source">Sequence to inspect.</param>
  /// <param name="selector">Projects an element to the key that is compared.</param>
  /// <param name="comparer">Comparer handed to the <see cref="HashSet{T}"/>; may be null.</param>
  /// <returns>The repeated occurrences, in source order.</returns>
  public static IEnumerable<TSource> GetDuplicates<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> selector, IEqualityComparer<TKey> comparer)
  {
    var seenKeys = new HashSet<TKey>(comparer);
    // Add returns false when the key is already in the set, which marks a repeat.
    IEnumerable<TSource> repeats = source.Where(element => seenKeys.Add(selector(element)) == false);
    return repeats.ToList();
  }

  /// <summary>
  /// Returns the repeated occurrences, comparing the elements themselves with <paramref name="comparer"/>.
  /// </summary>
  public static IEnumerable<TSource> GetDuplicates<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
    => source.GetDuplicates(element => element, comparer);

  /// <summary>
  /// Returns the repeated occurrences by key, passing a null comparer to the keyed overload.
  /// </summary>
  public static IEnumerable<TSource> GetDuplicates<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> selector)
    => source.GetDuplicates(selector, null);

  /// <summary>
  /// Returns the repeated occurrences, comparing the elements themselves with a null comparer.
  /// </summary>
  public static IEnumerable<TSource> GetDuplicates<TSource>(this IEnumerable<TSource> source)
    => source.GetDuplicates(element => element, null);
}
公共静态类扩展
{
公共静态IEnumerable GetDuplicates(此IEnumerable源、Func选择器、IEqualityComparer比较器)
{
var hash=新的HashSet(比较器);
返回source.Where(item=>!hash.Add(selector(item))).ToList();
}
公共静态IEnumerable GetDuplicates(此IEnumerable源,IEqualityComparer比较器)
{
返回source.GetDuplicates(x=>x,比较器);
}
公共静态IEnumerable GetDuplicates(此IEnumerable源,Func选择器)
{
返回source.GetDuplicates(选择器,null);
}
公共静态IEnumerable GetDuplicates(此IEnumerable源)
{
返回source.GetDuplicates(x=>x,null);
}
}

找出可枚举项是否包含任何重复项:

// True when at least one key is shared by two or more elements.
var anyDuplicate = (from x in enumerable group x by x.Key).Any(g => g.Count() > 1);
// True when every key belongs to exactly one element.
var allUnique = (from x in enumerable group x by x.Key).All(g => g.Count() == 1);
找出可枚举项中的所有值是否都是唯一的:

// True when at least one key is shared by two or more elements.
var anyDuplicate = (from x in enumerable group x by x.Key).Any(g => g.Count() > 1);
// True when every key belongs to exactly one element.
var allUnique = (from x in enumerable group x by x.Key).All(g => g.Count() == 1);

我为解决这个问题编写了一个扩展类,您可以把它加入自己的项目;我认为它涵盖了在 List 或 LINQ 中查找重复项时最常见的几种情况。

例如:

/// <summary>
/// Sample entity used by the duplicate-detection example.
/// </summary>
public class Person
{
    /// <summary>Creates a person from its identifier, first name and surname.</summary>
    public Person(int id, string name, string surname)
    {
        Id = id;
        Name = name;
        Surname = surname;
    }

    /// <summary>Identifier of the person.</summary>
    public int Id { get; set; }

    /// <summary>First name.</summary>
    public string Name { get; set; }

    /// <summary>Surname.</summary>
    public string Surname { get; set; }
}


/// <summary>
/// Extension methods that pick repeated items out of a sequence, grouped by a caller-supplied key.
/// </summary>
public static class Extention
{
    /// <summary>
    /// Returns the second and every later item of each group, i.e. all items except the
    /// first one seen for each key.
    /// </summary>
    /// <param name="extList">Sequence to inspect.</param>
    /// <param name="groupProps">Projects an item to the key it is grouped by.</param>
    public static IEnumerable<T> getMoreThanOnceRepeated<T>(this IEnumerable<T> extList, Func<T, object> groupProps) where T : class
    {
        var groups = extList.GroupBy(groupProps);
        // Dropping the first member of each group leaves only the repeats.
        return groups.SelectMany(sameKey => sameKey.Skip(1));
    }

    /// <summary>
    /// Returns every item whose key occurs more than once, including the first occurrence.
    /// </summary>
    /// <param name="extList">Sequence to inspect.</param>
    /// <param name="groupProps">Projects an item to the key it is grouped by.</param>
    public static IEnumerable<T> getAllRepeated<T>(this IEnumerable<T> extList, Func<T, object> groupProps) where T : class
    {
        var groups = extList.GroupBy(groupProps);
        // Keep only the groups that have at least two members.
        var repeatedGroups = groups.Where(sameKey => sameKey.Count() > 1);
        // Flatten the remaining groups back into a single sequence of items.
        return repeatedGroups.SelectMany(sameKey => sameKey);
    }
}

// Usage example: prints the whole list, then the results of both extension methods.
void DuplicateExample()
{
    // Sample data: ids 1, 3 and 5 share the same name and surname.
    var people = new List<Person>
    {
        new Person(1, "Ricardo", "Figueiredo"),   // first duplicate in the example
        new Person(2, "Ana", "Figueiredo"),
        new Person(3, "Ricardo", "Figueiredo"),   // second duplicate in the example
        new Person(4, "Margarida", "Figueiredo"),
        new Person(5, "Ricardo", "Figueiredo")    // third duplicate in the example
    };

    Console.WriteLine("All:");
    foreach (var person in people)
    {
        Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
    }
    /* OUTPUT:
        All:
        1 -> Ricardo Figueiredo
        2 -> Ana Figueiredo
        3 -> Ricardo Figueiredo
        4 -> Margarida Figueiredo
        5 -> Ricardo Figueiredo
        */

    Console.WriteLine("All lines with repeated data");
    // Every person whose (Name, Surname) pair occurs more than once, first occurrence included.
    var allRepeated = people.getAllRepeated(z => new { z.Name, z.Surname }).ToList();
    foreach (var person in allRepeated)
    {
        Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
    }
    /* OUTPUT:
        All lines with repeated data
        1 -> Ricardo Figueiredo
        3 -> Ricardo Figueiredo
        5 -> Ricardo Figueiredo
        */

    Console.WriteLine("Only Repeated more than once");
    // Only the second and later occurrences of each repeated (Name, Surname) pair.
    var laterOccurrences = people.getMoreThanOnceRepeated(z => new { z.Name, z.Surname }).ToList();
    foreach (var person in laterOccurrences)
    {
        Console.WriteLine("{0} -> {1} {2}", person.Id, person.Name, person.Surname);
    }
    /* OUTPUT:
        Only Repeated more than once
        3 -> Ricardo Figueiredo
        5 -> Ricardo Figueiredo
        */
}
//要在列表中比较的伪类
公共阶层人士
{
公共int Id{get;set;}
公共字符串名称{get;set;}
公共字符串姓氏{get;set;}
公众人物(整数id、字符串名称、字符串姓氏)
{
这个.Id=Id;
this.Name=Name;
这个。姓=姓;
}
}
//静态类的扩展
公共静态类扩展
{
公共静态IEnumerable getMoreThanOnceRepeated(此IEnumerable extList,Func groupProps),其中T:class
{//仅返回第二次和下一次重复
返回extList
.GroupBy(groupProps)
.SelectMany(z=>z.Skip(1));//跳过第一次出现的内容,并返回所有重复出现的内容
}
公共静态IEnumerable getAllRepeated(此IEnumerable extList,Func groupProps),其中T:class
{
//获取所有已重复的行
返回extList
.GroupBy(groupProps)
.Where(z=>z.Count()>1)//只过滤不同的一个
.SelectMany(z=>z);//where中的All必须重新调谐
}
}
//如何使用它:
void DuplicateExample()
{
//填充列表
List PersonsLst=新列表(){
new Person(1,“Ricardo”,“Figueiredo”),//第一次复制到示例中
新人(2,“安娜”,“菲格雷多”),
new Person(3,“Ricardo”,“Figueiredo”),//示例的第二个副本
新人(4名,“Margarida”、“Figueiredo”),
new Person(5,“Ricardo”,“Figueiredo”)//示例的第三个副本
};
控制台。WriteLine(“全部:”);
PersonsLst.ForEach(z=>Console.WriteLine(“{0}->{1}{2}”,z.Id,z.Name,z.姓氏));
/*输出:
全部:
1->Ricardo Figueiredo
2->Ana Figueiredo
3->Ricardo Figueiredo
4->Margarida Figueiredo
5->Ricardo Figueiredo
*/
Console.WriteLine(“包含重复数据的所有行”);
PersonsLst.getAllRepeated(z=>new{z.Name,z.姓氏})
托利斯先生()
.ForEach(z=>Console.WriteLine(“{0}->{1}{2}”,z.Id,z.Name,z.姓氏));
/*输出:
包含重复数据的所有行
1->Ricardo Figueiredo
3->Ricardo Figueiredo
5->Ricardo Figueiredo
*/
Console.WriteLine(“仅重复多次”);
personslt.getMoreThanOnceRepeated(z=>new{z.Name,z.姓氏})
托利斯先生()
.ForEach(z=>Console.WriteLine(“{0}->{1}{2}”,z.Id,z.Name,z.姓氏));
/*输出:
只是重复了不止一次
3->Ricardo Figueiredo
5->Ricardo Figueiredo
*/
}

一套完整的 LINQ to SQL 重复项查询扩展方法,已在 MS SQL Server 中验证。由于没有使用 .ToList() 或 IEnumerable,这些查询会在 SQL Server 中执行,而不是在内存中执行;只有结果才会返回到内存中。

/// <summary>
/// Duplicate-finding extensions that stay on <see cref="IQueryable{T}"/>, so the query
/// provider receives the whole grouping expression instead of an in-memory sequence.
/// </summary>
public static class Linq2SqlExtensions {

    /// <summary>Pairs a grouping key with the number of source items that share it.</summary>
    public class CountOfT<T> {
        public T Key { get; set; }
        public int Count { get; set; }
    }

    // Groups the source by the given key and keeps only the groups with more than one member.
    private static IQueryable<IGrouping<TKey, TSource>> RepeatedGroups<TSource, TKey>(IQueryable<TSource> source, Expression<Func<TSource, TKey>> groupBy) {
        return source.GroupBy(groupBy).Where(g => g.Count() > 1);
    }

    /// <summary>Returns the keys that occur more than once.</summary>
    public static IQueryable<TKey> Duplicates<TSource, TKey>(this IQueryable<TSource> source, Expression<Func<TSource, TKey>> groupBy) {
        return RepeatedGroups(source, groupBy).Select(g => g.Key);
    }

    /// <summary>Returns every source item whose key occurs more than once.</summary>
    public static IQueryable<TSource> GetDuplicates<TSource, TKey>(this IQueryable<TSource> source, Expression<Func<TSource, TKey>> groupBy) {
        return RepeatedGroups(source, groupBy).SelectMany(g => g);
    }

    /// <summary>Returns each repeated key with its occurrence count as a <see cref="CountOfT{T}"/>.</summary>
    public static IQueryable<CountOfT<TKey>> DuplicatesCounts<TSource, TKey>(this IQueryable<TSource> source, Expression<Func<TSource, TKey>> groupBy) {
        return RepeatedGroups(source, groupBy).Select(g => new CountOfT<TKey> { Key = g.Key, Count = g.Count() });
    }

    /// <summary>Returns each repeated key with its occurrence count as a (key, count) tuple.</summary>
    public static IQueryable<Tuple<TKey, int>> DuplicatesCountsAsTuble<TSource, TKey>(this IQueryable<TSource> source, Expression<Func<TSource, TKey>> groupBy) {
        return RepeatedGroups(source, groupBy).Select(g => Tuple.Create(g.Key, g.Count()));
    }
}
公共静态类Linq2SqlExtensions{
公共类CountOfT{
公共T密钥{get;set;}
P
// True when every key occurs exactly once in `list`.
var unique = list.GroupBy(x => x.Key).All(g => g.Count() == 1);
// Sample input in which 1 and 2 each occur twice.
var list = new[] {1,2,3,1,4,2};
// True when at least one key occurs more than once in `enumerable`.
var anyDuplicate = enumerable.GroupBy(x => x.Key).Any(g => g.Count() > 1);
// True when at least one SAME_ID value is shared by two or more items.
// Any(...) stops at the first matching group, whereas Where(...).Count() > 0
// would count every matching group first.
var duplicates = model.list
                    .GroupBy(s => s.SAME_ID)
                    .Any(g => g.Count() > 1);
if(duplicates) {
    doSomething();
}
// Groups of items sharing the same (Column1, Column2) pair, keeping only groups with more than one item.
var query = from s in someList
            group s by new { s.Column1, s.Column2 } into sg
            where sg.Count() > 1
            select sg;