C# 用LINQ做枢轴_C#_Linq - Fatal编程技术网

C# 用LINQ做枢轴

c# linq

C# 用LINQ做枢轴,c#,linq,C#,Linq,我遇到了这个问题。我有一个CSV文件，格式如下（客户、购买的商品对）：现在，我想在查询结果中显示： item x; item y; how many customers have bought itemx and item together 例如： item1 item2 3 (because cust1 and cust2 and cust3 bought item1 and item2 together) item1 item5 1 (because cust5 and cust3 bo

我遇到了这个问题。我有一个CSV文件，格式如下（客户、购买的商品对）：

现在，我想在查询结果中显示：

item x; item y; how many customers have bought item x and item y together

例如：

item1 item2 3 (because cust1 and cust2 and cust3 bought item1 and item2 together)
item1 item5 2 (because cust5 and cust3 bought item1 and item5 together)

查询返回客户成对购买的所有可能的项目组合。还请注意，对（x，y）与对（y，x）相同

SQL查询如下所示：

-- For every pair of distinct items, count the joined rows in which the same
-- customer appears with both items. The a1.item_id < a2.item_id predicate keeps
-- only one ordering of each pair, so (x, y) and (y, x) are not both reported.
SELECT
    a1.item_id,
    a2.item_id,
    COUNT(a1.cust_id) AS how_many_custs_bought_both
FROM data AS a1
INNER JOIN data AS a2
    ON  a1.cust_id = a2.cust_id
    AND a1.item_id <> a2.item_id
    AND a1.item_id < a2.item_id
GROUP BY
    a1.item_id,
    a2.item_id

// Maps each distinct Customer value in customerItems to the number of
// elements that carry that value.
IDictionary<int, int> pivotResult = customerItems
    .GroupBy(entry => entry.Customer)
    .ToDictionary(perCustomer => perCustomer.Key, perCustomer => perCustomer.Count());

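SELECT a1.item_id, a2.item_id, COUNT(a1.cust_id) AS how_many_custs_bought_both
  FROM data AS a1
INNER JOIN data AS a2
  ON a2.cust_id=a1.cust_id AND a2.item_id<>a1.item_id AND a1.item_id<a2.item_id
GROUP BY a1.item_id, a2.item_id

下面是一个可以运行的LINQ示例，虽然写得不太漂亮：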
using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// One purchase record: which customer bought which item.
/// </summary>
class Data
{
    /// <summary>Creates a purchase record.</summary>
    /// <param name="cust">Customer identifier.</param>
    /// <param name="item">Item identifier.</param>
    public Data(int cust, int item)
    {
        item_id = item;
        cust_id = cust;
    }

    /// <summary>Identifier of the purchased item.</summary>
    public int item_id { get; set; }

    /// <summary>Identifier of the customer who made the purchase.</summary>
    public int cust_id { get; set; }

    /// <summary>
    /// Builds a fixed sample data set and prints one line per item pair in the form
    /// "item1 item2 count", where count is the number of joined rows in which the same
    /// customer bought both items. Each pair is reported once, with item1 &lt; item2.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var data = new List<Data>
                       {new Data(1,1),new Data(1,2),new Data(1,3),
                        new Data(2,4),new Data(2,2),new Data(3,5),
                        new Data(3,1),new Data(3,2),new Data(4,1),
                        new Data(4,2),new Data(5,5),new Data(5,1)};

        // Equi-join on the customer instead of filtering a full cross product:
        // only rows of the same customer are ever paired. The join keeps the
        // outer/inner source order, so groups appear in the same order as before.
        var pairCounts =
            from a1 in data
            join a2 in data on a1.cust_id equals a2.cust_id
            // "<" keeps a single ordering of each pair and also excludes an item
            // paired with itself, so no separate "!=" test is needed.
            where a1.item_id < a2.item_id
            group a1 by new { item1 = a1.item_id, item2 = a2.item_id }
            into g
            select new { g.Key.item1, g.Key.item2, count = g.Count() };

        foreach (var x in pairCounts)
        {
            Console.WriteLine("{0} {1} {2}", x.item1, x.item2, x.count);
        }

        // Blocks until input is available; presumably here to keep the console window open.
        Console.Read();
    }
}

一个可以运行的LINQ示例，虽然写得不太漂亮：
using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// One purchase record: which customer bought which item.
/// </summary>
class Data
{
    /// <summary>Creates a purchase record.</summary>
    /// <param name="cust">Customer identifier.</param>
    /// <param name="item">Item identifier.</param>
    public Data(int cust, int item)
    {
        item_id = item;
        cust_id = cust;
    }

    /// <summary>Identifier of the purchased item.</summary>
    public int item_id { get; set; }

    /// <summary>Identifier of the customer who made the purchase.</summary>
    public int cust_id { get; set; }

    /// <summary>
    /// Builds a fixed sample data set and prints one line per item pair in the form
    /// "item1 item2 count", where count is the number of joined rows in which the same
    /// customer bought both items. Each pair is reported once, with item1 &lt; item2.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var data = new List<Data>
                       {new Data(1,1),new Data(1,2),new Data(1,3),
                        new Data(2,4),new Data(2,2),new Data(3,5),
                        new Data(3,1),new Data(3,2),new Data(4,1),
                        new Data(4,2),new Data(5,5),new Data(5,1)};

        // Equi-join on the customer instead of filtering a full cross product:
        // only rows of the same customer are ever paired. The join keeps the
        // outer/inner source order, so groups appear in the same order as before.
        var pairCounts =
            from a1 in data
            join a2 in data on a1.cust_id equals a2.cust_id
            // "<" keeps a single ordering of each pair and also excludes an item
            // paired with itself, so no separate "!=" test is needed.
            where a1.item_id < a2.item_id
            group a1 by new { item1 = a1.item_id, item2 = a2.item_id }
            into g
            select new { g.Key.item1, g.Key.item2, count = g.Count() };

        foreach (var x in pairCounts)
        {
            Console.WriteLine("{0} {1} {2}", x.item1, x.item2, x.count);
        }

        // Blocks until input is available; presumably here to keep the console window open.
        Console.Read();
    }
}

也许：
// Read "customer item" rows lazily from the file.
// NOTE(review): fields are split on whitespace, as in the original snippet;
// confirm the real delimiter of the CSV file before relying on this.
var lines = File.ReadLines(csvFilePath);
var custItems = lines
    // A null separator array means "split on whitespace"; RemoveEmptyEntries keeps
    // repeated or leading whitespace from producing empty customer/item fields.
    .Select(l => l.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries))
    // Skip blank or incomplete rows instead of failing on the missing second field.
    .Where(parts => parts.Length >= 2)
    .Select(parts => new { customer = parts[0], item = parts[1] })
    .ToList();

// Self-join on the customer so that every two different items bought by the
// same customer form a pair, then group identical pairs together.
// NOTE(review): "!=" yields both (item1, item2) and (item2, item1). To report each
// unordered pair only once, use string.CompareOrdinal(ci1.item, ci2.item) < 0 instead.
var groups = from ci1 in custItems
             join ci2 in custItems
             on ci1.customer equals ci2.customer
             where ci1.item != ci2.item
             group new { Item1 = ci1.item, Item2 = ci2.item } by new { Item1 = ci1.item, Item2 = ci2.item } into ItemGroup
             select ItemGroup;

// One result row per pair, with the number of joined rows in that group.
var result = groups.Select(g => new
{
    g.Key.Item1,
    g.Key.Item2,
    how_many_custs_bought_both = g.Count()
});

请注意，由于使用了自连接，当文件较大时，用 ToList 将结果具体化非常重要。输出：
{ Item1 = item1, Item2 = item2, how_many_custs_bought_both = 3 }
{ Item1 = item1, Item2 = item3, how_many_custs_bought_both = 1 }
{ Item1 = item2, Item2 = item1, how_many_custs_bought_both = 3 }
{ Item1 = item2, Item2 = item3, how_many_custs_bought_both = 1 }
{ Item1 = item3, Item2 = item1, how_many_custs_bought_both = 1 }
{ Item1 = item3, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item4, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item2, Item2 = item4, how_many_custs_bought_both = 1 }
{ Item1 = item5, Item2 = item1, how_many_custs_bought_both = 2 }
{ Item1 = item5, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item1, Item2 = item5, how_many_custs_bought_both = 2 }
{ Item1 = item2, Item2 = item5, how_many_custs_bought_both = 1 }

也许：
// Read "customer item" rows lazily from the file.
// NOTE(review): fields are split on whitespace, as in the original snippet;
// confirm the real delimiter of the CSV file before relying on this.
var lines = File.ReadLines(csvFilePath);
var custItems = lines
    // A null separator array means "split on whitespace"; RemoveEmptyEntries keeps
    // repeated or leading whitespace from producing empty customer/item fields.
    .Select(l => l.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries))
    // Skip blank or incomplete rows instead of failing on the missing second field.
    .Where(parts => parts.Length >= 2)
    .Select(parts => new { customer = parts[0], item = parts[1] })
    .ToList();

// Self-join on the customer so that every two different items bought by the
// same customer form a pair, then group identical pairs together.
// NOTE(review): "!=" yields both (item1, item2) and (item2, item1). To report each
// unordered pair only once, use string.CompareOrdinal(ci1.item, ci2.item) < 0 instead.
var groups = from ci1 in custItems
             join ci2 in custItems
             on ci1.customer equals ci2.customer
             where ci1.item != ci2.item
             group new { Item1 = ci1.item, Item2 = ci2.item } by new { Item1 = ci1.item, Item2 = ci2.item } into ItemGroup
             select ItemGroup;

// One result row per pair, with the number of joined rows in that group.
var result = groups.Select(g => new
{
    g.Key.Item1,
    g.Key.Item2,
    how_many_custs_bought_both = g.Count()
});

请注意，由于使用了自连接，当文件较大时，用 ToList 将结果具体化非常重要。输出：
{ Item1 = item1, Item2 = item2, how_many_custs_bought_both = 3 }
{ Item1 = item1, Item2 = item3, how_many_custs_bought_both = 1 }
{ Item1 = item2, Item2 = item1, how_many_custs_bought_both = 3 }
{ Item1 = item2, Item2 = item3, how_many_custs_bought_both = 1 }
{ Item1 = item3, Item2 = item1, how_many_custs_bought_both = 1 }
{ Item1 = item3, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item4, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item2, Item2 = item4, how_many_custs_bought_both = 1 }
{ Item1 = item5, Item2 = item1, how_many_custs_bought_both = 2 }
{ Item1 = item5, Item2 = item2, how_many_custs_bought_both = 1 }
{ Item1 = item1, Item2 = item5, how_many_custs_bought_both = 2 }
{ Item1 = item2, Item2 = item5, how_many_custs_bought_both = 1 }

使用LINQ（并遵循Tim回答中的前5行）将链接方法语法与连接部分的查询语法相结合：
// Sample purchases: one entry per (customer, item) bought.
var custItems = new[] {
    new { customer = 1, item = 1 },
    new { customer = 1, item = 2 },
    new { customer = 1, item = 3 },
    new { customer = 2, item = 4 },
    new { customer = 2, item = 2 },
    new { customer = 3, item = 5 },
    new { customer = 3, item = 1 },
    new { customer = 3, item = 2 },
    new { customer = 4, item = 1 },
    new { customer = 4, item = 2 },
    new { customer = 5, item = 5 },
    new { customer = 5, item = 1 }
};

// Per customer, build every item pair (a, b); then count how often each pair
// occurs across all customers. Anonymous types compare by value, so equal
// pairs from different customers land in the same group.
var pairs = custItems.GroupBy(x => x.customer)
         .Where(g => g.Count() > 1) // a customer with a single item cannot form a pair
         .SelectMany(g => from a in g.Select(y => y.item)
                          from b in g.Select(y => y.item)
                          where a < b // keeps one ordering, avoiding duplicate (a,b)+(b,a)
                          // or just: where a != b, if you want to keep the dupes.
                          select new { a, b })
         .GroupBy(pair => pair)
         .Select(g => new { Pair = g.Key, Count = g.Count() })
         .ToList();

foreach (var pair in pairs)
{
    Console.WriteLine(pair);
}

编辑：已将字符串回滚并改为整数，因为OP给出的数据集以整数作为ID，这样就不再需要 .GetHashCode()。
使用LINQ（沿用Tim回答中的前5行），将链式方法语法与连接部分的查询语法相结合：
// Sample purchases: one entry per (customer, item) bought.
var custItems = new[] {
    new { customer = 1, item = 1 },
    new { customer = 1, item = 2 },
    new { customer = 1, item = 3 },
    new { customer = 2, item = 4 },
    new { customer = 2, item = 2 },
    new { customer = 3, item = 5 },
    new { customer = 3, item = 1 },
    new { customer = 3, item = 2 },
    new { customer = 4, item = 1 },
    new { customer = 4, item = 2 },
    new { customer = 5, item = 5 },
    new { customer = 5, item = 1 }
};

// Per customer, build every item pair (a, b); then count how often each pair
// occurs across all customers. Anonymous types compare by value, so equal
// pairs from different customers land in the same group.
var pairs = custItems.GroupBy(x => x.customer)
         .Where(g => g.Count() > 1) // a customer with a single item cannot form a pair
         .SelectMany(g => from a in g.Select(y => y.item)
                          from b in g.Select(y => y.item)
                          where a < b // keeps one ordering, avoiding duplicate (a,b)+(b,a)
                          // or just: where a != b, if you want to keep the dupes.
                          select new { a, b })
         .GroupBy(pair => pair)
         .Select(g => new { Pair = g.Key, Count = g.Count() })
         .ToList();

foreach (var pair in pairs)
{
    Console.WriteLine(pair);
}

编辑：将字符串回滚并更改为整数，因为OP显示了一个以整数为ID的数据集，这样就不需要.GetHashCode（）
您可以这样编写：
-- For every pair of distinct items, count the joined rows in which the same
-- customer appears with both items. The a1.item_id < a2.item_id predicate keeps
-- only one ordering of each pair, so (x, y) and (y, x) are not both reported.
SELECT
    a1.item_id,
    a2.item_id,
    COUNT(a1.cust_id) AS how_many_custs_bought_both
FROM data AS a1
INNER JOIN data AS a2
    ON  a1.cust_id = a2.cust_id
    AND a1.item_id <> a2.item_id
    AND a1.item_id < a2.item_id
GROUP BY
    a1.item_id,
    a2.item_id

// Maps each distinct Customer value in customerItems to the number of
// elements that carry that value.
IDictionary<int, int> pivotResult = customerItems
    .GroupBy(entry => entry.Customer)
    .ToDictionary(perCustomer => perCustomer.Key, perCustomer => perCustomer.Count());

IDictionary<int, int> pivotResult = customerItems.ToLookup(c => c.Customer)
                                 .ToDictionary(x => x.Key, y => y.Count());
你可以这样写：
-- For every pair of distinct items, count the joined rows in which the same
-- customer appears with both items. The a1.item_id < a2.item_id predicate keeps
-- only one ordering of each pair, so (x, y) and (y, x) are not both reported.
SELECT
    a1.item_id,
    a2.item_id,
    COUNT(a1.cust_id) AS how_many_custs_bought_both
FROM data AS a1
INNER JOIN data AS a2
    ON  a1.cust_id = a2.cust_id
    AND a1.item_id <> a2.item_id
    AND a1.item_id < a2.item_id
GROUP BY
    a1.item_id,
    a2.item_id

// Maps each distinct Customer value in customerItems to the number of
// elements that carry that value.
IDictionary<int, int> pivotResult = customerItems
    .GroupBy(entry => entry.Customer)
    .ToDictionary(perCustomer => perCustomer.Key, perCustomer => perCustomer.Count());

IDictionary<int, int> pivotResult = customerItems.ToLookup(c => c.Customer)
                                 .ToDictionary(x => x.Key, y => y.Count());
您尝试过的 LINQ 语句在哪里？另外，.csv 文件的分隔符是什么？还有，为什么不把该查询放进存储过程，或者使用 System.Data.SqlClient 类并配合参数化查询？
您尝试过的 LINQ 语句在哪里？另外，.csv 文件的分隔符是什么？还有，为什么不把该查询放进存储过程，或者使用 System.Data.SqlClient 类并配合参数化查询？
抱歉，刚刚发现，这根本不起作用。它没有输出正确的信息。
为什么它不起作用？这不是你想要的吗？
很抱歉说得含糊不清，它会输出重复的信息。例如：（Item1，Item2）对与（Item2，Item1）对相同。上面的代码将（a，b）视为（b，a），这是错误的。
哦，你的意思是它实际上应该输出重复的。我会编辑答案（将 a.GetHashCode() < b.GetHashCode() 更改为 a != b）。
我本想删除我的答案，但方法不同，所以保留下来可能也有好处。
哦，该死，抱歉，我承认它将（a，b）与（b，a）一样对待，这是错误的。“where a != b”与此无关，它只删除（item1，item1）重复项，而不删除（item1，item2）（item2，item1）重复项。
抱歉，刚刚发现，这根本不起作用。它没有输出正确的信息。
为什么它不起作用？这不是你想要的吗？
很抱歉说得含糊不清，它会输出重复的信息。例如：（Item1，Item2）对与（Item2，Item1）对相同。上面的代码将（a，b）视为（b，a），这是错误的。
哦，你的意思是它实际上应该输出重复的。我会编辑答案（将 a.GetHashCode() < b.GetHashCode() 更改为 a != b）。
我本想删除我的答案，但方法不同，所以保留下来可能也有好处。
哦，该死，抱歉，我承认它将（a，b）与（b，a）一样对待，这是错误的。“where a != b”与此无关，它只删除（item1，item1）重复项，而不删除（item1，item2）（item2，item1）重复项。
该代码还将（item1，item2）视为不同于（item2，item1），这是错误的。它输出重复项。
抱歉，它将它们视为相同而不是不同。
该代码也将（item1，item2）视为不同于（item2，item1），这是错误的。它输出重复项。
对不起，它对它们一视同仁。