C# 根据第二个列表C的标准偏差将第一个列表与第二个列表合并
给定两个数据集,这两个数据集都是远离某个数字的标准偏差序列,我们正在寻找重叠部分:C# 根据第二个列表C的标准偏差将第一个列表与第二个列表合并,c#,algorithm,dataset,C#,Algorithm,Dataset,给定两个数据集,这两个数据集都是远离某个数字的标准偏差序列,我们正在寻找重叠部分: var list1 = new decimal[] { 357.06, 366.88, 376.70, 386.52, 406.15 }; var list2 = new decimal[] { 370.51, 375.62, 380.72, 385.82, 390.93 }; 我希望执行合并,将列表2中的项目放在距离列表1项目最近的某个范围内,即将列表2元素合并到列表1元素的5.10标准偏差内: 其思想是对列
var list1 = new decimal[] { 357.06, 366.88, 376.70, 386.52, 406.15 };
var list2 = new decimal[] { 370.51, 375.62, 380.72, 385.82, 390.93 };
我希望执行合并,将列表2中的项目放在距离列表1项目最近的某个范围内,即将列表2元素合并到列表1元素的5.10标准偏差内:
其思想是对列表2中的值进行聚类并对其进行计数,在本例中,值为376.70的元素具有最高的重要性,因为它有两个近邻375.52和380.72,其中as 366.88和386.52只有一个匹配项,其余的都不在范围内
哪些C math/stats库可以用于此目的,或者有更好的方法进行统计组合
如果这更多的是计算机科学或统计问题,那么提前道歉将在相关SO网站上关闭并重新打开。类似的方法应该可以奏效
var list1 = new double[] { 357.06, 366.88, 376.70, 386.52, 406.15 };
var list2 = new double[] { 370.51, 375.62, 380.72, 385.82, 390.93 };
double dev = 5.1;
var result = new Dictionary<double, List<double>>();
foreach (var l in list2) {
var diffs = list1.Select(r => new { diff = Math.Abs(r - l), r })
.Where(d => d.diff <= dev)
.MinBy(r => r.diff)
.FirstOrDefault();
if (diffs == null) {
continue;
}
List<double> list;
if (! result.TryGetValue(diffs.r, out list)) {
list = new List<double>();
result.Add(diffs.r, list);
}
list.Add(l);
}
它使用MinBy from,但是很容易修改,以便在没有它的情况下工作。事实上,您不需要额外的lib或其他东西。您可以使用LINQ来实现这一点
internal class Program
{
private static void Main(string[] args)
{
var deviation = 5.1M;
var list1 = new decimal[] { 357.06M, 366.88M, 376.70M, 386.52M, 406.15M };
var list2 = new decimal[] { 370.51M, 375.62M, 380.72M, 385.82M, 390.93M };
var result = GetDistribution(list1.ToList(), list2.ToList(), deviation);
result.ForEach(x => Console.WriteLine($"{x.BaseValue} => {string.Join(", ", x.Destribution)} [{x.Weight}]"));
Console.ReadLine();
}
private static List<Distribution> GetDistribution(List<decimal> baseList, List<decimal> distrebutedList, decimal deviation)
{
return baseList.Select(x =>
new Distribution
{
BaseValue = x,
Destribution = distrebutedList.Where(y => x - deviation < y && y < x + deviation).ToList()
}).ToList();
}
}
internal class Distribution
{
public decimal BaseValue { get; set; }
public List<decimal> Destribution { get; set; }
public int Weight => Destribution.Count;
}
我希望它对您有用。假设列表2已排序,如果未排序,请放置Array.Sortlist2;您可以尝试二进制搜索:
鉴于:
代码:
结果:
internal class Program
{
private static void Main(string[] args)
{
var deviation = 5.1M;
var list1 = new decimal[] { 357.06M, 366.88M, 376.70M, 386.52M, 406.15M };
var list2 = new decimal[] { 370.51M, 375.62M, 380.72M, 385.82M, 390.93M };
var result = GetDistribution(list1.ToList(), list2.ToList(), deviation);
result.ForEach(x => Console.WriteLine($"{x.BaseValue} => {string.Join(", ", x.Destribution)} [{x.Weight}]"));
Console.ReadLine();
}
private static List<Distribution> GetDistribution(List<decimal> baseList, List<decimal> distrebutedList, decimal deviation)
{
return baseList.Select(x =>
new Distribution
{
BaseValue = x,
Destribution = distrebutedList.Where(y => x - deviation < y && y < x + deviation).ToList()
}).ToList();
}
}
internal class Distribution
{
public decimal BaseValue { get; set; }
public List<decimal> Destribution { get; set; }
public int Weight => Destribution.Count;
}
var list1 = new decimal[] { 357.06m, 366.88m, 376.70m, 386.52m, 406.15m };
var list2 = new decimal[] { 370.51m, 375.62m, 380.72m, 385.82m, 390.93m };
decimal sd = 5.10m;
// Array.Sort(list2); // Uncomment, if list2 is not sorted
List<(decimal value, decimal[] list)> result = new List<(decimal value, decimal[] list)>();
foreach (decimal value in list1) {
int leftIndex = Array.BinarySearch<decimal>(list2, value - sd);
if (leftIndex < 0)
leftIndex = -leftIndex - 1;
else // edge case
for (; leftIndex >= 1 && list1[leftIndex - 1] == value - sd; --leftIndex) ;
int rightIndex = Array.BinarySearch<decimal>(list2, value + sd);
if (rightIndex < 0)
rightIndex = -rightIndex - 1;
else // edge case
for (; rightIndex < list1.Length - 1 && list1[rightIndex + 1] == value + sd; ++rightIndex) ;
result.Add((value, list2.Skip(leftIndex).Take(rightIndex - leftIndex).ToArray()));
}
string report = string.Join(Environment.NewLine, result
.Select(item => $"{item.value} => [{string.Join(", ", item.list)}]"));
Console.Write(report);
357.06 => []
366.88 => [370.51]
376.70 => [375.62, 380.72]
386.52 => [385.82, 390.93]
406.15 => []