C# datatable中筛选器内部联接的替代方法
标签：c#, linq
我有一个数据表，我想选择每天的第一个条目，并且要求在该条目的时间戳上所有 curveID 都存在。我能想到的唯一方法是使用联接（join），因为只有当两个数据集中都存在相同的时间戳时，联接才会匹配成功。
以下是我到目前为止的情况:
// Core data from SQL (I have little control over this).
DataTable ds = new DataTable();
da.Fill(ds);
// Build a DataSet holding one table per curve ID I look for.
System.Data.DataSet dataSet = new System.Data.DataSet();
for (int i = 0; i < curveIds.Length; i++)
{
    // CopyToDataTable() evaluates the filter immediately, so capturing the loop variable is safe.
    // Note: it throws InvalidOperationException when no row matches the curve ID.
    dataSet.Tables.Add(ds.AsEnumerable().Where(x => x.Field<short>("curveID") == curveIds[i]).CopyToDataTable());
}
// Assuming exactly two curves: join them on Timestamp so only timestamps present in both survive.
var result = from table1 in dataSet.Tables[0].AsEnumerable()
             join table2 in dataSet.Tables[1].AsEnumerable()
             on table1["Timestamp"] equals table2["Timestamp"]
             select new
             {
                 Timestamp = (DateTime)table1["Timestamp"],
                 Spread = (double)table1["mid"] - 0.4 * (double)table2["mid"],
                 Power = (double)table1["mid"]
             };
// Keep only the earliest joined entry of each calendar day.
// The previous version called FirstOrDefault() on the grouping itself, which returned every
// entry of the first day only (and threw NullReferenceException when the join was empty).
// Grouping on DateTime.Date also avoids the culture-dependent ToShortDateString() key.
var endres = result
    .GroupBy(a => a.Timestamp.Date)
    .Select(day => day.OrderBy(a => a.Timestamp).First())
    .OrderBy(a => a.Timestamp)
    .ToList();
……依此类推，即每天只选出一个条目：当天所有 curveID 都存在的第一个时间戳所对应的条目。
如果我理解正确，您是想在现有的数据表中，找出每天包含 curveID 列表中所有 curveID 的最早时间戳。
如果是这样，我写了一段代码来解决这个问题；如有错误，请在评论中告诉我。使用列表比使用数据集和数据表更容易理解，所以我只用了您的 “ds” 数据表，并编写了一段独立的代码。
此外，这段代码还可以进一步优化，但那样会使代码更难理解。
// Collects, for each calendar day, the first row at the earliest timestamp
// where every ID in curveIds has at least one row.
List<object> endress = new List<object>();
// Normalize the wanted IDs to int once; using a set keeps the "all present" test
// correct even if curveIds contains duplicates.
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
// Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
foreach (var day in rowsByDay)
{
    // Walk the day's timestamps in ascending order and keep the first one at which
    // every wanted curveID has a row.
    // Convert.ToInt32 is used because unboxing with (int) throws InvalidCastException
    // when the column is stored as short.
    var rowFound = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
        .Select(g => g.First())
        .FirstOrDefault();
    // No timestamp on this day carries all the curveIDs.
    if (rowFound == null)
    {
        continue;
    }
    // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
    endress.Add(new
    {
        Timestamp = (DateTime)rowFound["Timestamp"],
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
// Restored from a machine-translated copy whose keywords, identifiers and quotes were corrupted.
// Collects, for each calendar day, the first row at the earliest timestamp
// where every ID in curveIds has at least one row.
List<object> endress = new List<object>();
// Normalize the wanted IDs to int once; using a set keeps the "all present" test
// correct even if curveIds contains duplicates.
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
// Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
foreach (var day in rowsByDay)
{
    // Walk the day's timestamps in ascending order and keep the first one at which
    // every wanted curveID has a row.
    // Convert.ToInt32 is used because unboxing with (int) throws InvalidCastException
    // when the column is stored as short.
    var rowFound = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
        .Select(g => g.First())
        .FirstOrDefault();
    // No timestamp on this day carries all the curveIDs.
    if (rowFound == null)
    {
        continue;
    }
    // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
    endress.Add(new
    {
        Timestamp = (DateTime)rowFound["Timestamp"],
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
这是“主要”片段。但您可以在这里看到完整的测试代码:
// Table under test; LoadDs() replaces it with freshly generated rows.
DataTable ds = new DataTable();
// Curve IDs that must all be present for an entry to qualify.
List<int> curveIds = new List<int> { 1, 2, 3, 4 };
/// <summary>
/// Loads the sample table, then prints, for each calendar day, the first row at the
/// earliest timestamp where every ID in <c>curveIds</c> has at least one row.
/// </summary>
public void Test()
{
    LoadDs();
    List<object> endress = new List<object>();
    // Normalize the wanted IDs to int once; using a set keeps the "all present" test
    // correct even if curveIds contains duplicates.
    var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
    // Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
    var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
    foreach (var day in rowsByDay)
    {
        // Walk the day's timestamps in ascending order and keep the first one at which
        // every wanted curveID has a row.
        // Convert.ToInt32 also works when the column is stored as short, where (int) unboxing throws.
        var rowFound = day
            .GroupBy(r => (DateTime)r["Timestamp"])
            .OrderBy(g => g.Key)
            .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
            .Select(g => g.First())
            .FirstOrDefault();
        // No timestamp on this day carries all the curveIDs.
        if (rowFound == null)
        {
            continue;
        }
        // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
        endress.Add(new
        {
            Timestamp = (DateTime)rowFound["Timestamp"],
            Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
            Power = (double)rowFound["mid"]
        });
    }
    foreach (var o in endress)
    {
        Console.WriteLine(o);
    }
}
/// <summary>
/// Replaces <c>ds</c> with a new table of 50,000 pseudo-random sample rows
/// (columns: curveID, Timestamp, mid).
/// </summary>
public void LoadDs()
{
    ds = new DataTable();
    ds.Columns.Add("curveID", typeof(int));
    ds.Columns.Add("Timestamp", typeof(DateTime));
    ds.Columns.Add("mid", typeof(double));

    // One generator with a fixed seed keeps the data reproducible. Creating a new Random(i)
    // per row allocated 50,000 generators, and generators seeded with consecutive integers
    // produce correlated samples.
    Random rand = new Random(0);
    for (int i = 0; i < 50000; i++)
    {
        var row = ds.NewRow();
        // Upper bounds of Next are exclusive: curveID is 1..4.
        row["curveID"] = rand.Next(1, 5);
        // Day of April 2016 is 1..4, hour is 1 or 2.
        row["Timestamp"] = new DateTime(2016, 4, rand.Next(1, 5), rand.Next(1, 3), 0, 0);
        row["mid"] = rand.NextDouble();
        ds.Rows.Add(row);
    }
}
// Restored from a machine-translated copy in which the type names and generics were corrupted.
// Table under test; LoadDs() replaces it with freshly generated rows.
DataTable ds = new DataTable();
// Curve IDs that must all be present for an entry to qualify.
List<int> curveIds = new List<int> { 1, 2, 3, 4 };
/// <summary>
/// Loads the sample table, then prints, for each calendar day, the first row at the
/// earliest timestamp where every ID in <c>curveIds</c> has at least one row.
/// Restored from a machine-translated copy that was not valid C#.
/// </summary>
public void Test()
{
    LoadDs();
    List<object> endress = new List<object>();
    // Normalize the wanted IDs to int once; using a set keeps the "all present" test
    // correct even if curveIds contains duplicates.
    var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
    // Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
    var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
    foreach (var day in rowsByDay)
    {
        // Walk the day's timestamps in ascending order and keep the first one at which
        // every wanted curveID has a row.
        // Convert.ToInt32 also works when the column is stored as short, where (int) unboxing throws.
        var rowFound = day
            .GroupBy(r => (DateTime)r["Timestamp"])
            .OrderBy(g => g.Key)
            .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
            .Select(g => g.First())
            .FirstOrDefault();
        // No timestamp on this day carries all the curveIDs.
        if (rowFound == null)
        {
            continue;
        }
        // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
        endress.Add(new
        {
            Timestamp = (DateTime)rowFound["Timestamp"],
            Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
            Power = (double)rowFound["mid"]
        });
    }
    foreach (var o in endress)
    {
        Console.WriteLine(o);
    }
}
/// <summary>
/// Replaces <c>ds</c> with a new table of 50,000 pseudo-random sample rows
/// (columns: curveID, Timestamp, mid).
/// Restored from a machine-translated copy that was not valid C#.
/// </summary>
public void LoadDs()
{
    ds = new DataTable();
    ds.Columns.Add("curveID", typeof(int));
    ds.Columns.Add("Timestamp", typeof(DateTime));
    ds.Columns.Add("mid", typeof(double));

    // One generator with a fixed seed keeps the data reproducible and avoids allocating
    // a new Random per row; generators seeded with consecutive integers produce correlated samples.
    Random rand = new Random(0);
    for (int i = 0; i < 50000; i++)
    {
        var row = ds.NewRow();
        // Upper bounds of Next are exclusive: curveID is 1..4.
        row["curveID"] = rand.Next(1, 5);
        // Day of April 2016 is 1..4, hour is 1 or 2.
        row["Timestamp"] = new DateTime(2016, 4, rand.Next(1, 5), rand.Next(1, 3), 0, 0);
        row["mid"] = rand.NextDouble();
        ds.Rows.Add(row);
    }
}
如果我理解正确，您是想在现有的数据表中，找出每天包含 curveID 列表中所有 curveID 的最早时间戳。
如果是这样，我写了一段代码来解决这个问题；如有错误，请在评论中告诉我。使用列表比使用数据集和数据表更容易理解，所以我只用了您的 “ds” 数据表，并编写了一段独立的代码。
此外，这段代码还可以进一步优化，但那样会使代码更难理解。
// Collects, for each calendar day, the first row at the earliest timestamp
// where every ID in curveIds has at least one row.
List<object> endress = new List<object>();
// Normalize the wanted IDs to int once; using a set keeps the "all present" test
// correct even if curveIds contains duplicates.
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
// Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
foreach (var day in rowsByDay)
{
    // Walk the day's timestamps in ascending order and keep the first one at which
    // every wanted curveID has a row.
    // Convert.ToInt32 is used because unboxing with (int) throws InvalidCastException
    // when the column is stored as short.
    var rowFound = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
        .Select(g => g.First())
        .FirstOrDefault();
    // No timestamp on this day carries all the curveIDs.
    if (rowFound == null)
    {
        continue;
    }
    // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
    endress.Add(new
    {
        Timestamp = (DateTime)rowFound["Timestamp"],
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
// Restored from a machine-translated copy whose keywords, identifiers and quotes were corrupted
// (the closing brace of the loop was also missing).
// Collects, for each calendar day, the first row at the earliest timestamp
// where every ID in curveIds has at least one row.
List<object> endress = new List<object>();
// Normalize the wanted IDs to int once; using a set keeps the "all present" test
// correct even if curveIds contains duplicates.
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));
// Bucket the rows by calendar day in a single pass (days keep their order of first appearance).
var rowsByDay = ds.AsEnumerable().GroupBy(r => ((DateTime)r["Timestamp"]).Date);
foreach (var day in rowsByDay)
{
    // Walk the day's timestamps in ascending order and keep the first one at which
    // every wanted curveID has a row.
    // Convert.ToInt32 is used because unboxing with (int) throws InvalidCastException
    // when the column is stored as short.
    var rowFound = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .Where(g => wantedIds.IsSubsetOf(g.Select(r => Convert.ToInt32(r["curveID"]))))
        .Select(g => g.First())
        .FirstOrDefault();
    // No timestamp on this day carries all the curveIDs.
    if (rowFound == null)
    {
        continue;
    }
    // Placeholder projection (the exact requirement was unclear): both terms of Spread read the same row.
    endress.Add(new
    {
        Timestamp = (DateTime)rowFound["Timestamp"],
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
// I'll pretend the curve IDs are in this list (short, to match the curveID column type).
List<short> curveids = new List<short>();
// Here ds is assumed to be a DataSet that contains the table.
DataTable table = ds.Tables["Your table"];
// First group the rows by timestamp, keeping only the wanted curve IDs inside the
// (mindate, maxdate) window of your choosing. DataRow has no typed members, so the
// columns are read with Field<T>.
var grouping = table.AsEnumerable()
    .Where(x => curveids.Contains(x.Field<short>("curveID")) &&
                x.Field<DateTime>("Timestamp") > mindate &&
                x.Field<DateTime>("Timestamp") < maxdate)
    .GroupBy(x => x.Field<DateTime>("Timestamp"));
// This gives an IEnumerable<IGrouping<DateTime, DataRow>>:
// one group of rows per timestamp, restricted to the curve IDs in your selection.
// Now get the minimum timestamp at which all curve IDs are present: a timestamp qualifies
// when it has as many distinct curve IDs as the selection has.
// Min() throws InvalidOperationException when no timestamp qualifies.
int wantedCount = curveids.Distinct().Count();
DateTime minTimestamp = grouping
    .Where(g => g.Select(r => r.Field<short>("curveID")).Distinct().Count() == wantedCount)
    .Select(g => g.Key)
    .Min();
// ... now you can do what you wish with that.
// For example, fetch the rows of the selected curves at that timestamp:
var resultRows = table.AsEnumerable().Where(x =>
    x.Field<DateTime>("Timestamp") == minTimestamp &&
    curveids.Contains(x.Field<short>("curveID")));
// Project every row into a typed shape so the rest of the query reads by property name.
var typedRows = dt.AsEnumerable().Select(row => new
{
    TimeStamp = row.Field<DateTime>("TimeStamp"),
    CurveID = row.Field<short>("CurveId"),
    Mid = row.Field<double>("Mid")
});

// Drop rows whose curve is not in the list, bucket the rest by exact timestamp,
// and keep only the timestamps at which every listed curve has a row.
var completeTimestamps = typedRows
    .Where(entry => ids.Contains(entry.CurveID))
    .GroupBy(entry => entry.TimeStamp)
    .Where(atTime => atTime.Select(entry => entry.CurveID).Distinct().Count() == ids.Count);

// Per calendar day, take the earliest complete timestamp and compute the outputs from its Mid values.
var result = completeTimestamps
    .GroupBy(atTime => atTime.Key.Date)
    .Select(day =>
    {
        // Earliest timestamp group of the day.
        var earliest = day.OrderBy(atTime => atTime.Key).First();
        // All Mid values recorded at that timestamp.
        var mids = earliest.Select(entry => entry.Mid).ToList();
        return new
        {
            TimeStamp = earliest.Key,
            Spread = spread(mids),
            Power = power(mids)
        };
    })
    .ToList();