C# datatable中筛选器内部联接的替代方法

C# datatable中筛选器内部联接的替代方法,c#,linq,C#,Linq,我有一个数据表,我想选择每天的第一个条目,其中所有curveID都存在。我能想到的唯一方法是使用连接,因为只有在两个数据集都存在的情况下,它才会自动连接 以下是我到目前为止的情况: //core data from sql (I have little control over this) DataTable ds = new DataTable(); da.Fill(ds); //creating dataset with various tables based on curveIDs

我有一个数据表,我想选择每天的第一个条目,其中所有
curveID
都存在。我能想到的唯一方法是使用连接,因为只有在两个数据集都存在的情况下,它才会自动连接

以下是我到目前为止的情况:

//core data from sql (I have little control over this)
// NOTE(review): `da` is not declared in this snippet; presumably a data adapter created earlier.
DataTable ds = new DataTable(); 
da.Fill(ds);

//creating dataset with various tables based on curveIDs I look for
// One table is added per entry of curveIds, in order, so Tables[i] holds the rows
// whose curveID equals curveIds[i].
// NOTE(review): CopyToDataTable is documented to throw when the filtered sequence
// is empty; confirm every ID in curveIds actually occurs in ds.
System.Data.DataSet dataSet = new System.Data.DataSet();
            for (int i = 0; i < curveIds.Length; i++)
            {
                    dataSet.Tables.Add(ds.AsEnumerable().Where(x => x.Field<short>("curveID") == curveIds[i]).CopyToDataTable());
            }

//lets say I have two only and then I join them like this to match timestamps correctly
// Inner join on Timestamp: a timestamp appears in `result` only when both tables
// contain a row for it.
var result = from table1 in dataSet.Tables[0].AsEnumerable()
                         join table2 in dataSet.Tables[1].AsEnumerable() 
                         on table1["Timestamp"] equals table2["Timestamp"]
                         select new
                         {
                             Timestamp = (DateTime)table1["Timestamp"],
                             Spread = (double)table1["mid"] - 0.4 * (double)table2["mid"],
                             Power = (double)table1["mid"]
                         };

//lastly I do a firstordefault over the data as I only want the first timestamp where both are present (this step doesnt return the correct data)
// Why it is wrong: GroupBy yields one group per date string, but FirstOrDefault() is
// applied to the sequence of groups, so endres contains every row of the first group
// only (the earliest day) instead of the first row of each day. When `result` is
// empty, FirstOrDefault() returns null and the ToList() call throws.
var endres = result.OrderBy(a => a.Timestamp).GroupBy(a => a.Timestamp.ToShortDateString()).FirstOrDefault().ToList();

。。。依此类推,每天只有一个条目被选为所有ID都存在的第一个条目。

如果我理解正确,您想找到每天中包含curveID列表里所有“curveID”的最早时间戳(在您已有的数据表中)

如果是这样的话,那么我已经写了一段代码来解决这个问题。如果有错误,请在评论中告诉我。使用列表比使用数据集和数据表更容易理解,所以我只使用了您的“ds”数据表,并编写了一段独立的代码

此外,这段代码还可以进一步优化,但那样会使代码更难理解

// Collects one entry per calendar day: the earliest timestamp of that day at
// which every ID in curveIds has a row. Days with no such timestamp are skipped.
List<object> endress = new List<object>();

// Wanted IDs as a set for fast membership checks. Convert.ToInt32 accepts
// short as well as int IDs (the question reads the column as short).
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));

// Bucket the rows by calendar day in a single pass over the table;
// days come out in order of first appearance.
var rowsByDay = ds.AsEnumerable()
    .GroupBy(r => ((DateTime)r["Timestamp"]).Date);

foreach (var day in rowsByDay)
{
    // Within the day, bucket by exact timestamp and take the earliest bucket
    // whose rows cover every wanted curveID. Checking completeness per
    // timestamp (not per day) is what makes the selected entry valid.
    var firstComplete = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .FirstOrDefault(g => wantedIds.IsSubsetOf(
            g.Select(r => Convert.ToInt32(r["curveID"]))));

    // No timestamp on this day has all curve IDs.
    if (firstComplete == null)
        continue;

    var rowFound = firstComplete.First();

    // NOTE(review): Spread still uses a single row's "mid" for both operands,
    // as in the original answer; substitute the per-curve values once the
    // intended formula is confirmed.
    endress.Add(new
    {
        Timestamp = firstComplete.Key,
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
List endress=new List();
//过滤所有时间戳,仅获取日期信息
var Timestamp=ds.AsEnumerable().Select(r=>((DateTime)r[“Timestamp”]).Date.Distinct();
//对于每个id
foreach(时间戳中的var时间戳)
{
//查找所有相同的时间戳(在同一天)
var listsSametimestamp=ds.AsEnumerable()。其中(r=>((DateTime)r[“Timestamp”])。Date==Timestamp);
var listIds=listsSametimestamp.Select(r=>(int)r[“curveID”]).Distinct();
//确保他们都有你想要的curveIDs
var havethemal=curveIds.Intersect(listIds.Count()==curveIds.Count();
如果(haveThemAll==false)
继续;
//查找最低的时间戳
var rowFound=listSameTimestamp.OrderBy(r=>(DateTime)r[“Timestamp”]).FirstOrDefault();
if(rowfind==null)
继续;
//创建匿名对象(无法理解您的需要)
endress.Add(新的)
{
时间戳=(DateTime)行已找到[“时间戳”],
排列=(双)行发现[“中间”]-0.4*(双)行发现[“中间”],
电源=(双)行已找到[“中间”]
});                   
}
这是“主要”片段。但您可以在这里看到完整的测试代码:

// Sample table read by Test(); LoadDs() replaces it with a freshly built table.
DataTable ds = new DataTable();
// Curve IDs that Test() requires to be present.
List<int> curveIds = new List<int>() {1,2,3,4};

/// <summary>
/// Loads the sample data, then prints one entry per calendar day: the earliest
/// timestamp of that day at which every ID in <c>curveIds</c> has a row.
/// Days with no such timestamp produce no output.
/// </summary>
public void Test()
{
    LoadDs();

    List<object> endress = new List<object>();

    // Wanted IDs as a set for fast membership checks.
    var wantedIds = new HashSet<int>(curveIds);

    // Bucket the rows by calendar day in a single pass over the table;
    // days come out in order of first appearance.
    var rowsByDay = ds.AsEnumerable()
        .GroupBy(r => ((DateTime)r["Timestamp"]).Date);

    foreach (var day in rowsByDay)
    {
        // Within the day, bucket by exact timestamp and take the earliest
        // bucket whose rows cover every wanted curveID. Checking completeness
        // per timestamp (not per day) is what makes the selected entry valid.
        var firstComplete = day
            .GroupBy(r => (DateTime)r["Timestamp"])
            .OrderBy(g => g.Key)
            .FirstOrDefault(g => wantedIds.IsSubsetOf(
                g.Select(r => Convert.ToInt32(r["curveID"]))));

        // No timestamp on this day has all curve IDs.
        if (firstComplete == null)
            continue;

        var rowFound = firstComplete.First();

        // NOTE(review): Spread still uses a single row's "mid" for both
        // operands, as in the original answer; substitute the per-curve
        // values once the intended formula is confirmed.
        endress.Add(new
        {
            Timestamp = firstComplete.Key,
            Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
            Power = (double)rowFound["mid"]
        });
    }

    foreach (var o in endress)
    {
        Console.WriteLine(o);
    }
}

/// <summary>
/// Replaces <c>ds</c> with a new table of 50,000 pseudo-random sample rows
/// having the columns "curveID" (int), "Timestamp" (DateTime) and "mid" (double).
/// </summary>
public void LoadDs()
{
    ds = new DataTable();
    ds.Columns.Add("curveID", typeof(int));
    ds.Columns.Add("Timestamp", typeof(DateTime));
    ds.Columns.Add("mid", typeof(double));

    // One generator for the whole table instead of a new one per row. The fixed
    // seed keeps the generated data identical from run to run.
    Random rand = new Random(0);

    for (int i = 0; i < 50000; i++)
    {
        var row = ds.NewRow();
        // Random.Next's upper bound is exclusive: curve IDs 1..4.
        row["curveID"] = rand.Next(1, 5);
        // April 2016, day 1..4, hour 1..2, on the hour.
        row["Timestamp"] = new DateTime(2016, 4, rand.Next(1, 5), rand.Next(1, 3), 0, 0);
        row["mid"] = rand.NextDouble();

        ds.Rows.Add(row);
    }
}
DataTable ds=newdatatable();
List curveIds=new List(){1,2,3,4};
公开无效测试()
{
LoadDs();
List endress=新列表();
//过滤所有时间戳,仅获取日期信息
var Timestamp=ds.AsEnumerable().Select(r=>((DateTime)r[“Timestamp”]).Date.Distinct();
//对于每个id
foreach(时间戳中的var时间戳)
{
//查找所有相同的时间戳(在同一天)
var listsSametimestamp=ds.AsEnumerable()。其中(r=>((DateTime)r[“Timestamp”])。Date==Timestamp);
var listIds=listsSametimestamp.Select(r=>(int)r[“curveID”]).Distinct();
//确保他们都有你想要的curveIDs
var havethemal=curveIds.Intersect(listIds.Count()==curveIds.Count();
如果(haveThemAll==false)
继续;
//查找最低的时间戳
var rowFound=listSameTimestamp.OrderBy(r=>(DateTime)r[“Timestamp”]).FirstOrDefault();
if(rowfind==null)
继续;
//创建匿名对象(无法理解您的需要)
endress.Add(新的)
{
时间戳=(DateTime)行已找到[“时间戳”],
排列=(双)行发现[“中间”]-0.4*(双)行发现[“中间”],
电源=(双)行已找到[“中间”]
});                   
}
foreach(endress中的var o)
{
控制台写入线(o);
}
}
公共void loads()
{
ds=新数据表();
添加(“curveID”,typeof(int));
添加(“时间戳”,typeof(DateTime));
添加(“中间”,类型(双));
对于(int i=0;i<50000;i++)
{
随机随机数=新随机数(i);
var row=ds.NewRow();
行[“curveID”]=下一个随机数(1,5);
行[“时间戳”]=新日期时间(2016,4,兰特下一个(1,5),兰特下一个(1,3),0,0);
行[“中间”]=rand.NextDouble();
ds.Rows.Add(行);
}
}

如果我理解正确,您希望找到每天中包含您的curveID列表里所有“curveID”的最早时间戳(在您已有的数据表中)

如果是这样的话,那么我已经写了一段代码来解决这个问题。如果有错误,请在评论中告诉我。使用列表比使用数据集和数据表更容易理解,所以我只使用了您的“ds”数据表,并编写了一段独立的代码

此外,这段代码还可以进一步优化,但那样会使代码更难理解

// Collects one entry per calendar day: the earliest timestamp of that day at
// which every ID in curveIds has a row. Days with no such timestamp are skipped.
List<object> endress = new List<object>();

// Wanted IDs as a set for fast membership checks. Convert.ToInt32 accepts
// short as well as int IDs (the question reads the column as short).
var wantedIds = new HashSet<int>(curveIds.Select(id => Convert.ToInt32(id)));

// Bucket the rows by calendar day in a single pass over the table;
// days come out in order of first appearance.
var rowsByDay = ds.AsEnumerable()
    .GroupBy(r => ((DateTime)r["Timestamp"]).Date);

foreach (var day in rowsByDay)
{
    // Within the day, bucket by exact timestamp and take the earliest bucket
    // whose rows cover every wanted curveID. Checking completeness per
    // timestamp (not per day) is what makes the selected entry valid.
    var firstComplete = day
        .GroupBy(r => (DateTime)r["Timestamp"])
        .OrderBy(g => g.Key)
        .FirstOrDefault(g => wantedIds.IsSubsetOf(
            g.Select(r => Convert.ToInt32(r["curveID"]))));

    // No timestamp on this day has all curve IDs.
    if (firstComplete == null)
        continue;

    var rowFound = firstComplete.First();

    // NOTE(review): Spread still uses a single row's "mid" for both operands,
    // as in the original answer; substitute the per-curve values once the
    // intended formula is confirmed.
    endress.Add(new
    {
        Timestamp = firstComplete.Key,
        Spread = (double)rowFound["mid"] - 0.4 * (double)rowFound["mid"],
        Power = (double)rowFound["mid"]
    });
}
List endress=new List();
//过滤所有时间戳,仅获取日期信息
var Timestamp=ds.AsEnumerable().Select(r=>((DateTime)r[“Timestamp”]).Date.Distinct();
//对于每个id
foreach(时间戳中的var时间戳)
{
//查找所有相同的时间戳(在同一天)
var listsSametimestamp=ds.AsEnumerable()。其中(r=>((DateTime)r[“Timestamp”])。Date==Timestamp);
var listIds=listsSametimestamp.Select(r=>(int)r[“curveID”]).Distinct();
//确保他们都有你想要的curveIDs
var havethemal=curveIds.Intersect(listIds.Count()==curveIds.Count();
如果(haveThemAll==false)
继续;
//查找最低的时间戳
var rowFound=listSameTimestamp.OrderBy(r=>(DateTime)r[“Timestamp”]).FirstOrDefault();
if(rowfind==null)
继续;
//创建匿名对象(无法理解您的需要)
endress.Add(新的)
{
时间戳=(DateTime)行已找到[“时间戳”],
排列=(双)行发现[“中间”]-0.4*(双)行发现[“中间”],
电源=(双)行已找到[“中间”]
});
// I'll pretend the curveids are in this list...
// (short, to match how the question reads the "curveID" column)
List<short> curveids = new List<short>();

// NOTE(review): this assumes ds is a DataSet; in the question ds is itself a
// DataTable, in which case use it directly.
DataTable table = ds.Tables["Your table"];

// First group the rows by timestamp, keeping only rows whose curve ID is in the
// list and whose timestamp lies strictly between mindate and maxdate.
// Set up mindate and maxdate of your choosing...
var grouping = table.AsEnumerable()
    .Where(x => curveids.Contains(x.Field<short>("curveID")) &&
                x.Field<DateTime>("Timestamp") > mindate &&
                x.Field<DateTime>("Timestamp") < maxdate)
    .GroupBy(x => x.Field<DateTime>("Timestamp"));
// This gives an IEnumerable<IGrouping<DateTime, DataRow>>,
// i.e. timestamps, and for each one the rows with curve IDs in your selection.

// Now get the minimum timestamp where all curve IDs are present. Distinct IDs are
// counted so duplicate rows for one curve cannot fake a complete set.
// Min() throws InvalidOperationException when no timestamp qualifies.
DateTime minTimestamp = grouping
    .Where(x => x.Select(y => y.Field<short>("curveID")).Distinct().Count() == curveids.Count)
    .Select(x => x.Key)
    .Min();

// .. now can do what you wish with that...
// For example: all rows at that timestamp for the selected curves.
var resultRows = table.AsEnumerable().Where(x =>
                    x.Field<DateTime>("Timestamp") == minTimestamp &&
                    curveids.Contains(x.Field<short>("curveID")));
    // Project every DataRow onto a small typed shape so the later stages read
    // properties instead of column names.
    var typedRows = dt.AsEnumerable().Select(row => new
    {
        TimeStamp = row.Field<DateTime>("TimeStamp"),
        CurveID = row.Field<short>("CurveId"),
        Mid = row.Field<double>("Mid")
    });

    // Keep only rows for the requested curve IDs, bucket them by timestamp, and
    // retain the timestamps at which every requested ID occurs.
    var completeTimestamps = typedRows
        .Where(entry => ids.Contains(entry.CurveID))
        .GroupBy(entry => entry.TimeStamp)
        .Where(atTimestamp =>
            atTimestamp.Select(entry => entry.CurveID).Distinct().Count() == ids.Count);

    // Per calendar day, take the earliest complete timestamp and run the
    // computations over its Mid values.
    var result = completeTimestamps
        .GroupBy(atTimestamp => atTimestamp.Key.Date)
        .Select(day =>
        {
            var earliest = day.OrderBy(atTimestamp => atTimestamp.Key).First();
            var mids = earliest.Select(entry => entry.Mid).ToList();
            return new
            {
                TimeStamp = earliest.Key,
                Spread = spread(mids),
                Power = power(mids)
            };
        })
        .ToList();