Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/273.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/11.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 基于连续调用之间的运行时间优化批处理大小_C#_Algorithm_Batch Processing_Mathematical Optimization - Fatal编程技术网

C# 基于连续调用之间的运行时间优化批处理大小

C# 基于连续调用之间的运行时间优化批处理大小,c#,algorithm,batch-processing,mathematical-optimization,C#,Algorithm,Batch Processing,Mathematical Optimization,我已经开始尝试创建以下内容: public static IEnumerable<List<T>> OptimizedBatches<T>(this IEnumerable<T> items) 下面是一个例子: batch length time 1 100ms 2 102ms 4 110ms 8

我已经开始尝试创建以下内容:

public static IEnumerable<List<T>> OptimizedBatches<T>(this IEnumerable<T> items)
下面是一个例子:

batch length         time
    1                 100ms
    2                 102ms
    4                 110ms
    8                 111ms
    16                118ms
    32                119ms
    64                134ms
    128               500ms <-- doubled length but time it took more than doubled
    256               1100ms <-- oh no!!
批次长度 时间
1 100ms
2 102ms
4 110ms
8 111ms
16 118ms
32 119ms
64 134ms

128 500ms
256 1100ms

我在这里看到的主要问题是如何建立一个“最优性量表”,也就是说,为什么你认为 32 – 119ms 是可以接受的,而 256 – 1100ms 是不可接受的;或者说,为什么某些配置比其他配置更好。

完成后,算法将变得简单:只需返回每个输入条件的排名值,并根据“哪个值更高”做出决策

创建此量表的第一步是找出更能描述理想行为的变量。简单的第一种方法:长度/时间。也就是说,根据您的输入:

batch length           time             ratio1
    1                 100ms              0.01
    2                 102ms              0.019  
    4                 110ms              0.036  
    8                 111ms              0.072
    16                118ms              0.136
    32                119ms              0.269  
    64                134ms              0.478
    128               500ms              0.256
    256               1100ms             0.233
比率越大越好。从逻辑上讲,长度为32的0.269与长度为128的0.256并不相同,因此必须考虑更多的信息

您可以创建一个更复杂的排名比率,更好地加权两个相关变量(例如,尝试不同的指数)。但我认为解决这个问题的最佳方法是创建一个“区域”系统,并根据它计算一个通用的排名。例如:

Zone 1 -> length from 1 to 8; ideal ratio for this zone = 0.1
Zone 2 -> length from 9 to 32; ideal ratio for this zone = 0.3
Zone 3 -> length from 33 to 64; ideal ratio for this zone = 0.45
Zone 4 -> length from 65 to 256; ideal ratio for this zone = 0.35
每个配置的排名等于该配置的比率 ratio1 除以其所在区域的理想比率:

2      102ms        0.019 -> (zone 1) 0.019/0.1 = 0.19 (or 1.9 in a 0-10 scale)
16     118ms        0.136 -> (zone 2) 0.136/0.3 = 0.45 (or 4.5 in a 0-10 scale)  
etc.
可以比较这些值,因此您会自动知道第二种情况比第一种情况好得多


这只是一个简单的例子,但我想这提供了一个足够好的洞察什么是真正的问题:建立一个准确的排名,允许完美地确定哪个配置更好

我会采用瓦罗卡巴斯建议的排名方法

以下是一个初始实现,可以帮助您开始:

/// <summary>
/// Splits a sequence into batches and adapts the batch length while enumerating,
/// based on the throughput (items per stopwatch tick) measured between
/// consecutive batches.
/// </summary>
/// <typeparam name="T">Type of the items in the source sequence.</typeparam>
public sealed class DataFlowOptimizer<T>
{
    // Number of timed batches that are averaged before the batch length is re-evaluated.
    private const int BenchmarkWindow = 10;

    // The batch length is only shrunk when the measured rate falls below this
    // fraction of the best rate seen so far (10% margin of error).
    private const double ShrinkThreshold = .9;

    // Upper bound for the batch length. Keeps NextPowerOf2 from wrapping to 0
    // and keeps the (int) cast in BatchLength from going negative.
    private const uint MaxBatchLength = 1u << 30;

    private readonly IEnumerable<T> _collection;
    private RateRecord _bestRate = RateRecord.Default;
    private uint _batchLength = 1;

    /// <summary>
    /// Immutable measurement: how many items were batched and how many
    /// stopwatch ticks elapsed while doing so.
    /// </summary>
    private struct RateRecord
    {
        public static readonly RateRecord Default = new RateRecord(1, 0);

        public readonly int Length;
        public readonly long ElapsedTicks;

        public RateRecord(int length, long elapsedTicks)
        {
            Length = length;
            ElapsedTicks = elapsedTicks;
        }

        /// <summary>Items per tick; 0 when no time has been recorded.</summary>
        public float Rate
        {
            get { return ElapsedTicks > 0 ? ((float)Length) / ElapsedTicks : 0f; }
        }
    }

    /// <summary>Creates an optimizer over the given source sequence.</summary>
    /// <param name="collection">Sequence to split into batches.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public DataFlowOptimizer(IEnumerable<T> collection)
    {
        if (collection == null)
        {
            throw new System.ArgumentNullException("collection");
        }

        _collection = collection;
    }

    /// <summary>Batch length currently used by <see cref="GetBatch"/>.</summary>
    public int BatchLength { get { return (int)_batchLength; } }

    /// <summary>Best rate (items per stopwatch tick) recorded so far.</summary>
    public float Rate { get { return _bestRate.Rate; } }

    /// <summary>
    /// Lazily enumerates the source in batches. The elapsed time of each full batch
    /// is recorded (the stopwatch is restarted right before the batch is handed to
    /// the caller, so the caller's processing time is part of the next measurement),
    /// and after every <c>BenchmarkWindow</c> full batches the batch length is grown
    /// or shrunk by a power of two.
    /// </summary>
    /// <returns>
    /// Every item of the source, in order, grouped into non-empty batches. The last
    /// batch may be shorter than <see cref="BatchLength"/>.
    /// </returns>
    public IEnumerable<IList<T>> GetBatch()
    {
        var benchmarks = new List<RateRecord>(BenchmarkWindow);
        var batch = new List<T>();
        var stopwatch = Stopwatch.StartNew();

        foreach (var item in _collection)
        {
            // A full batch is only released once the next item is available,
            // matching the point at which the elapsed time is sampled.
            if (batch.Count == _batchLength)
            {
                benchmarks.Add(new RateRecord(BatchLength, stopwatch.ElapsedTicks));

                var currentBatch = batch;
                batch = new List<T>();

                if (benchmarks.Count == BenchmarkWindow)
                {
                    Tune(benchmarks);
                    benchmarks.Clear();
                }

                stopwatch.Restart();

                yield return currentBatch;
            }

            batch.Add(item);
        }

        stopwatch.Stop();

        // Flush whatever is left; without this the tail of the source is lost.
        if (batch.Count > 0)
        {
            yield return batch;
        }
    }

    // Compares the averaged rate of the last window with the best rate so far
    // and adjusts the batch length accordingly.
    private void Tune(List<RateRecord> benchmarks)
    {
        var currentRate = benchmarks.Average(x => x.Rate);

        if (currentRate > _bestRate.Rate)
        {
            _bestRate = new RateRecord(BatchLength, (long)benchmarks.Average(x => x.ElapsedTicks));

            var grown = NextPowerOf2(_batchLength);
            _batchLength = grown > MaxBatchLength ? MaxBatchLength : grown;
        }
        else if ((_bestRate.Rate * ShrinkThreshold) > currentRate)
        {
            // Halve the current length; "| 1" keeps the value >= 1.
            _batchLength = PreviousPowerOf2((_batchLength >> 1) | 1);
        }
    }

    // Largest power of two that is <= x (x must be >= 1).
    private static uint PreviousPowerOf2(uint x)
    {
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);

        return x - (x >> 1);
    }

    // Smallest power of two that is strictly greater than x.
    // Wraps to 0 for x >= 2^31, which is why callers clamp to MaxBatchLength.
    private static uint NextPowerOf2(uint x)
    {
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);

        return x + 1;
    }
}
公共密封类数据流优化器
{
私有只读IEnumerable\u集合;
private RateRecord bestRate=RateRecord.Default;
专用uint batchLength=1;
私有结构速率记录
{
public static ratecord Default=new ratecord{Length=1,ElapsedTicks=0};
私人浮动利率;
公共整数长度{get;set;}
公共长ElapsedTicks{get;set;}
公众浮动汇率
{
得到
{
如果(_rate==默认值(浮动)&&ElapsedTicks>0)
{
_速率=((浮动)长度)/ElapsedTicks;
}
回报率;
}
}
}
公共数据流优化器(IEnumerable集合)
{
_收集=收集;
}
public int BatchLength{get{return(int)BatchLength;}}
公共浮动利率{get{return bestRate.Rate;}
公共IEnumerable GetBatch()
{
var stopwatch=新秒表();
var batch=新列表();
var基准=新列表(5);
IEnumerator枚举数=null;
尝试
{
枚举数=_集合。GetEnumerator();
单位计数=0;
秒表。开始();
while(枚举数.MoveNext())
{   
if(count==batchLength)
{
添加(新速率记录{Length=BatchLength,ElapsedTicks=stopwatch.ElapsedTicks});
var currentBatch=batch.ToList();
batch.Clear();
如果(benchmarks.Count==10)
{
var currentRate=基准平均值(x=>x.Rate);
如果(当前速率>最佳速率)
{
bestRate=new RateRecord{Length=BatchLength,ElapsedTicks=(长)基准。平均值(x=>x.ElapsedTicks)};
batchLength=NextPowerOf2(batchLength);
}
//将误差幅度设置为10%
否则如果((bestRate.Rate*.9)>当前速率)
{
//移动当前长度并确保其>=1
var CurrentPow2=((batchLength>>1)| 1);
batchLength=以前的功率2(当前功率2);
}
基准。清除();
}
计数=0;
stopwatch.Restart();
批量生产;
}
batch.Add(枚举数.Current);
计数++;
}
}
最后
{
if(枚举数!=null)
枚举数。Dispose();
}
秒表;
}
uint先前的幂2(uint x)
{
x |=(x>>1);
x |=(x>>2);
x |=(x>>4);
x |=(x>>8);
x |=(x>>16);
返回x-(x>>1);
}
uint NextPowerOf2(uint x)
{
x |=(x>>1);
x |=(x>>2);
x |=(x>>4);
x |=(x>>8);
x |=(x>>16);
返回(x+1);
}
}
LinqPad中的示例程序:

/// <summary>
/// Produces the sample input: the integers 0 through 99,999,999 in ascending order.
/// </summary>
public IEnumerable<int> GetData()
{
    const int firstValue = 0;
    const int itemCount = 100000000;

    IEnumerable<int> sequence = Enumerable.Range(firstValue, itemCount);
    return sequence;
}

// LINQPad entry point: streams the sample data through the optimizer and,
// for every batch received, dumps the optimizer's current batch length and best rate.
void Main()
{
    DataFlowOptimizer<int> optimizer = new DataFlowOptimizer<int>(GetData());
    IEnumerable<IList<int>> batches = optimizer.GetBatch();

    foreach(var batch in batches)
    {
        var report = string.Format("Length: {0} Rate {1}", optimizer.BatchLength, optimizer.Rate);
        report.Dump();
    }
}
public IEnumerable GetData()
{
返回可枚举的范围(0,100000000);
}
void Main()
{
var optimizer=newdataflowoptimizer(GetData());
foreach(optimizer.GetBatch()中的var batch)
{
Format(“长度:{0}速率{1}”,optimizer.BatchLength,optimizer.Rate).Dump();
}
}
  • 描述一个目标函数 f,它将批量大小 s 和运行时间 t(s) 映射到分数 f(s, t(s))
  • 尝试大量的 s 值,并为每个值计算 f(s, t(s))
  • 选择使 f(s, t(s)) 最大的 s
    /// <summary>
    /// Splits a sequence into batches and adapts the batch length while enumerating,
    /// based on the throughput (items per stopwatch tick) measured between
    /// consecutive batches.
    /// </summary>
    /// <typeparam name="T">Type of the items in the source sequence.</typeparam>
    public sealed class DataFlowOptimizer<T>
    {
        // Number of timed batches that are averaged before the batch length is re-evaluated.
        private const int BenchmarkWindow = 10;

        // The batch length is only shrunk when the measured rate falls below this
        // fraction of the best rate seen so far (10% margin of error).
        private const double ShrinkThreshold = .9;

        // Upper bound for the batch length. Keeps NextPowerOf2 from wrapping to 0
        // and keeps the (int) cast in BatchLength from going negative.
        private const uint MaxBatchLength = 1u << 30;

        private readonly IEnumerable<T> _collection;
        private RateRecord _bestRate = RateRecord.Default;
        private uint _batchLength = 1;

        /// <summary>
        /// Immutable measurement: how many items were batched and how many
        /// stopwatch ticks elapsed while doing so.
        /// </summary>
        private struct RateRecord
        {
            public static readonly RateRecord Default = new RateRecord(1, 0);

            public readonly int Length;
            public readonly long ElapsedTicks;

            public RateRecord(int length, long elapsedTicks)
            {
                Length = length;
                ElapsedTicks = elapsedTicks;
            }

            /// <summary>Items per tick; 0 when no time has been recorded.</summary>
            public float Rate
            {
                get { return ElapsedTicks > 0 ? ((float)Length) / ElapsedTicks : 0f; }
            }
        }

        /// <summary>Creates an optimizer over the given source sequence.</summary>
        /// <param name="collection">Sequence to split into batches.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="collection"/> is null.</exception>
        public DataFlowOptimizer(IEnumerable<T> collection)
        {
            if (collection == null)
            {
                throw new System.ArgumentNullException("collection");
            }

            _collection = collection;
        }

        /// <summary>Batch length currently used by <see cref="GetBatch"/>.</summary>
        public int BatchLength { get { return (int)_batchLength; } }

        /// <summary>Best rate (items per stopwatch tick) recorded so far.</summary>
        public float Rate { get { return _bestRate.Rate; } }

        /// <summary>
        /// Lazily enumerates the source in batches. The elapsed time of each full batch
        /// is recorded (the stopwatch is restarted right before the batch is handed to
        /// the caller, so the caller's processing time is part of the next measurement),
        /// and after every <c>BenchmarkWindow</c> full batches the batch length is grown
        /// or shrunk by a power of two.
        /// </summary>
        /// <returns>
        /// Every item of the source, in order, grouped into non-empty batches. The last
        /// batch may be shorter than <see cref="BatchLength"/>.
        /// </returns>
        public IEnumerable<IList<T>> GetBatch()
        {
            var benchmarks = new List<RateRecord>(BenchmarkWindow);
            var batch = new List<T>();
            var stopwatch = Stopwatch.StartNew();

            foreach (var item in _collection)
            {
                // A full batch is only released once the next item is available,
                // matching the point at which the elapsed time is sampled.
                if (batch.Count == _batchLength)
                {
                    benchmarks.Add(new RateRecord(BatchLength, stopwatch.ElapsedTicks));

                    var currentBatch = batch;
                    batch = new List<T>();

                    if (benchmarks.Count == BenchmarkWindow)
                    {
                        Tune(benchmarks);
                        benchmarks.Clear();
                    }

                    stopwatch.Restart();

                    yield return currentBatch;
                }

                batch.Add(item);
            }

            stopwatch.Stop();

            // Flush whatever is left; without this the tail of the source is lost.
            if (batch.Count > 0)
            {
                yield return batch;
            }
        }

        // Compares the averaged rate of the last window with the best rate so far
        // and adjusts the batch length accordingly.
        private void Tune(List<RateRecord> benchmarks)
        {
            var currentRate = benchmarks.Average(x => x.Rate);

            if (currentRate > _bestRate.Rate)
            {
                _bestRate = new RateRecord(BatchLength, (long)benchmarks.Average(x => x.ElapsedTicks));

                var grown = NextPowerOf2(_batchLength);
                _batchLength = grown > MaxBatchLength ? MaxBatchLength : grown;
            }
            else if ((_bestRate.Rate * ShrinkThreshold) > currentRate)
            {
                // Halve the current length; "| 1" keeps the value >= 1.
                _batchLength = PreviousPowerOf2((_batchLength >> 1) | 1);
            }
        }

        // Largest power of two that is <= x (x must be >= 1).
        private static uint PreviousPowerOf2(uint x)
        {
            x |= (x >> 1);
            x |= (x >> 2);
            x |= (x >> 4);
            x |= (x >> 8);
            x |= (x >> 16);

            return x - (x >> 1);
        }

        // Smallest power of two that is strictly greater than x.
        // Wraps to 0 for x >= 2^31, which is why callers clamp to MaxBatchLength.
        private static uint NextPowerOf2(uint x)
        {
            x |= (x >> 1);
            x |= (x >> 2);
            x |= (x >> 4);
            x |= (x >> 8);
            x |= (x >> 16);

            return x + 1;
        }
    }
    
    /// <summary>
    /// Produces the sample input: the integers 0 through 99,999,999 in ascending order.
    /// </summary>
    public IEnumerable<int> GetData()
    {
        const int firstValue = 0;
        const int itemCount = 100000000;

        IEnumerable<int> sequence = Enumerable.Range(firstValue, itemCount);
        return sequence;
    }
    
    // LINQPad entry point: streams the sample data through the optimizer and,
    // for every batch received, dumps the optimizer's current batch length and best rate.
    void Main()
    {
        DataFlowOptimizer<int> optimizer = new DataFlowOptimizer<int>(GetData());
        IEnumerable<IList<int>> batches = optimizer.GetBatch();

        foreach(var batch in batches)
        {
            var report = string.Format("Length: {0} Rate {1}", optimizer.BatchLength, optimizer.Rate);
            report.Dump();
        }
    }