C# 基于连续调用之间的运行时间优化批处理大小
标签:c#, algorithm, batch-processing, mathematical-optimization
我已经开始尝试创建以下内容:
public static IEnumerable<List<T>> OptimizedBatches<T>(this IEnumerable<T> items)
下面是一个例子:
batch length time
1 100ms
2 102ms
4 110ms
8 111ms
16 118ms
32 119ms
64 134ms
128 500ms <-- doubled length but time it took more than doubled
256 1100ms <-- oh no!!
批次长度 时间
1 100ms
2 102ms
4 110ms
8 111ms
16 118ms
32 119ms
64 134ms
128 500ms <-- 长度翻倍,但耗时增加了不止一倍
256 1100ms <-- 糟糕!!
我在这里看到的主要问题是如何建立一个“最优性评价尺度”,也就是说,为什么你认为 32 – 119ms 是可以接受的,而 256 – 1100ms 是不可接受的;或者说,为什么某些配置比其他配置更好。
完成后,算法将变得简单:只需返回每个输入条件的排名值,并根据“哪个值更高”做出决策
创建此量表的第一步是找出更能描述理想行为的变量。简单的第一种方法:长度/时间。也就是说,根据您的输入:
batch length time ratio1
1 100ms 0.01
2 102ms 0.019
4 110ms 0.036
8 111ms 0.072
16 118ms 0.136
32 119ms 0.269
64 134ms 0.478
128 500ms 0.256
256 1100ms 0.233
比率越大越好。从逻辑上讲,长度为32的0.269与长度为128的0.256并不相同,因此必须考虑更多的信息
您可以创建一个更复杂的排名比率,更好地加权两个相关变量(例如,尝试不同的指数)。但我认为解决这个问题的最佳方法是创建一个“区域”系统,并根据它计算一个通用的排名。例如:
Zone 1 -> length from 1 to 8; ideal ratio for this zone = 0.1
Zone 2 -> length from 9 to 32; ideal ratio for this zone = 0.3
Zone 3 -> length from 33 to 64; ideal ratio for this zone = 0.45
Zone 4 -> length from 65 to 256; ideal ratio for this zone = 0.35
与每个配置相关联的排名,等于该配置的 ratio1 除以其所在区域的理想比率:
2 102ms 0.019 -> (zone 1) 0.019/0.1 = 0.19 (or 1.9 in a 0-10 scale)
16 118ms 0.136 -> (zone 2) 0.136/0.3 = 0.45 (or 4.5 in a 0-10 scale)
etc.
可以比较这些值,因此您会自动知道第二种情况比第一种情况好得多
这只是一个简单的例子,但我想它足以说明真正的问题所在:建立一个准确的排名,从而能够明确地判断哪个配置更好。
我会采用瓦罗卡巴斯建议的排名方法。
以下是一个初始实现,可以帮助您开始:
/// <summary>
/// Splits a sequence into batches while adapting the batch length to the observed
/// throughput (items per stopwatch tick) between consecutive batches.
/// </summary>
/// <remarks>
/// The batch length always stays a power of two. After every
/// <c>SamplesPerDecision</c> full batches the average rate is compared with the best
/// rate seen so far: a better rate doubles the length, a rate more than 10% worse
/// halves it, anything in between leaves it unchanged.
/// </remarks>
/// <typeparam name="T">Type of the items being batched.</typeparam>
public sealed class DataFlowOptimizer<T>
{
    // Number of timed batches that are averaged before the batch length is re-evaluated.
    private const int SamplesPerDecision = 10;

    // A rate below this fraction of the best rate counts as a regression (10% margin of error).
    private const double RateTolerance = .9;

    // Growth stops here so that doubling can neither wrap the uint to 0
    // nor produce a value that turns negative when exposed as int.
    private const uint MaxBatchLength = 1u << 30;

    private readonly IEnumerable<T> _collection;
    private RateRecord bestRate = RateRecord.Default;
    private uint batchLength = 1;

    /// <summary>One throughput sample: how many items were batched in how many ticks.</summary>
    private struct RateRecord
    {
        /// <summary>Starting point with no measurement yet; its rate is 0.</summary>
        public static readonly RateRecord Default = new RateRecord { Length = 1, ElapsedTicks = 0 };

        public int Length { get; set; }

        public long ElapsedTicks { get; set; }

        /// <summary>Items per tick, or 0 when no time has been recorded.</summary>
        public float Rate
        {
            get
            {
                // Guard against division by zero for unmeasured (or sub-tick) samples.
                return ElapsedTicks > 0 ? ((float)Length) / ElapsedTicks : default(float);
            }
        }
    }

    /// <summary>Creates an optimizer over the given sequence.</summary>
    /// <param name="collection">Sequence to split into batches.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public DataFlowOptimizer(IEnumerable<T> collection)
    {
        if (collection == null)
        {
            throw new System.ArgumentNullException("collection");
        }

        _collection = collection;
    }

    /// <summary>Batch length currently used for new batches.</summary>
    public int BatchLength { get { return (int)batchLength; } }

    /// <summary>Best rate (items per stopwatch tick) recorded so far.</summary>
    public float Rate { get { return bestRate.Rate; } }

    /// <summary>
    /// Lazily enumerates the collection in batches. Every item of the source is
    /// returned exactly once, in order; the final batch may be shorter than
    /// <see cref="BatchLength"/>.
    /// </summary>
    /// <returns>A deferred sequence of non-empty batches.</returns>
    public IEnumerable<IList<T>> GetBatch()
    {
        var stopwatch = new Stopwatch();
        var batch = new List<T>();
        var benchmarks = new List<RateRecord>(SamplesPerDecision);

        using (IEnumerator<T> enumerator = _collection.GetEnumerator())
        {
            stopwatch.Start();
            while (enumerator.MoveNext())
            {
                if (batch.Count == batchLength)
                {
                    // The sample covers the time since the previous batch was handed out,
                    // so it includes whatever the consumer did with that batch.
                    benchmarks.Add(new RateRecord { Length = BatchLength, ElapsedTicks = stopwatch.ElapsedTicks });

                    // Hand the filled list to the caller and start a fresh one.
                    var currentBatch = batch;
                    batch = new List<T>();

                    if (benchmarks.Count == SamplesPerDecision)
                    {
                        AdjustBatchLength(benchmarks);
                        benchmarks.Clear();
                    }

                    // Restart before yielding so the consumer's processing time is measured.
                    stopwatch.Restart();
                    yield return currentBatch;
                }

                batch.Add(enumerator.Current);
            }

            // Flush the tail: without this the last (possibly partial) batch would be lost.
            if (batch.Count > 0)
            {
                yield return batch;
            }
        }
    }

    /// <summary>Grows or shrinks the batch length based on the averaged samples.</summary>
    private void AdjustBatchLength(List<RateRecord> benchmarks)
    {
        var currentRate = benchmarks.Average(x => x.Rate);
        if (currentRate > bestRate.Rate)
        {
            bestRate = new RateRecord { Length = BatchLength, ElapsedTicks = (long)benchmarks.Average(x => x.ElapsedTicks) };
            if (batchLength < MaxBatchLength)
            {
                batchLength = NextPowerOf2(batchLength);
            }
        }
        // Set margin of error at 10%
        else if ((bestRate.Rate * RateTolerance) > currentRate)
        {
            // Shift the current length and make sure it's >= 1
            var currentPowOf2 = ((batchLength >> 1) | 1);
            batchLength = PreviousPowerOf2(currentPowOf2);
        }
    }

    /// <summary>Largest power of two that is less than or equal to <paramref name="x"/> (0 for 0).</summary>
    uint PreviousPowerOf2(uint x)
    {
        // Smear the highest set bit into every lower position...
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);
        // ...then drop everything below the highest bit.
        return x - (x >> 1);
    }

    /// <summary>
    /// Smallest power of two strictly greater than <paramref name="x"/>.
    /// Wraps to 0 when <paramref name="x"/> is 2^31 or larger, so callers must cap the input.
    /// </summary>
    uint NextPowerOf2(uint x)
    {
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);
        return (x + 1);
    }
}
公共密封类数据流优化器
{
私有只读IEnumerable\u集合;
private RateRecord bestRate=RateRecord.Default;
专用uint batchLength=1;
私有结构速率记录
{
public static ratecord Default=new ratecord{Length=1,ElapsedTicks=0};
私人浮动利率;
公共整数长度{get;set;}
公共长ElapsedTicks{get;set;}
公众浮动汇率
{
得到
{
如果(_rate==默认值(浮动)&&ElapsedTicks>0)
{
_速率=((浮动)长度)/ElapsedTicks;
}
回报率;
}
}
}
公共数据流优化器(IEnumerable集合)
{
_收集=收集;
}
public int BatchLength{get{return(int)BatchLength;}}
公共浮动利率{get{return bestRate.Rate;}
公共IEnumerable GetBatch()
{
var stopwatch=新秒表();
var batch=新列表();
var基准=新列表(5);
IEnumerator枚举数=null;
尝试
{
枚举数=_集合。GetEnumerator();
单位计数=0;
秒表。开始();
while(枚举数.MoveNext())
{
if(count==batchLength)
{
添加(新速率记录{Length=BatchLength,ElapsedTicks=stopwatch.ElapsedTicks});
var currentBatch=batch.ToList();
batch.Clear();
如果(benchmarks.Count==10)
{
var currentRate=基准平均值(x=>x.Rate);
如果(当前速率>最佳速率)
{
bestRate=new RateRecord{Length=BatchLength,ElapsedTicks=(长)基准。平均值(x=>x.ElapsedTicks)};
batchLength=NextPowerOf2(batchLength);
}
//将误差幅度设置为10%
否则如果((bestRate.Rate*.9)>当前速率)
{
//移动当前长度并确保其>=1
var CurrentPow2=((batchLength>>1)| 1);
batchLength=以前的功率2(当前功率2);
}
基准。清除();
}
计数=0;
stopwatch.Restart();
批量生产;
}
batch.Add(枚举数.Current);
计数++;
}
}
最后
{
if(枚举数!=null)
枚举数。Dispose();
}
秒表;
}
uint先前的幂2(uint x)
{
x |=(x>>1);
x |=(x>>2);
x |=(x>>4);
x |=(x>>8);
x |=(x>>16);
返回x-(x>>1);
}
uint NextPowerOf2(uint x)
{
x |=(x>>1);
x |=(x>>2);
x |=(x>>4);
x |=(x>>8);
x |=(x>>16);
返回(x+1);
}
}
LinqPad中的示例程序:
/// <summary>Produces the sample input: the integers 0 through 99,999,999 as a lazy sequence.</summary>
public IEnumerable<int> GetData()
{
    const int first = 0;
    const int total = 100000000;
    return Enumerable.Range(first, total);
}
// Sample driver: pulls every batch from the optimizer and, after each one,
// reports the optimizer's current batch length and best rate.
void Main()
{
    var source = GetData();
    var optimizer = new DataFlowOptimizer<int>(source);
    foreach (var batch in optimizer.GetBatch())
    {
        // The batch contents are not inspected; only the optimizer's state is printed.
        // NOTE(review): Dump() is presumably the LINQPad output extension - confirm.
        var report = string.Format("Length: {0} Rate {1}", optimizer.BatchLength, optimizer.Rate);
        report.Dump();
    }
}
public IEnumerable GetData()
{
返回可枚举的范围(0,100000000);
}
void Main()
{
var optimizer=newdataflowoptimizer(GetData());
foreach(optimizer.GetBatch()中的var batch)
{
Format(“长度:{0}速率{1}”,optimizer.BatchLength,optimizer.Rate).Dump();
}
}
1. 描述一个目标函数 f,它将批量大小 s 和运行时间 t(s) 映射到分数 f(s, t(s))
2. 尝试大量的 s 值,并为每个值计算 f(s, t(s))
3. 选择使 f(s, t(s)) 最大化的 s 值
/// <summary>
/// Splits a sequence into batches while adapting the batch length to the observed
/// throughput (items per stopwatch tick) between consecutive batches.
/// </summary>
/// <remarks>
/// The batch length always stays a power of two. After every
/// <c>SamplesPerDecision</c> full batches the average rate is compared with the best
/// rate seen so far: a better rate doubles the length, a rate more than 10% worse
/// halves it, anything in between leaves it unchanged.
/// </remarks>
/// <typeparam name="T">Type of the items being batched.</typeparam>
public sealed class DataFlowOptimizer<T>
{
    // Number of timed batches that are averaged before the batch length is re-evaluated.
    private const int SamplesPerDecision = 10;

    // A rate below this fraction of the best rate counts as a regression (10% margin of error).
    private const double RateTolerance = .9;

    // Growth stops here so that doubling can neither wrap the uint to 0
    // nor produce a value that turns negative when exposed as int.
    private const uint MaxBatchLength = 1u << 30;

    private readonly IEnumerable<T> _collection;
    private RateRecord bestRate = RateRecord.Default;
    private uint batchLength = 1;

    /// <summary>One throughput sample: how many items were batched in how many ticks.</summary>
    private struct RateRecord
    {
        /// <summary>Starting point with no measurement yet; its rate is 0.</summary>
        public static readonly RateRecord Default = new RateRecord { Length = 1, ElapsedTicks = 0 };

        public int Length { get; set; }

        public long ElapsedTicks { get; set; }

        /// <summary>Items per tick, or 0 when no time has been recorded.</summary>
        public float Rate
        {
            get
            {
                // Guard against division by zero for unmeasured (or sub-tick) samples.
                return ElapsedTicks > 0 ? ((float)Length) / ElapsedTicks : default(float);
            }
        }
    }

    /// <summary>Creates an optimizer over the given sequence.</summary>
    /// <param name="collection">Sequence to split into batches.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public DataFlowOptimizer(IEnumerable<T> collection)
    {
        if (collection == null)
        {
            throw new System.ArgumentNullException("collection");
        }

        _collection = collection;
    }

    /// <summary>Batch length currently used for new batches.</summary>
    public int BatchLength { get { return (int)batchLength; } }

    /// <summary>Best rate (items per stopwatch tick) recorded so far.</summary>
    public float Rate { get { return bestRate.Rate; } }

    /// <summary>
    /// Lazily enumerates the collection in batches. Every item of the source is
    /// returned exactly once, in order; the final batch may be shorter than
    /// <see cref="BatchLength"/>.
    /// </summary>
    /// <returns>A deferred sequence of non-empty batches.</returns>
    public IEnumerable<IList<T>> GetBatch()
    {
        var stopwatch = new Stopwatch();
        var batch = new List<T>();
        var benchmarks = new List<RateRecord>(SamplesPerDecision);

        using (IEnumerator<T> enumerator = _collection.GetEnumerator())
        {
            stopwatch.Start();
            while (enumerator.MoveNext())
            {
                if (batch.Count == batchLength)
                {
                    // The sample covers the time since the previous batch was handed out,
                    // so it includes whatever the consumer did with that batch.
                    benchmarks.Add(new RateRecord { Length = BatchLength, ElapsedTicks = stopwatch.ElapsedTicks });

                    // Hand the filled list to the caller and start a fresh one.
                    var currentBatch = batch;
                    batch = new List<T>();

                    if (benchmarks.Count == SamplesPerDecision)
                    {
                        AdjustBatchLength(benchmarks);
                        benchmarks.Clear();
                    }

                    // Restart before yielding so the consumer's processing time is measured.
                    stopwatch.Restart();
                    yield return currentBatch;
                }

                batch.Add(enumerator.Current);
            }

            // Flush the tail: without this the last (possibly partial) batch would be lost.
            if (batch.Count > 0)
            {
                yield return batch;
            }
        }
    }

    /// <summary>Grows or shrinks the batch length based on the averaged samples.</summary>
    private void AdjustBatchLength(List<RateRecord> benchmarks)
    {
        var currentRate = benchmarks.Average(x => x.Rate);
        if (currentRate > bestRate.Rate)
        {
            bestRate = new RateRecord { Length = BatchLength, ElapsedTicks = (long)benchmarks.Average(x => x.ElapsedTicks) };
            if (batchLength < MaxBatchLength)
            {
                batchLength = NextPowerOf2(batchLength);
            }
        }
        // Set margin of error at 10%
        else if ((bestRate.Rate * RateTolerance) > currentRate)
        {
            // Shift the current length and make sure it's >= 1
            var currentPowOf2 = ((batchLength >> 1) | 1);
            batchLength = PreviousPowerOf2(currentPowOf2);
        }
    }

    /// <summary>Largest power of two that is less than or equal to <paramref name="x"/> (0 for 0).</summary>
    uint PreviousPowerOf2(uint x)
    {
        // Smear the highest set bit into every lower position...
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);
        // ...then drop everything below the highest bit.
        return x - (x >> 1);
    }

    /// <summary>
    /// Smallest power of two strictly greater than <paramref name="x"/>.
    /// Wraps to 0 when <paramref name="x"/> is 2^31 or larger, so callers must cap the input.
    /// </summary>
    uint NextPowerOf2(uint x)
    {
        x |= (x >> 1);
        x |= (x >> 2);
        x |= (x >> 4);
        x |= (x >> 8);
        x |= (x >> 16);
        return (x + 1);
    }
}
/// <summary>Produces the sample input: the integers 0 through 99,999,999 as a lazy sequence.</summary>
public IEnumerable<int> GetData()
{
    const int first = 0;
    const int total = 100000000;
    return Enumerable.Range(first, total);
}
// Sample driver: pulls every batch from the optimizer and, after each one,
// reports the optimizer's current batch length and best rate.
void Main()
{
    var source = GetData();
    var optimizer = new DataFlowOptimizer<int>(source);
    foreach (var batch in optimizer.GetBatch())
    {
        // The batch contents are not inspected; only the optimizer's state is printed.
        // NOTE(review): Dump() is presumably the LINQPad output extension - confirm.
        var report = string.Format("Length: {0} Rate {1}", optimizer.BatchLength, optimizer.Rate);
        report.Dump();
    }
}