Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/performance/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何最大化进程吞吐量(C#)?_C#_Performance_System.reactive_Tpl Dataflow_Blockingcollection - Fatal编程技术网

如何最大化进程吞吐量(C#)?

如何最大化进程吞吐量(C#)?,c#,performance,system.reactive,tpl-dataflow,blockingcollection,C#,Performance,System.reactive,Tpl Dataflow,Blockingcollection,我想以最大吞吐量处理一些文件。文件的路径保存在数据库中。我需要从数据库中获取文件路径,将其状态更改为“正在处理”,对其进行处理,然后将其状态更改为“已完成”或“失败” 目前,我分批获取文件(100个文件),以减少完成的查询数量并并行处理它们(并行度为10)。但这样一来,在批处理快结束时,我就失去了吞吐量。当批处理中剩余的文件少于10个时,并行度不再是10,而是降低 以下是我所拥有的: private async Task CopyPendingFilesAsync(SourcePath sour

我想以最大吞吐量处理一些文件。文件的路径保存在数据库中。我需要从数据库中获取文件路径,将其状态更改为“正在处理”,对其进行处理,然后将其状态更改为“已完成”或“失败”

目前,我分批获取文件(每批100个),以减少查询次数,并以并行方式处理它们(并行度为10)。但这样一来,在每一批快处理完时,吞吐量就会下降:当批次中剩余的文件少于10个时,实际并行度不再是10,而是随之降低。

以下是我所拥有的:

/// <summary>
/// Fetches pending files for the given source path batch by batch and processes
/// each batch, returning once a fetch yields an empty batch.
/// </summary>
/// <remarks>
/// For every batch: the files are flagged as in progress, processed in parallel with
/// the configured degree of parallelism, and then handed back to the file service so
/// their resulting statuses can be stored. <c>ForAll</c> blocks until the whole batch
/// has been processed, so the next batch is never fetched before the current one is done.
/// </remarks>
/// <param name="sourcePath">Source path whose <c>Id</c> selects the files to fetch.</param>
/// <param name="options">Options forwarded unchanged to <c>ProcessFile</c>.</param>
private async Task CopyPendingFilesAsync(SourcePath sourcePath, Options options)
{
    while (true)
    {
        var fileBatch = _sourceFileService.GetSourceFileBatchBySourcePathId(
            sourcePath.Id, _dataSourceExportConfig.FileCopyBatchSize, Status.Pending);
        if (fileBatch.Count == 0)
            return;

        await SetInProgressStatusForBatch(fileBatch)
            .ConfigureAwait(false);

        // NOTE(review): destinationBase is not declared in this method; it is assumed
        // to be a member of the enclosing type - confirm.
        fileBatch
            .AsParallel()
            .WithDegreeOfParallelism(_dataSourceExportConfig.FileCopyDegreeOfParallelism)
            .ForAll(file => ProcessFile(file, destinationBase, options));

        // ProcessFile has written the outcome onto each file object; pass the batch on
        // so those statuses are stored.
        await _sourceFileService
            .UpdateSourceFilesStatusAsync(fileBatch)
            .ConfigureAwait(false);
    }
}

/// <summary>
/// Flags every file in the batch as in progress and passes the batch to
/// <c>_sourceFileService.UpdateSourceFilesStatusAsync</c>.
/// </summary>
/// <param name="fileBatch">Files to flag; each item's <c>Status</c> is overwritten.</param>
private async Task SetInProgressStatusForBatch(IEnumerable<SourceFile> fileBatch)
{
    foreach (var sourceFile in fileBatch)
    {
        sourceFile.Status = Status.InProgress;
    }

    var statusUpdate = _sourceFileService.UpdateSourceFilesStatusAsync(fileBatch);
    await statusUpdate.ConfigureAwait(false);
}

/// <summary>
/// Processes a single file and records the outcome on the file object itself.
/// </summary>
/// <remarks>
/// On success the file is marked <c>Status.Success</c> and its exception message is
/// cleared. An exception thrown by the processing step is logged and stored on the
/// file as <c>Status.Failed</c> together with the exception's message.
/// </remarks>
/// <param name="file">File to process; its status fields are overwritten.</param>
/// <param name="destinationBase">Not used by the code shown here.</param>
/// <param name="options">Not used by the code shown here.</param>
private void ProcessFile(
    SourceFile file,
    string destinationBase,
    Options options)
{
    try
    {
        //do something ...

        file.ExceptionMessage = null;
        file.Status = Status.Success;
    }
    catch (Exception failure)
    {
        _logger.Error(failure);
        file.ExceptionMessage = failure.Message;
        file.Status = Status.Failed;
    }
}
private async Task copypendingfileasync(SourcePath SourcePath,Options)
{
var batchIndex=0;
while(true)
{
var fileBatch=\u sourceFileService.GetSourceFileBatchBySourcePathId(
sourcePath.Id,_dataSourceExportConfig.FileCopyBatchSize,Status.Pending);
如果(fileBatch.Count==0)
返回;
等待SetInProgressStatusForBatch(fileBatch)
.配置等待(错误);
文件批处理
.天冬酰胺()
.WithDegreeOfParallelism(_dataSourceExportConfig.FileCopyDegreeOfParallelism)
.ForAll(file=>ProcessFile(file,destinationBase,options));
等待(u sourceFileService)
.UpdateSourceFileStatusAsync(文件批处理)
.配置等待(错误);
batchIndex++;
}
}
专用异步任务SetInProgressStatusForBatch(IEnumerable fileBatch)
{
foreach(fileBatch中的var文件)
file.Status=Status.InProgress;
等待(u sourceFileService)
.UpdateSourceFileStatusAsync(文件批处理)
.配置等待(错误);
}
私有void进程文件(
源文件,
字符串destinationBase,
选项(可选)
{
尝试
{
//做点什么。。。
file.Status=Status.Success;
file.ExceptionMessage=null;
}
捕获(例外情况除外)
{
_记录器错误(ex);
file.Status=Status.Failed;
file.ExceptionMessage=ex.Message;
}
}
如何使吞吐量最大化?我读到了关于BlockingCollection、TPL Dataflow和Rx的生产者-消费者模式,我非常确定我想要实现的目标可以通过上面的任何一个实现,但到目前为止我还无法实现。使用生产者-消费者模式时,我的生产者比消费者的速度快得多;使用TPL Dataflow时,我被BatchBlock卡住了;Rx我还没有试过。有人能给我指一下正确的方向吗?

更新: 以下是一个最小、完整且可验证的示例:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    /// <summary>
    /// Console entry point: runs the file service once, prints every file's id and
    /// status, then prints the elapsed time and waits for a line of input.
    /// </summary>
    internal static class Program
    {
        private static void Main()
        {
            Console.WriteLine("Processing files");

            // Timing starts before the service is constructed.
            var timer = Stopwatch.StartNew();

            var service = new FileService();
            service.ProcessPendingFiles();

            foreach (var processed in service.SourceFiles)
                Console.WriteLine($"{processed.Id} {processed.Status}");

            Console.WriteLine(timer.Elapsed);

            // Keeps the console window open until a line is entered.
            Console.ReadLine();
        }
    }

    /// <summary>
    /// Processes all pending source files while keeping a fixed number of workers busy.
    /// </summary>
    /// <remarks>
    /// Files are still fetched and flagged as in progress one batch at a time, and
    /// results are still saved in bulk. The workers, however, pull files from a shared
    /// bounded queue instead of being joined after every batch, so a few slow files at
    /// the end of one batch no longer leave the remaining workers idle.
    /// </remarks>
    public class FileService
    {
        private const int BatchSize = 100;
        private const int DegreeOfParallelism = 10;

        //this SourceFiles collection replaces the Sqlite database where the data is actually stored
        public ICollection<SourceFile> SourceFiles =
            Enumerable
                .Range(0, 1000)
                .Select(i =>
                    new SourceFile
                    {
                        Id = i,
                        Path = "source file path",
                        Status = Status.Pending,
                    })
                .ToList();

        /// <summary>
        /// Processes files until no file with <see cref="Status.Pending"/> is left.
        /// Blocks the caller until every worker has finished and all results are saved.
        /// </summary>
        public void ProcessPendingFiles()
        {
            var finished = new ConcurrentQueue<SourceFile>();

            // The capacity bound makes Add block once a full batch is waiting, so the
            // producer loop below cannot run arbitrarily far ahead of the workers.
            using (var pending = new BlockingCollection<SourceFile>(BatchSize))
            {
                var workers = Enumerable
                    .Range(0, DegreeOfParallelism)
                    .Select(_ => Task.Factory.StartNew(
                        () => RunWorker(pending, finished),
                        CancellationToken.None,
                        TaskCreationOptions.LongRunning,
                        TaskScheduler.Default))
                    .ToArray();

                try
                {
                    while (true)
                    {
                        var fileBatch = GetSourceFileBatch(BatchSize, Status.Pending);
                        if (fileBatch.Count == 0)
                            break;

                        // Flag the batch first so the next fetch does not return it again.
                        SetInProgressStatusForBatch(fileBatch);

                        foreach (var file in fileBatch)
                            pending.Add(file);

                        // Save whatever the workers completed in the meantime.
                        SaveFinishedFiles(finished);
                    }
                }
                finally
                {
                    // Always release the workers and wait for them, so none of them
                    // touches the queue after it has been disposed.
                    pending.CompleteAdding();
                    Task.WaitAll(workers);
                }
            }

            // Save the results of the files completed after the last fetch.
            SaveFinishedFiles(finished);
        }

        //takes files from the queue until it is completed and empty,
        //processes each one and hands it over for the next bulk save
        private void RunWorker(
            BlockingCollection<SourceFile> pending,
            ConcurrentQueue<SourceFile> finished)
        {
            foreach (var file in pending.GetConsumingEnumerable())
            {
                ProcessFile(file);
                finished.Enqueue(file);
            }
        }

        //drains the files completed so far and saves them with a single bulk update
        private void SaveFinishedFiles(ConcurrentQueue<SourceFile> finished)
        {
            var completedBatch = new List<SourceFile>();

            SourceFile file;
            while (finished.TryDequeue(out file))
                completedBatch.Add(file);

            if (completedBatch.Count > 0)
                UpdateSourceFiles(completedBatch);
        }

        //returns at most batchSize files that currently have the given status
        private ICollection<SourceFile> GetSourceFileBatch(int batchSize, Status status)
            => SourceFiles
                .Where(sf => sf.Status == status)
                .Take(batchSize)
                .ToList();

        //set status to in progress for all files in the batch
        //and save the changes to database
        //in the application this is actually done with a bulk update and the method is async
        private void SetInProgressStatusForBatch(IEnumerable<SourceFile> fileBatch)
        {
            foreach (var file in fileBatch)
            {
                file.Status = Status.InProgress;

                var sourceFile = SourceFiles.First(sf => sf.Id == file.Id);
                sourceFile.Status = file.Status;
            }
        }

        //set status and exception messages for all files in the batch
        //and save the changes to database
        //in the application this is actually done with a bulk update and the method is async
        private void UpdateSourceFiles(IEnumerable<SourceFile> fileBatch)
        {
            foreach (var file in fileBatch)
            {
                var sourceFile = SourceFiles.First(sf => sf.Id == file.Id);
                sourceFile.Status = file.Status;
                sourceFile.ExceptionMessage = file.ExceptionMessage;
            }
        }

        //processes one file and records the outcome on the file object;
        //exceptions thrown by the work are stored on the file instead of propagating
        private void ProcessFile(SourceFile file)
        {
            try
            {
                //do something ...
                Thread.Sleep(20);

                file.Status = Status.Success;
                file.ExceptionMessage = null;
            }
            catch (Exception ex)
            {
                file.Status = Status.Failed;
                file.ExceptionMessage = ex.Message;
            }
        }
    }

    /// <summary>
    /// One file record: its identifier, its path, its processing status and the
    /// message of the exception recorded for it, if any.
    /// </summary>
    public class SourceFile
    {
        /// <summary>Identifier used to look the record up in the file collection.</summary>
        public int Id { get; set; }
        /// <summary>Path of the file.</summary>
        public string Path { get; set; }
        /// <summary>Current processing status.</summary>
        public Status Status { get; set; }
        /// <summary>Exception message stored on failure; set to null on success.</summary>
        public string ExceptionMessage { get; set; }
    }

    /// <summary>Processing state of a source file.</summary>
    public enum Status
    {
        /// <summary>Initial state of a file that has not been picked up yet.</summary>
        Pending = 0,
        /// <summary>The file has been picked up in a batch.</summary>
        InProgress = 1,
        /// <summary>Processing finished without an exception.</summary>
        Success = 2,
        /// <summary>Processing threw an exception.</summary>
        Failed = 3,
    }
}
使用系统;
使用System.Collections.Generic;
使用系统诊断;
使用System.Linq;
使用系统线程;
名称空间控制台EAPP1
{
内部静态类程序
{
私有静态void Main()
{
Console.WriteLine(“处理文件”);
var stopWatch=新秒表();
秒表。开始();
var fileService=new fileService();
ProcessPendingFiles();
foreach(fileService.SourceFiles中的var sourceFile)
{
WriteLine($“{sourceFile.Id}{sourceFile.Status}”);
}
控制台写入线(秒表已过);
Console.ReadLine();
}
}
公共类文件服务
{
私有常量int BatchSize=100;
私有常数int DegreeOfParallelism=10;
//此SourceFiles属性替换实际存储数据的Sqlite数据库
公共ICollection源文件=
可枚举
.范围(0,1000)
.选择(i=>
新源文件
{
Id=i,
Path=“源文件路径”,
状态=状态。挂起,
})
.ToList();
public void ProcessPendingFiles()
{
while(true)
{
var fileBatch=GetSourceFileBatch(BatchSize,Status.Pending);
如果(fileBatch.Count==0)
返回;
SetInProgressStatusForBatch(fileBatch);
文件批处理
.天冬酰胺()
.带平行度(平行度)
.ForAll(进程文件);
更新资源文件(fileBatch);
}
}
私有ICollection GetSourceFileBatch(int batchSize,状态)
=>源文件
.Where(sf=>sf.Status==Status)
.Take(批量大小)
.ToList();
//将批处理中所有文件的状态设置为“进行中”
//并将更改保存到数据库
//在应用程序中,这实际上是通过批量更新完成的,方法是异步的
私有void SetInProgressStatusForBatch(IEnumerable fileBatch)
{
foreach(fileBatch中的var文件)
{
file.Status=Status.InProgress;
var sourceFile=SourceFiles.First(sf=>sf.Id==file.Id);
sourceFile.Status=file.Status;
}
}
//为批处理中的所有文件设置状态和异常消息
//并将更改保存到数据库
//在应用程序中,这实际上是通过批量更新完成的,方法是异步的
私有void更新资源文件(IEnumerable fileBatch)
{
foreach(fileBatch中的var文件)
{
var sourceFile=SourceFiles.First(sf=>sf.Id==file.Id);
sourceFile.Status=file.Status;
sourceFile.ExceptionMessage=file.ExceptionMessage;
}
}
私有void进程文件(源文件)
{
尝试
{
//做点什么。。。
睡眠(20);
/// <summary>
/// Producer/consumer skeleton: items handed to <see cref="Produce"/> are queued and
/// taken one at a time by a single background thread started in the constructor.
/// </summary>
public class YourCode
{
  private readonly BlockingCollection<object> _queue = new BlockingCollection<object>();

  /// <summary>Starts the background consumer thread.</summary>
  public YourCode()
  {
    new Thread(StartConsuming) { IsBackground = true }.Start();
  }

  /// <summary>Adds an item to the queue for the consumer thread.</summary>
  /// <param name="item">Item to enqueue.</param>
  public void Produce(object item)
  {
    _queue.Add(item);
  }

  // Consumer loop: waits for the next item for as long as the queue is open for adding.
  private void StartConsuming()
  {
    foreach (object item in _queue.GetConsumingEnumerable())
    {
      // Add your code to process the item here.
      // Do not start another task or thread.
    }
  }
}
/// <summary>
/// Processes files with a fixed number of concurrent workers, each of which keeps
/// asking the file service for the next file until none is left.
/// </summary>
class WorkController
{
    // Number of workers started by CopyPendingFilesAsync.
    private const int WorkerCount = 10;

    private DataSourceExportConfig _dataSourceExportConfig;
    private SourceFileService _sourceFileService;
    private string destinationBase;

    /// <summary>
    /// Starts <c>WorkerCount</c> workers and completes when all of them have finished.
    /// </summary>
    /// <param name="sourcePath">Forwarded unchanged to every worker.</param>
    /// <param name="options">Forwarded unchanged to every worker.</param>
    public async Task CopyPendingFilesAsync(SourcePath sourcePath, Options options)
    {
        // Materialize the sequence so every worker is started before the first await.
        var workers = Enumerable
            .Range(0, WorkerCount)
            .Select(_ => Worker(sourcePath, options))
            .ToArray();

        await Task.WhenAll(workers);
    }

    /// <summary>
    /// Runs one worker loop on the thread pool: fetch the next file, process it,
    /// repeat until <c>GetNextFile</c> returns false.
    /// </summary>
    /// <remarks>
    /// The loop contains no awaits, so as a plain <c>async</c> method it ran to
    /// completion on the caller's thread before returning its task; the workers then
    /// executed one after another. <c>Task.Run</c> makes them actually run concurrently.
    /// NOTE(review): this assumes <c>GetNextFile</c> may be called from several
    /// threads at once - confirm against SourceFileService.
    /// </remarks>
    /// <param name="sourcePath">Not used by the loop shown here.</param>
    /// <param name="options">Forwarded unchanged to <c>ProcessFile</c>.</param>
    /// <returns>A task that completes when this worker finds no more files.</returns>
    public Task Worker(SourcePath sourcePath, Options options)
    {
        return Task.Run(() =>
        {
            SourceFile file;

            while (_sourceFileService.GetNextFile(out file))
            {
                ProcessFile(file, destinationBase, options);
            }
        });
    }

    // Placeholder for the per-file work; intentionally empty in this sketch.
    private void ProcessFile(SourceFile file, string destinationBase, Options options)
    {
    }
}
/// <summary>
/// Four-stage dataflow pipeline: fetch a batch of files for a source path, flag the
/// batch as in progress, process it, then flag it as completed.
/// </summary>
public class ProcessFilesFlow
{
    private readonly TransformBlock<SourcePath, IEnumerable<SourceFile>> _getSourceFileBatch;
    private readonly TransformBlock<IEnumerable<SourceFile>, IEnumerable<SourceFile>> _setStatusToProcessing;
    private readonly TransformBlock<IEnumerable<SourceFile>, IEnumerable<SourceFile>> _processFiles;
    private readonly ActionBlock<IEnumerable<SourceFile>> _setStatusToComplete;

    /// <summary>Creates the four stages and links them into one pipeline.</summary>
    public ProcessFilesFlow()
    {
        // Every stage uses the same settings, but gets its own options instance so
        // each one can be tuned for throughput independently.
        var fetchOptions = CreateStageOptions();
        var markProcessingOptions = CreateStageOptions();
        var processOptions = CreateStageOptions();
        var markCompleteOptions = CreateStageOptions();

        // Stages, in pipeline order.
        _getSourceFileBatch = new TransformBlock<SourcePath, IEnumerable<SourceFile>>(
            sourcePath => GetSourceFileBatch(sourcePath),
            fetchOptions);
        _setStatusToProcessing = new TransformBlock<IEnumerable<SourceFile>, IEnumerable<SourceFile>>(
            files => SetStatusToProcessingAsync(files),
            markProcessingOptions);
        _processFiles = new TransformBlock<IEnumerable<SourceFile>, IEnumerable<SourceFile>>(
            files => ProcessFiles(files),
            processOptions);
        _setStatusToComplete = new ActionBlock<IEnumerable<SourceFile>>(
            files => SetStatusToCompleteAsync(files),
            markCompleteOptions);

        // Completion of a stage is passed on to the stage linked after it.
        var propagateCompletion = new DataflowLinkOptions { PropagateCompletion = true };
        _getSourceFileBatch.LinkTo(_setStatusToProcessing, propagateCompletion);
        _setStatusToProcessing.LinkTo(_processFiles, propagateCompletion);
        _processFiles.LinkTo(_setStatusToComplete, propagateCompletion);
    }

    /// <summary>
    /// Sends every source path into the pipeline, marks the first stage complete and
    /// waits for the last stage to complete.
    /// </summary>
    /// <param name="sourcePaths">Source paths to send to the first stage.</param>
    public async Task ProcessAll(IEnumerable<SourcePath> sourcePaths)
    {
        foreach (var sourcePath in sourcePaths)
        {
            await _getSourceFileBatch.SendAsync(sourcePath);
        }

        _getSourceFileBatch.Complete();
        await _setStatusToComplete.Completion;
    }

    // Settings shared by all stages: up to 10 queued items, up to 10 items handled
    // concurrently, and results passed on as soon as they are ready rather than in
    // input order.
    private static ExecutionDataflowBlockOptions CreateStageOptions()
    {
        return new ExecutionDataflowBlockOptions
        {
            BoundedCapacity = 10,
            MaxDegreeOfParallelism = 10,
            EnsureOrdered = false
        };
    }

    // Placeholder: returns an empty batch; the real lookup by source path goes here.
    private IEnumerable<SourceFile> GetSourceFileBatch(SourcePath sourcePath)
    {
        return Enumerable.Empty<SourceFile>();
    }

    // Updates the status of each file in turn, then passes the batch on.
    private async Task<IEnumerable<SourceFile>> SetStatusToProcessingAsync(IEnumerable<SourceFile> sourceFiles)
    {
        foreach (var sourceFile in sourceFiles)
        {
            await sourceFile.UpdateStatusAsync("In Progress");
        }

        return sourceFiles;
    }

    // Processes each file in turn, then passes the batch on.
    private IEnumerable<SourceFile> ProcessFiles(IEnumerable<SourceFile> sourceFiles)
    {
        foreach (var sourceFile in sourceFiles)
        {
            sourceFile.Process();
        }

        return sourceFiles;
    }

    // Updates the status of each file in turn; last stage of the pipeline.
    private async Task SetStatusToCompleteAsync(IEnumerable<SourceFile> sourceFiles)
    {
        foreach (var sourceFile in sourceFiles)
        {
            await sourceFile.UpdateStatusAsync("Completed");
        }
    }
}