C# 如何将带有循环的 TPL 数据流(TPL Dataflow)标记为完成?
给定 TPL 数据流中的以下设置:C# 如何将带有循环的 TPL 数据流(TPL Dataflow)标记为完成?,c#,.net,task-parallel-library,tpl-dataflow,C#,.net,Task Parallel Library,Tpl Dataflow,给定 TPL 数据流中的以下设置: var directory = new DirectoryInfo(@"C:\dev\kortforsyningen_dsm\tiles"); var dirBroadcast=new BroadcastBlock<DirectoryInfo>(dir=>dir); var dirfinder = new TransformManyBlock<DirectoryInfo, DirectoryInfo>((dir) => {
// Builds a cyclic dataflow: directories are broadcast to a sub-directory finder
// (which feeds its results back into the broadcast) and to a file finder whose
// output is handed to the tile combiner block.
var directory = new DirectoryInfo(@"C:\dev\kortforsyningen_dsm\tiles");
var dirBroadcast = new BroadcastBlock<DirectoryInfo>(dir => dir);

// Enumerate the directory that was actually received. The previous version
// used the captured root `directory` here, so every message re-posted the
// root's children and the cycle never descended or terminated.
var dirfinder = new TransformManyBlock<DirectoryInfo, DirectoryInfo>((dir) =>
{
    return dir.GetDirectories();
});
var tileFilder = new TransformManyBlock<DirectoryInfo, FileInfo>((dir) =>
{
    return dir.GetFiles();
});

dirBroadcast.LinkTo(dirfinder);
dirBroadcast.LinkTo(tileFilder);
dirfinder.LinkTo(dirBroadcast); // feedback edge that closes the cycle

var block = new XYZTileCombinerBlock<FileInfo>(3, (file) =>
    {
        // The last three path segments are parsed as integers (file name without extension).
        var coordinate = file.FullName.Split('\\').Reverse().Take(3).Reverse().Select(s => int.Parse(Path.GetFileNameWithoutExtension(s))).ToArray();
        return XYZTileCombinerBlock<CloudBlockBlob>.TileXYToQuadKey(coordinate[0], coordinate[1], coordinate[2]);
    },
    (quad) =>
        XYZTileCombinerBlock<FileInfo>.QuadKeyToTileXY(quad,
            (z, x, y) => new FileInfo(Path.Combine(directory.FullName, string.Format("{0}/{1}/{2}.png", z, x, y)))),
    () => new TransformBlock<string, string>((s) =>
    {
        Trace.TraceInformation("Combining {0}", s);
        return s;
    }));

tileFilder.LinkTo(block);

using (new TraceTimer("Time"))
{
    dirBroadcast.Post(directory);
    block.LinkTo(new ActionBlock<FileInfo>((s) =>
    {
        Trace.TraceInformation("Done combining : {0}", s.Name);
    }));

    // NOTE(review): Complete() is requested immediately after the first Post,
    // while the cycle above may still be producing files. Nothing here tracks
    // when the directory walk has finished - confirm how completion should be
    // signalled before relying on this.
    block.Complete();
    block.Completion.Wait();
}
var directory=newdirectoryinfo(@“C:\dev\kortforsyningen\u dsm\tiles”);
var dirBroadcast=新广播块(dir=>dir);
var dirfinder=new TransformManyBlock((dir)=>
{
返回directory.GetDirectories();
});
var tilefile=new TransformManyBlock((dir)=>
{
返回directory.GetFiles();
});
dirBroadcast.LinkTo(dirfinder);
dirBroadcast.LinkTo(tilefirder);
dirfinder.LinkTo(dirBroadcast);
var块=新的XYZTileCombinerBlock(3,(文件)=>
{
var coordinate=file.FullName.Split('\\').Reverse().Take(3).Reverse().Select(s=>int.Parse(Path.GetFileNameWithoutExtension)).ToArray();
返回XYZTileCombinerBlock.TileXYToQuadKey(坐标[0],坐标[1],坐标[2]);
},
(四元)=>
XYZTileCombinerBlock.QuadKeyToTileXY(四元,
(z,x,y)=>newfileinfo(Path.Combine(directory.FullName,string.Format(“{0}/{1}/{2}.png”,z,x,y)),
()=>新转换块((s)=>
{
Trace.TraceInformation(“合并{0}”,s);
返回s;
}));
tileFilder.LinkTo(块);
使用(新TraceTimer(“时间”))
{
dirBroadcast.Post(目录);
block.LinkTo(新操作块)=>
{
Trace.TraceInformation(“完成组合:{0}”,s.Name);
}));
block.Complete();
block.Completion.Wait();
}
我想知道,由于图中存在循环,应该如何把这个数据流标记为完成。一个目录被发布到 dirBroadcast 广播块,广播块把它转发给 dirfinder,而 dirfinder 可能会把新的子目录再发回广播块,因此我不能简单地把广播块标记为完成,否则它会拒绝 dirfinder 之后发回的任何目录。我是应该重新设计,自行跟踪待处理目录的数量,还是 TPL 中已经有相关的机制?我看不出有任何方法可以做到这一点,因为每个块(
dirBroadcast
和 tileFilder
)都依赖于另一个块,无法单独完成。
我建议您重新设计目录遍历部分,不要用 TPL 数据流来实现,因为它并不适合这类问题。在我看来,更好的方法是递归扫描目录,并把找到的文件依次发布(Post)到块中
:
/// <summary>
/// Posts every file of <paramref name="directoryInfo"/> to <paramref name="block"/>,
/// then does the same for each of its sub-directories, depth first.
/// </summary>
/// <param name="directoryInfo">Directory whose files are posted before descending.</param>
/// <param name="block">Block that receives each <see cref="FileInfo"/> via Post.</param>
private static void FillBlock(DirectoryInfo directoryInfo, XYZTileCombinerBlock<FileInfo> block)
{
    // Files of the current level go first.
    FileInfo[] entries = directoryInfo.GetFiles();
    for (int i = 0; i < entries.Length; i++)
    {
        block.Post(entries[i]);
    }

    // Then recurse into each child directory in the order returned.
    DirectoryInfo[] children = directoryInfo.GetDirectories();
    for (int i = 0; i < children.Length; i++)
    {
        FillBlock(children[i], block);
    }
}
如果代码的目的是以某种并行方式遍历目录结构,那么我建议不要使用 TPL 数据流,而是使用 Microsoft 的 Reactive Extensions(Rx)。我认为这样会简单得多。我会这样做:首先定义一个递归函数来构建目录序列:
// Recursive observable factory: yields the given directory first, followed by
// everything produced by applying the same factory to each of its children.
// Declared as null first so the lambda can refer to itself.
Func<DirectoryInfo, IObservable<DirectoryInfo>> recurse = null;
recurse = di =>
{
    var self = Observable.Return(di);
    var descendants = di.GetDirectories()
        .ToObservable()
        .SelectMany(di2 => recurse(di2));
    return self.Concat(descendants).ObserveOn(Scheduler.Default);
};
现在,我可以这样操作查询:
// Log the name of each element the query produces.
// NOTE(review): `query` is not defined in the visible snippet - confirm its definition.
query.Subscribe(s => Trace.TraceInformation("Done combining : {0}", s.Name));
现在,我可能在您的自定义代码中遗漏了一点,但如果您希望采用这种方法,我相信您可以非常轻松地修复任何逻辑问题
此代码在子目录和文件用完时自动处理完成
要将 Rx 添加到您的项目中,请在 NuGet 中查找“Rx-Main”。
下面给出我最终实际采用的方案,即 TPL 与 Rx 的组合:
// Recursive observable factory: emits the directory itself, then recurses into
// those child directories whose (numeric) name lies within
// [tl_tile[0], br_tile[0]].
Func<DirectoryInfo, IObservable<DirectoryInfo>> recurse = null;
recurse = di =>
    Observable
        .Return(di)
        .Concat(di.GetDirectories()
            .Where(d => int.Parse(d.Name) <= br_tile[0] && int.Parse(d.Name) >= tl_tile[0])
            .ToObservable()
            .SelectMany(di2 => recurse(di2)))
        .ObserveOn(Scheduler.Default);

// For every directory reached, emit the files whose numeric name satisfies the
// bounds below (>= br_tile[1] and <= tl_tile[1], exactly as in the original).
var query =
    from di in recurse(new DirectoryInfo(Path.Combine(directory.FullName, baselvl.ToString())))
    from fi in di.GetFiles().Where(f => int.Parse(Path.GetFileNameWithoutExtension(f.Name)) >= br_tile[1]
        && int.Parse(Path.GetFileNameWithoutExtension(f.Name)) <= tl_tile[1]).ToObservable()
    select fi;

// AsObserver() forwards OnNext to the block and calls Complete()/Fault() on it
// when the sequence ends. Because the query is delivered on Scheduler.Default,
// Subscribe returns before the files have been pushed; calling
// block.Complete() here (as the previous version did) would close the block
// early and drop them, so completion is left to the observer.
query.Subscribe(block.AsObserver());
Console.WriteLine("Done subscribing");
block.Completion.Wait();
Console.WriteLine("Done TPL Block");
Func recurse=null;
递归=di=>
可观察
.返回(di)
.Concat(di.GetDirectories()
。其中(d=>int.Parse(d.Name)=tl_tile[0])
.TooObservable()文件
.SelectMany(di2=>recurse(di2)))
.ObserveOn(Scheduler.Default);
变量查询=
来自递归中的di(新目录信息(Path.Combine(directory.FullName,baselvl.ToString()))
来自di.GetFiles()中的fi,其中(f=>int.Parse(Path.GetFileNameWithoutExtension(f.Name))>=br_tile[1]
&&int.Parse(Path.GetFileNameWithoutExtension(f.Name))
我确信这并非总是可行,但在许多情况下(包括目录枚举),可以使用一个运行计数器配合 Interlocked
函数,让带有循环的一对多数据流能够正常完成:
/// <summary>
/// Creates a source block that emits every sub-directory found beneath
/// <paramref name="path"/> (the root itself is not emitted) and completes once
/// all of them have been enumerated.
/// </summary>
/// <param name="path">Directory at which the enumeration starts.</param>
/// <param name="maxParallel">Maximum number of directories enumerated concurrently.</param>
/// <returns>A buffer that receives the directory paths and completes, or faults if an
/// enumeration throws, when the walk is over.</returns>
public static ISourceBlock<string> GetDirectoryEnumeratorBlock(string path, int maxParallel = 5)
{
    var outputBuffer = new BufferBlock<string>();

    // Directories handed to the enumerator but not yet processed; 1 for the root.
    var count = 1;

    var broadcastBlock = new BroadcastBlock<string>(s => s);
    var getDirectoriesBlock = new TransformManyBlock<string, string>(d =>
    {
        var subdirectories = Directory.EnumerateDirectories(d).ToList();

        // Add the children and remove the current directory in one atomic step,
        // and decide on the value returned by that step instead of re-reading
        // the shared counter, which other workers may be changing concurrently.
        var remaining = Interlocked.Add(ref count, subdirectories.Count - 1);
        if (remaining == 0)
        {
            // Nothing left to enumerate anywhere: close the cycle.
            broadcastBlock.Complete();
        }

        return subdirectories;
    }, new ExecutionDataflowBlockOptions() { MaxDegreeOfParallelism = maxParallel });

    broadcastBlock.LinkTo(outputBuffer, new DataflowLinkOptions() { PropagateCompletion = true });
    broadcastBlock.LinkTo(getDirectoriesBlock, new DataflowLinkOptions() { PropagateCompletion = true });
    getDirectoriesBlock.LinkTo(broadcastBlock); // feedback edge; completion is driven by the counter

    // If an enumeration throws, the counter never reaches zero. Forward the
    // failure so that the returned buffer faults instead of never completing.
    getDirectoriesBlock.Completion.ContinueWith(t =>
    {
        if (t.IsFaulted)
        {
            ((IDataflowBlock)broadcastBlock).Fault(t.Exception.InnerException);
        }
    });

    getDirectoriesBlock.Post(path);
    return outputBuffer;
}
公共静态ISourceBlock GetDirectoryEnumeratorBlock(字符串路径,int-maxParallel=5)
{
var outputBuffer=new BufferBlock();
var计数=1;
var broadcastBlock=新的广播块(s=>s);
var getDirectoriesBlock=新的TransformManyBlock(d=>
{
var files=Directory.EnumerateDirectories(d).ToList();
Interlocked.Add(ref count,files.count-1);//添加子目录计数,减去当前目录的1。
if(count==0)//如果count达到0,则所有目录都已枚举。
broadcastBlock.Complete();
归还文件;
},新的ExecutionDataflowBlockOptions(){MaxDegreeOfParallelism=maxParallel});
LinkTo(outputBuffer,新的DataflowLinkOptions(){PropagateCompletion=true});
LinkTo(getDirectoriesBlock,newDataFlowLinkOptions(){PropagateCompletion=true});
getDirectoriesBlock.LinkTo(广播块);
getDirectoriesBlock.Post(路径);
返回输出缓冲区;
}
我对它稍作修改后用来枚举文件,效果很好。请注意最大并行度,它可能很快让网络文件系统不堪重负!下面是对 Andrew Hanlon 方案的一种通用化实现。它返回一个
TransformBlock
,支持把消息递归地发布回自身,并在没有更多消息需要处理时自动完成。
transform
lambda 有三个参数,而不是通常的一个。第一个参数是正在处理的项。第二个参数是该消息的“路径”,即一个包含其所有父消息的 IEnumerable 序列
。第三个参数是一个 Action
,用于把新消息作为当前消息的子消息发布到块中。
/// <summary>Creates a dataflow block that supports posting messages to itself,
/// and knows when it has completed processing all messages.</summary>
/// <param name="transform">Asynchronous delegate invoked for every message. It receives
/// the item, the sequence of its ancestor items (empty for items that entered through
/// the block's input side), and an action that posts a new child item back into the block.</param>
/// <param name="dataflowBlockOptions">Options passed to the block that runs
/// <paramref name="transform"/>; a new <c>ExecutionDataflowBlockOptions</c> is used when null.
/// Only its CancellationToken and BoundedCapacity are copied to the input block.</param>
/// <returns>A propagator built with <c>DataflowBlock.Encapsulate</c> from the input block
/// (target side) and the block that runs <paramref name="transform"/> (source side).</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
CreateRecursiveTransformBlock<TInput, TOutput>(
Func<TInput, IEnumerable<TInput>, Action<TInput>, Task<TOutput>> transform,
ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
if (transform == null) throw new ArgumentNullException(nameof(transform));
dataflowBlockOptions = dataflowBlockOptions ?? new ExecutionDataflowBlockOptions();
// pendingCount is incremented for every item that passes input1 and for every
// successfully posted child, and decremented after each transform call and once
// when input1 completes. input2 is completed when it reaches zero.
int pendingCount = 1; // The initial 1 represents the completion of input1 block
// input1: entry point for externally supplied items; they start with an empty path.
var input1 = new TransformBlock<TInput, (TInput, IEnumerable<TInput>)>(item =>
{
Interlocked.Increment(ref pendingCount);
return (item, Enumerable.Empty<TInput>());
}, new ExecutionDataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken,
BoundedCapacity = dataflowBlockOptions.BoundedCapacity
});
// input2: receives the child items posted from inside the transform delegate.
var input2 = new BufferBlock<(TInput, IEnumerable<TInput>)>(new DataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken
// Unbounded capacity
});
// output: runs the user delegate for items coming from either input.
var output = new TransformBlock<(TInput, IEnumerable<TInput>), TOutput>(async entry =>
{
try
{
var (item, path) = entry;
var postChildAction = CreatePostAction(item, path);
return await transform(item, path, postChildAction).ConfigureAwait(false);
}
finally
{
// Runs whether the transform succeeded or threw; the last pending item closes input2.
if (Interlocked.Decrement(ref pendingCount) == 0) input2.Complete();
}
}, dataflowBlockOptions);
// Builds the "post child" action handed to the transform delegate: the child's
// path is the parent's path with the parent item appended.
Action<TInput> CreatePostAction(TInput parentItem, IEnumerable<TInput> parentPath)
{
return item =>
{
// The Post will be unsuccessful only in case of block failure
// or cancellation, so no specific action is needed here.
if (input2.Post((item, parentPath.Append(parentItem))))
{
Interlocked.Increment(ref pendingCount);
}
};
}
// Both links are created without link options; completion is propagated
// manually by the helper calls below.
input1.LinkTo(output);
input2.LinkTo(output);
// When input1 completes, release the initial count of 1 and complete input2
// only if nothing else is pending.
PropagateCompletion(input1, input2,
condition: () => Interlocked.Decrement(ref pendingCount) == 0);
PropagateCompletion(input2, output);
PropagateFailure(output, input1, input2); // Ensure that all blocks are faulted
return DataflowBlock.Encapsulate(input1, output);
// Local helpers; declared after the return statement but invoked above.
// Waits for block1 to finish (ignoring any exception thrown by the await), then
// faults block2 with block1's first inner exception, does nothing on
// cancellation, or completes block2 when condition is null or returns true.
async void PropagateCompletion(IDataflowBlock block1, IDataflowBlock block2,
Func<bool> condition = null)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
catch { }
if (block1.Completion.Exception != null)
{
block2.Fault(block1.Completion.Exception.InnerException);
}
else
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
if (condition == null || condition()) block2.Complete();
}
}
// Waits for block1; if awaiting it throws and block1 was not canceled,
// faults block2 and block3 with the caught exception.
async void PropagateFailure(IDataflowBlock block1, IDataflowBlock block2,
IDataflowBlock block3)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
catch (Exception ex)
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
block2.Fault(ex); block3.Fault(ex);
}
}
}
/// <summary>Overload with synchronous delegate: wraps <paramref name="transform"/>
/// in a completed task and forwards to the asynchronous overload.</summary>
/// <param name="transform">Synchronous delegate receiving the item, the sequence of its
/// ancestor items, and an action that posts a child item back into the block.</param>
/// <param name="dataflowBlockOptions">Options forwarded unchanged to the asynchronous overload.</param>
/// <returns>The propagator block created by the asynchronous overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
    CreateRecursiveTransformBlock<TInput, TOutput>(
    Func<TInput, IEnumerable<TInput>, Action<TInput>, TOutput> transform,
    ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
    // Validate here: the wrapping lambda below is never null, so without this
    // check a null delegate would only surface later, as a failure inside the
    // block, instead of at the call site like the asynchronous overload does.
    if (transform == null) throw new ArgumentNullException(nameof(transform));

    return CreateRecursiveTransformBlock<TInput, TOutput>((item, path, postAction) =>
        Task.FromResult(transform(item, path, postAction)), dataflowBlockOptions);
}
上面的代码会在控制台中打印文件夹“MyDocuments”的所有子文件夹。
“完成”对您来说意味着什么?您希望停止哪个链接,又希望哪个链接保持运行?
完成 tileFinder 后,我希望调用 block.Complete() 并等待其完成。但是……
/// <summary>
/// Creates a source block that emits every sub-directory found beneath
/// <paramref name="path"/> (the root itself is not emitted) and completes once
/// all of them have been enumerated.
/// </summary>
/// <param name="path">Directory at which the enumeration starts.</param>
/// <param name="maxParallel">Maximum number of directories enumerated concurrently.</param>
/// <returns>A buffer that receives the directory paths and completes, or faults if an
/// enumeration throws, when the walk is over.</returns>
public static ISourceBlock<string> GetDirectoryEnumeratorBlock(string path, int maxParallel = 5)
{
    var outputBuffer = new BufferBlock<string>();

    // Directories handed to the enumerator but not yet processed; 1 for the root.
    var count = 1;

    var broadcastBlock = new BroadcastBlock<string>(s => s);
    var getDirectoriesBlock = new TransformManyBlock<string, string>(d =>
    {
        var subdirectories = Directory.EnumerateDirectories(d).ToList();

        // Add the children and remove the current directory in one atomic step,
        // and decide on the value returned by that step instead of re-reading
        // the shared counter, which other workers may be changing concurrently.
        var remaining = Interlocked.Add(ref count, subdirectories.Count - 1);
        if (remaining == 0)
        {
            // Nothing left to enumerate anywhere: close the cycle.
            broadcastBlock.Complete();
        }

        return subdirectories;
    }, new ExecutionDataflowBlockOptions() { MaxDegreeOfParallelism = maxParallel });

    broadcastBlock.LinkTo(outputBuffer, new DataflowLinkOptions() { PropagateCompletion = true });
    broadcastBlock.LinkTo(getDirectoriesBlock, new DataflowLinkOptions() { PropagateCompletion = true });
    getDirectoriesBlock.LinkTo(broadcastBlock); // feedback edge; completion is driven by the counter

    // If an enumeration throws, the counter never reaches zero. Forward the
    // failure so that the returned buffer faults instead of never completing.
    getDirectoriesBlock.Completion.ContinueWith(t =>
    {
        if (t.IsFaulted)
        {
            ((IDataflowBlock)broadcastBlock).Fault(t.Exception.InnerException);
        }
    });

    getDirectoriesBlock.Post(path);
    return outputBuffer;
}
/// <summary>Creates a dataflow block that supports posting messages to itself,
/// and knows when it has completed processing all messages.</summary>
/// <param name="transform">Asynchronous delegate invoked for every message. It receives
/// the item, the sequence of its ancestor items (empty for items that entered through
/// the block's input side), and an action that posts a new child item back into the block.</param>
/// <param name="dataflowBlockOptions">Options passed to the block that runs
/// <paramref name="transform"/>; a new <c>ExecutionDataflowBlockOptions</c> is used when null.
/// Only its CancellationToken and BoundedCapacity are copied to the input block.</param>
/// <returns>A propagator built with <c>DataflowBlock.Encapsulate</c> from the input block
/// (target side) and the block that runs <paramref name="transform"/> (source side).</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
CreateRecursiveTransformBlock<TInput, TOutput>(
Func<TInput, IEnumerable<TInput>, Action<TInput>, Task<TOutput>> transform,
ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
if (transform == null) throw new ArgumentNullException(nameof(transform));
dataflowBlockOptions = dataflowBlockOptions ?? new ExecutionDataflowBlockOptions();
// pendingCount is incremented for every item that passes input1 and for every
// successfully posted child, and decremented after each transform call and once
// when input1 completes. input2 is completed when it reaches zero.
int pendingCount = 1; // The initial 1 represents the completion of input1 block
// input1: entry point for externally supplied items; they start with an empty path.
var input1 = new TransformBlock<TInput, (TInput, IEnumerable<TInput>)>(item =>
{
Interlocked.Increment(ref pendingCount);
return (item, Enumerable.Empty<TInput>());
}, new ExecutionDataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken,
BoundedCapacity = dataflowBlockOptions.BoundedCapacity
});
// input2: receives the child items posted from inside the transform delegate.
var input2 = new BufferBlock<(TInput, IEnumerable<TInput>)>(new DataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken
// Unbounded capacity
});
// output: runs the user delegate for items coming from either input.
var output = new TransformBlock<(TInput, IEnumerable<TInput>), TOutput>(async entry =>
{
try
{
var (item, path) = entry;
var postChildAction = CreatePostAction(item, path);
return await transform(item, path, postChildAction).ConfigureAwait(false);
}
finally
{
// Runs whether the transform succeeded or threw; the last pending item closes input2.
if (Interlocked.Decrement(ref pendingCount) == 0) input2.Complete();
}
}, dataflowBlockOptions);
// Builds the "post child" action handed to the transform delegate: the child's
// path is the parent's path with the parent item appended.
Action<TInput> CreatePostAction(TInput parentItem, IEnumerable<TInput> parentPath)
{
return item =>
{
// The Post will be unsuccessful only in case of block failure
// or cancellation, so no specific action is needed here.
if (input2.Post((item, parentPath.Append(parentItem))))
{
Interlocked.Increment(ref pendingCount);
}
};
}
// Both links are created without link options; completion is propagated
// manually by the helper calls below.
input1.LinkTo(output);
input2.LinkTo(output);
// When input1 completes, release the initial count of 1 and complete input2
// only if nothing else is pending.
PropagateCompletion(input1, input2,
condition: () => Interlocked.Decrement(ref pendingCount) == 0);
PropagateCompletion(input2, output);
PropagateFailure(output, input1, input2); // Ensure that all blocks are faulted
return DataflowBlock.Encapsulate(input1, output);
// Local helpers; declared after the return statement but invoked above.
// Waits for block1 to finish (ignoring any exception thrown by the await), then
// faults block2 with block1's first inner exception, does nothing on
// cancellation, or completes block2 when condition is null or returns true.
async void PropagateCompletion(IDataflowBlock block1, IDataflowBlock block2,
Func<bool> condition = null)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
catch { }
if (block1.Completion.Exception != null)
{
block2.Fault(block1.Completion.Exception.InnerException);
}
else
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
if (condition == null || condition()) block2.Complete();
}
}
// Waits for block1; if awaiting it throws and block1 was not canceled,
// faults block2 and block3 with the caught exception.
async void PropagateFailure(IDataflowBlock block1, IDataflowBlock block2,
IDataflowBlock block3)
{
try
{
await block1.Completion.ConfigureAwait(false);
}
catch (Exception ex)
{
if (block1.Completion.IsCanceled) return; // On cancellation do nothing
block2.Fault(ex); block3.Fault(ex);
}
}
}
/// <summary>Overload with synchronous delegate: wraps <paramref name="transform"/>
/// in a completed task and forwards to the asynchronous overload.</summary>
/// <param name="transform">Synchronous delegate receiving the item, the sequence of its
/// ancestor items, and an action that posts a child item back into the block.</param>
/// <param name="dataflowBlockOptions">Options forwarded unchanged to the asynchronous overload.</param>
/// <returns>The propagator block created by the asynchronous overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
    CreateRecursiveTransformBlock<TInput, TOutput>(
    Func<TInput, IEnumerable<TInput>, Action<TInput>, TOutput> transform,
    ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
    // Validate here: the wrapping lambda below is never null, so without this
    // check a null delegate would only surface later, as a failure inside the
    // block, instead of at the call site like the asynchronous overload does.
    if (transform == null) throw new ArgumentNullException(nameof(transform));

    return CreateRecursiveTransformBlock<TInput, TOutput>((item, path, postAction) =>
        Task.FromResult(transform(item, path, postAction)), dataflowBlockOptions);
}
// Usage example: counts the files of every folder below "My Documents",
// printing one line per folder, and blocks until the whole tree is processed.
var fileCounter = CreateRecursiveTransformBlock<string, int>(
    (folderPath, parentPaths, postChild) =>
    {
        // Feed each sub-folder back into the block as a child of this folder.
        foreach (var subfolder in Directory.EnumerateDirectories(folderPath))
        {
            postChild(subfolder);
        }

        // EnumerateFiles is lazy: count once and reuse the value, so the folder
        // is scanned a single time and the printed and returned numbers agree.
        int fileCount = Directory.EnumerateFiles(folderPath).Count();
        Console.WriteLine($"{folderPath} has {fileCount} files"
            + $", and is {parentPaths.Count()} levels deep");
        return fileCount;
    });

// The per-folder counts are discarded; only the console output is of interest.
fileCounter.LinkTo(DataflowBlock.NullTarget<int>());
fileCounter.Post(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments));
fileCounter.Complete();
fileCounter.Completion.Wait();