C# 使用FluentFTP从FTP并发下载多个文件,并限制最大并发连接数
我想从FTP目录递归下载多个下载文件,为此,我使用FluentFTP库,代码如下:C# 使用最大值的FluentFTP从FTP同时下载多个文件,c#,multithreading,concurrency,ftp,fluentftp,C#,Multithreading,Concurrency,Ftp,Fluentftp,我想从FTP目录递归下载多个下载文件,为此,我使用FluentFTP库,代码如下: private async Task downloadRecursively(string src, string dest, FtpClient ftp) { foreach(var item in ftp.GetListing(src)) { if (item.Type == FtpFileSystemObjectType.Directory) {
/// <summary>
/// Recursively downloads every file below the remote directory <paramref name="src"/>
/// into the local directory <paramref name="dest"/>, one file at a time.
/// </summary>
/// <param name="src">Remote FTP directory to list.</param>
/// <param name="dest">Local directory that receives the files.</param>
/// <param name="ftp">The FTP client used for both listing and downloading.</param>
/// <returns>A task that completes once the whole sub-tree has been processed.</returns>
private async Task downloadRecursively(string src, string dest, FtpClient ftp)
{
    foreach (var item in ftp.GetListing(src))
    {
        // FTP paths always use '/', so the remote path is not built with Path.Combine
        // (which would insert '\' on Windows). An empty src keeps the name relative.
        var remotePath = src.Length == 0 ? item.Name : src.TrimEnd('/') + "/" + item.Name;
        var localPath = Path.Combine(dest, item.Name);

        if (item.Type == FtpFileSystemObjectType.Directory)
        {
            // Directories reported with size 0 are skipped, as in the original code.
            if (item.Size != 0)
            {
                System.IO.Directory.CreateDirectory(localPath);
                // The recursive call must be awaited: otherwise the returned task is
                // dropped, its exceptions are lost, and the sub-directory downloads
                // overlap with the ones below on the same client.
                await downloadRecursively(remotePath, localPath, ftp);
            }
        }
        else if (item.Type == FtpFileSystemObjectType.File)
        {
            await ftp.DownloadFileAsync(localPath, remotePath);
        }
    }
}
我知道每个并发下载都需要一个单独的FtpClient,但是我该如何限制最多同时使用一定数量的连接?我的想法是:对找到的每个文件执行创建、连接、下载和关闭操作,但同一时间只有X个文件在下载。另外,我不确定应该使用异步任务还是创建线程,我最大的问题是不知道如何实现这一切。
这里的答案似乎很好,但那个问题是从外部文件中读取每个需要下载的文件,并且没有最大并发下载数的限制,因此我不确定如何同时满足这两个要求。使用:
- ConcurrentBag 实现连接池
- Parallel 类并行化操作
- ParallelOptions.MaxDegreeOfParallelism 限制并发线程的数量
// Pool of already-connected clients that the parallel iterations share.
// NOTE(review): files, maxConnections, host, user, pass, sourcePath and destPath are
// assumed to be defined by the surrounding code; they are not visible in this snippet.
var clients = new ConcurrentBag<FtpClient>();
// Caps the number of concurrent iterations, and therefore of open connections.
var opts = new ParallelOptions { MaxDegreeOfParallelism = maxConnections };
try
{
    Parallel.ForEach(files, opts, file =>
    {
        file = Path.GetFileName(file);
        string thread = $"thread{Thread.CurrentThread.ManagedThreadId}";
        // Reuse a pooled connection when one is free; otherwise open a new one.
        if (!clients.TryTake(out var client))
        {
            Console.WriteLine($"{thread}打开连接…");
            client = new FtpClient(host, user, pass);
            client.Connect();
            Console.WriteLine($"{thread}打开的连接{client.GetHashCode()}");
        }
        try
        {
            string remotePath = sourcePath + "/" + file;
            string localPath = Path.Combine(destPath, file);
            string desc =
                $"{thread},连接{client.GetHashCode()}," +
                $"文件{remotePath}=>{localPath}";
            Console.WriteLine($"{desc}-开始…");
            client.DownloadFile(localPath, remotePath);
            Console.WriteLine($"{desc}-Done.");
        }
        finally
        {
            // Always hand the connection back so it is disposed below,
            // even when the download threw.
            clients.Add(client);
        }
    });
}
finally
{
    Console.WriteLine($"正在关闭{clients.Count}个连接");
    foreach (var client in clients)
    {
        Console.WriteLine($"正在关闭连接{client.GetHashCode()}");
        client.Dispose();
    }
}
另一种方法是启动固定数量的线程,每个线程使用一个连接,并让它们从队列中取出文件。有关实现的示例,请参阅我关于WinSCP .NET程序集的文章:
关于SFTP的类似问题:
我把它分成三部分:
/// <summary>
/// Downloads every file below the remote directory <paramref name="src"/> into the
/// local directory <paramref name="dest"/> in three steps: enumerate the remote tree,
/// create the local directories, then download the files.
/// </summary>
/// <param name="src">Remote FTP directory to start from.</param>
/// <param name="dest">Local directory that receives the files.</param>
/// <param name="ftp">The FTP client used for listing and downloading.</param>
/// <returns>A task that completes when all files have been downloaded.</returns>
private async Task DownloadRecursively(string src, string dest, FtpClient ftp)
{
    /* 1: enumerate (remote file, local file) pairs */
    IEnumerable<(string source, string destination)> Recurse(string s, string d)
    {
        foreach (var item in ftp.GetListing(s))
        {
            // FTP paths use '/', so the remote side is not built with Path.Combine.
            var remote = s.Length == 0 ? item.Name : s.TrimEnd('/') + "/" + item.Name;
            var local = Path.Combine(d, item.Name);

            if (item.Type == FtpFileSystemObjectType.Directory)
            {
                if (item.Size != 0)
                {
                    foreach (var pair in Recurse(remote, local))
                    {
                        yield return pair;
                    }
                }
            }
            else if (item.Type == FtpFileSystemObjectType.File)
            {
                yield return (remote, local);
            }
        }
    }

    var pairs = Recurse(src, dest).ToArray();

    /* 2: create the parent directory of every local file */
    // The directory to create is the file's parent, not the file path itself;
    // otherwise a directory ends up sitting where the file should be written.
    foreach (var directory in pairs.Select(x => Path.GetDirectoryName(x.destination)).Distinct())
    {
        if (!string.IsNullOrEmpty(directory))
        {
            System.IO.Directory.CreateDirectory(directory);
        }
    }

    /* 3: download */
    // A single FtpClient is used here, so the files are downloaded one after another
    // rather than all at once. DownloadFileAsync takes the local path first.
    foreach (var (source, destination) in pairs)
    {
        await ftp.DownloadFileAsync(destination, source);
    }
}
/// <summary>
/// Downloads every file below the remote directory <paramref name="src"/> into the
/// local directory <paramref name="dest"/> in three steps: enumerate the remote tree,
/// create the local directories, then download the files.
/// </summary>
/// <param name="src">Remote FTP directory to start from.</param>
/// <param name="dest">Local directory that receives the files.</param>
/// <param name="ftp">The FTP client used for listing and downloading.</param>
/// <returns>A task that completes when all files have been downloaded.</returns>
private async Task DownloadRecursively(string src, string dest, FtpClient ftp)
{
    /* 1: enumerate (remote file, local file) pairs */
    IEnumerable<(string source, string destination)> Recurse(string s, string d)
    {
        foreach (var item in ftp.GetListing(s))
        {
            // FTP paths use '/', so the remote side is not built with Path.Combine.
            var remote = s.Length == 0 ? item.Name : s.TrimEnd('/') + "/" + item.Name;
            var local = Path.Combine(d, item.Name);

            if (item.Type == FtpFileSystemObjectType.Directory)
            {
                if (item.Size != 0)
                {
                    foreach (var pair in Recurse(remote, local))
                    {
                        yield return pair;
                    }
                }
            }
            else if (item.Type == FtpFileSystemObjectType.File)
            {
                yield return (remote, local);
            }
        }
    }

    var pairs = Recurse(src, dest).ToArray();

    /* 2: create the parent directory of every local file */
    // The directory to create is the file's parent, not the file path itself;
    // otherwise a directory ends up sitting where the file should be written.
    foreach (var directory in pairs.Select(x => Path.GetDirectoryName(x.destination)).Distinct())
    {
        if (!string.IsNullOrEmpty(directory))
        {
            System.IO.Directory.CreateDirectory(directory);
        }
    }

    /* 3: download */
    // A single FtpClient is used here, so the files are downloaded one after another
    // rather than all at once. DownloadFileAsync takes the local path first.
    foreach (var (source, destination) in pairs)
    {
        await ftp.DownloadFileAsync(destination, source);
    }
}
这样的代码应该干净、整洁,并且易于推理。
以下是另一种方法:用 BufferBlock<FtpClient> 作为 FtpClient 对象池。递归枚举接受一个 IEnumerable<string> 类型的参数,该参数保存某个文件路径的各个段;在构造本地和远程文件路径时,这些段以不同的方式组合。作为调用递归枚举的副作用,远程文件的路径被发送到 ActionBlock<IEnumerable<string>>,该块负责并行下载文件。它的 Completion 属性最终包含整个操作过程中可能发生的所有异常。
/// <summary>
/// Recursively downloads the remote directory <paramref name="ftpRoot"/> into
/// <paramref name="targetDirectory"/>, using a fixed-size pool of FTP clients so that
/// at most <paramref name="maximumConnections"/> clients are in use at any time.
/// </summary>
/// <param name="ftpHost">Host passed to each <c>FtpClient</c>.</param>
/// <param name="ftpRoot">Remote directory where the enumeration starts.</param>
/// <param name="targetDirectory">Existing local directory that receives the files.</param>
/// <param name="username">Optional user name; applied only together with <paramref name="password"/>.</param>
/// <param name="password">Optional password; applied only together with <paramref name="username"/>.</param>
/// <param name="maximumConnections">Size of the client pool and degree of download parallelism.</param>
/// <returns>A task that completes when the enumeration and all downloads have finished.</returns>
/// <exception cref="DirectoryNotFoundException">
/// Thrown synchronously when <paramref name="targetDirectory"/> does not exist.
/// </exception>
public static Task FtpDownloadDeep(string ftpHost, string ftpRoot,
    string targetDirectory, string username = null, string password = null,
    int maximumConnections = 1)
{
    // Arguments validation omitted
    if (!Directory.Exists(targetDirectory))
        throw new DirectoryNotFoundException(targetDirectory);

    var fsLocker = new object();
    var ftpClientPool = new BufferBlock<FtpClient>();

    // Borrows a client from the pool for the duration of one action.
    async Task<TResult> UsingFtpAsync<TResult>(Func<FtpClient, Task<TResult>> action)
    {
        var client = await ftpClientPool.ReceiveAsync();
        try { return await action(client); }
        finally { ftpClientPool.Post(client); } // Return to the pool
    }

    // Receives the path segments of one remote file and downloads that file.
    var downloader = new ActionBlock<IEnumerable<string>>(async path =>
    {
        var remotePath = String.Join("/", path);
        // Path.Combine restarts from any rooted segment, so a root such as "/pub"
        // would discard targetDirectory and write outside of it. Leading separators
        // are therefore stripped from the segments used for the local path.
        var localSegments = path.Select(segment => segment.TrimStart('/', '\\'));
        var localPath = Path.Combine(localSegments.Prepend(targetDirectory).ToArray());
        var localDir = Path.GetDirectoryName(localPath);
        lock (fsLocker) Directory.CreateDirectory(localDir);
        var status = await UsingFtpAsync(client =>
            client.DownloadFileAsync(localPath, remotePath));
        if (status == FtpStatus.Failed) throw new InvalidOperationException(
            $"Download of '{remotePath}' failed.");
    }, new ExecutionDataflowBlockOptions()
    {
        MaxDegreeOfParallelism = maximumConnections,
        // Bounding the input queue makes SendAsync wait while the block is full,
        // so the enumeration advances at the pace of the downloads.
        BoundedCapacity = maximumConnections,
    });

    // Lists one remote directory, descends into sub-directories and queues files.
    async Task Recurse(IEnumerable<string> path)
    {
        if (downloader.Completion.IsCompleted) return; // The downloader has failed
        var listing = await UsingFtpAsync(client =>
            client.GetListingAsync(String.Join("/", path)));
        foreach (var item in listing)
        {
            if (item.Type == FtpFileSystemObjectType.Directory)
            {
                if (item.Size != 0) await Recurse(path.Append(item.Name));
            }
            else if (item.Type == FtpFileSystemObjectType.File)
            {
                var accepted = await downloader.SendAsync(path.Append(item.Name));
                if (!accepted) break; // The downloader has failed
            }
        }
    }

    // Move on to the thread pool, to avoid ConfigureAwait(false) everywhere
    return Task.Run(async () =>
    {
        // Fill the FtpClient pool
        for (int i = 0; i < maximumConnections; i++)
        {
            var client = new FtpClient(ftpHost);
            if (username != null && password != null)
                client.Credentials = new NetworkCredential(username, password);
            ftpClientPool.Post(client);
        }

        try
        {
            // Enumerate the files to download
            await Recurse(new[] { ftpRoot });
            downloader.Complete();
        }
        catch (Exception ex) { ((IDataflowBlock)downloader).Fault(ex); }

        try
        {
            // Await the downloader to complete
            await downloader.Completion;
        }
        catch (OperationCanceledException)
            when (downloader.Completion.IsCanceled) { throw; }
        // The task has already completed here; Wait() is used only to rethrow
        // its errors wrapped in an AggregateException.
        catch { downloader.Completion.Wait(); } // Propagate AggregateException
        finally
        {
            // Clean up
            if (ftpClientPool.TryReceiveAll(out var clients))
                foreach (var client in clients) client.Dispose();
        }
    });
}
注意:上述实现按照下载过程的节奏惰性地枚举远程目录。如果您希望急切地枚举它,尽快收集远程列表的所有可用信息,只需从负责下载文件的 ActionBlock 中删除 BoundedCapacity = maximumConnections 配置即可。请注意,如果远程目录的子文件夹层次很深,并且累计包含大量小文件,这样做可能会导致内存消耗很高。
除非我错了,否则此解决方案将同时下载所有文件。但是OP希望同时只下载X个文件。顺便说一句,PLINQ对异步并不友好。
@TheodorZoulias - 当然,我需要添加最大并行度选项。无论如何,我的代码中并没有在PLINQ里使用 async。
没错,这不是 async void 委托的情况。问题是 DownloadFileAsync 返回一个 Task,而PLINQ对任务一无所知。因此,DownloadFileAsync 方法创建的所有任务都将被忽略,它们不会被 await,从而成为“即发即弃”的任务。
默认情况下,您不能在一个 FtpClient 实例上运行多个并行传输。如果设置了 EnableThreadSafeDataConnections,则可以这样做,但那样会打开一个新连接。
/// <summary>
/// Recursively downloads the remote directory <paramref name="ftpRoot"/> into
/// <paramref name="targetDirectory"/>, using a fixed-size pool of FTP clients so that
/// at most <paramref name="maximumConnections"/> clients are in use at any time.
/// </summary>
/// <param name="ftpHost">Host passed to each <c>FtpClient</c>.</param>
/// <param name="ftpRoot">Remote directory where the enumeration starts.</param>
/// <param name="targetDirectory">Existing local directory that receives the files.</param>
/// <param name="username">Optional user name; applied only together with <paramref name="password"/>.</param>
/// <param name="password">Optional password; applied only together with <paramref name="username"/>.</param>
/// <param name="maximumConnections">Size of the client pool and degree of download parallelism.</param>
/// <returns>A task that completes when the enumeration and all downloads have finished.</returns>
/// <exception cref="DirectoryNotFoundException">
/// Thrown synchronously when <paramref name="targetDirectory"/> does not exist.
/// </exception>
public static Task FtpDownloadDeep(string ftpHost, string ftpRoot,
    string targetDirectory, string username = null, string password = null,
    int maximumConnections = 1)
{
    // Arguments validation omitted
    if (!Directory.Exists(targetDirectory))
        throw new DirectoryNotFoundException(targetDirectory);

    var fsLocker = new object();
    var ftpClientPool = new BufferBlock<FtpClient>();

    // Borrows a client from the pool for the duration of one action.
    async Task<TResult> UsingFtpAsync<TResult>(Func<FtpClient, Task<TResult>> action)
    {
        var client = await ftpClientPool.ReceiveAsync();
        try { return await action(client); }
        finally { ftpClientPool.Post(client); } // Return to the pool
    }

    // Receives the path segments of one remote file and downloads that file.
    var downloader = new ActionBlock<IEnumerable<string>>(async path =>
    {
        var remotePath = String.Join("/", path);
        // Path.Combine restarts from any rooted segment, so a root such as "/pub"
        // would discard targetDirectory and write outside of it. Leading separators
        // are therefore stripped from the segments used for the local path.
        var localSegments = path.Select(segment => segment.TrimStart('/', '\\'));
        var localPath = Path.Combine(localSegments.Prepend(targetDirectory).ToArray());
        var localDir = Path.GetDirectoryName(localPath);
        lock (fsLocker) Directory.CreateDirectory(localDir);
        var status = await UsingFtpAsync(client =>
            client.DownloadFileAsync(localPath, remotePath));
        if (status == FtpStatus.Failed) throw new InvalidOperationException(
            $"Download of '{remotePath}' failed.");
    }, new ExecutionDataflowBlockOptions()
    {
        MaxDegreeOfParallelism = maximumConnections,
        // Bounding the input queue makes SendAsync wait while the block is full,
        // so the enumeration advances at the pace of the downloads.
        BoundedCapacity = maximumConnections,
    });

    // Lists one remote directory, descends into sub-directories and queues files.
    async Task Recurse(IEnumerable<string> path)
    {
        if (downloader.Completion.IsCompleted) return; // The downloader has failed
        var listing = await UsingFtpAsync(client =>
            client.GetListingAsync(String.Join("/", path)));
        foreach (var item in listing)
        {
            if (item.Type == FtpFileSystemObjectType.Directory)
            {
                if (item.Size != 0) await Recurse(path.Append(item.Name));
            }
            else if (item.Type == FtpFileSystemObjectType.File)
            {
                var accepted = await downloader.SendAsync(path.Append(item.Name));
                if (!accepted) break; // The downloader has failed
            }
        }
    }

    // Move on to the thread pool, to avoid ConfigureAwait(false) everywhere
    return Task.Run(async () =>
    {
        // Fill the FtpClient pool
        for (int i = 0; i < maximumConnections; i++)
        {
            var client = new FtpClient(ftpHost);
            if (username != null && password != null)
                client.Credentials = new NetworkCredential(username, password);
            ftpClientPool.Post(client);
        }

        try
        {
            // Enumerate the files to download
            await Recurse(new[] { ftpRoot });
            downloader.Complete();
        }
        catch (Exception ex) { ((IDataflowBlock)downloader).Fault(ex); }

        try
        {
            // Await the downloader to complete
            await downloader.Completion;
        }
        catch (OperationCanceledException)
            when (downloader.Completion.IsCanceled) { throw; }
        // The task has already completed here; Wait() is used only to rethrow
        // its errors wrapped in an AggregateException.
        catch { downloader.Completion.Wait(); } // Propagate AggregateException
        finally
        {
            // Clean up
            if (ftpClientPool.TryReceiveAll(out var clients))
                foreach (var client in clients) client.Dispose();
        }
    });
}
// Usage example: download everything below the server root into C:\FtpTest,
// with a pool of ten connections.
await FtpDownloadDeep(
    ftpHost: "ftp://ftp.test.com",
    ftpRoot: "",
    targetDirectory: @"C:\FtpTest",
    username: "username",
    password: "password",
    maximumConnections: 10);