C# 在.NET中有没有更快的方法递归地扫描目录?

C# 在.NET中有没有更快的方法递归地扫描目录?,c#,.net,filesystems,C#,.net,Filesystems,我正在.NET中编写一个目录扫描程序 对于每个文件/目录,我需要以下信息 class Info { public bool IsDirectory; public string Path; public DateTime ModifiedDate; public DateTime CreatedDate; } 我有这个功能: static List<Info> RecursiveMovieFo

我正在.NET中编写一个目录扫描程序

对于每个文件/目录,我需要以下信息

   /// <summary>
   /// One scanned file-system entry (file or directory) together with its timestamps.
   /// </summary>
   class Info {
        /// <summary>Full path of the entry.</summary>
        public string Path;

        /// <summary>True when the entry is a directory, false when it is a file.</summary>
        public bool IsDirectory;

        /// <summary>Creation time of the entry.</summary>
        public DateTime CreatedDate;

        /// <summary>Last-write time of the entry.</summary>
        public DateTime ModifiedDate;
    }
我有这个功能:

      /// <summary>
      /// Recursively lists everything under <paramref name="path"/> using
      /// <c>DirectoryInfo.GetDirectories</c> and <c>DirectoryInfo.GetFiles</c>.
      /// </summary>
      /// <param name="path">Directory to scan.</param>
      /// <returns>
      /// For each sub-directory: its own record followed by its contents; after all
      /// sub-directories, the records of the files located directly in <paramref name="path"/>.
      /// Timestamps are the UTC creation and last-write times.
      /// </returns>
      static List<Info> RecursiveMovieFolderScan(string path){
        var root = new DirectoryInfo(path);
        var collected = new List<Info>();

        foreach (DirectoryInfo child in root.GetDirectories()) {
            var folderRecord = new Info();
            folderRecord.IsDirectory = true;
            folderRecord.CreatedDate = child.CreationTimeUtc;
            folderRecord.ModifiedDate = child.LastWriteTimeUtc;
            folderRecord.Path = child.FullName;
            collected.Add(folderRecord);

            // Descend right away so a folder's contents directly follow its own record.
            List<Info> nested = RecursiveMovieFolderScan(child.FullName);
            collected.AddRange(nested);
        }

        foreach (FileInfo leaf in root.GetFiles()) {
            var fileRecord = new Info();
            fileRecord.IsDirectory = false;
            fileRecord.CreatedDate = leaf.CreationTimeUtc;
            fileRecord.ModifiedDate = leaf.LastWriteTimeUtc;
            fileRecord.Path = leaf.FullName;
            collected.Add(fileRecord);
        }

        return collected;
    }
static List<Info> RecursiveMovieFolderScan(string path){
    var info = new List<Info>();
    var dirInfo = new DirectoryInfo(path);
    foreach (var dir in dirInfo.GetDirectories()) {
        info.Add(new Info() {
            IsDirectory = true,
            CreatedDate = dir.CreationTimeUtc,
            ModifiedDate = dir.LastWriteTimeUtc,
            Path = dir.FullName
        });
        info.AddRange(RecursiveMovieFolderScan(dir.FullName));
    }
    foreach (var file in dirInfo.GetFiles()) {
        info.Add(new Info()
        {
            IsDirectory = false,
            CreatedDate = file.CreationTimeUtc,
            ModifiedDate = file.LastWriteTimeUtc,
            Path = file.FullName
        });
    }
    return info;
}
事实证明,这种实现相当缓慢。有没有办法加快速度?我正在考虑用FindFirstFileW手工编写代码,但如果有一种内置的方式更快,我想避免这种情况

尝试以下操作(即先进行初始化,然后重用列表和directoryInfo对象):

/// <summary>
/// Entry point of the "initialise once, then reuse" variant: creates the result list and
/// the root <c>DirectoryInfo</c> a single time and lets the two-argument
/// <c>RecursiveMovieFolderScan</c> overload fill the list.
/// </summary>
/// <returns>The list populated by the recursive overload.</returns>
static List<Info> RecursiveMovieFolderScan1() {
    var info = new List<Info>();
    // NOTE(review): `path` is not declared in this snippet; it has to be supplied by the
    // enclosing scope (for example a static field) - confirm before using this code.
    var dirInfo = new DirectoryInfo(path);
    RecursiveMovieFolderScan(dirInfo, info);
    return info;
}
/// <summary>
/// Recursive worker that appends every entry below <paramref name="dirInfo"/> to the
/// caller-supplied list instead of allocating a new list per directory.
/// </summary>
/// <param name="dirInfo">Directory whose contents are scanned.</param>
/// <param name="info">Accumulator that receives one record per directory and file.</param>
/// <returns>The same list instance that was passed in as <paramref name="info"/>.</returns>
static List<Info> RecursiveMovieFolderScan(DirectoryInfo dirInfo, List<Info> info) {
    foreach (var dir in dirInfo.GetDirectories()) {
        info.Add(new Info() {
            IsDirectory = true,
            CreatedDate = dir.CreationTimeUtc,
            ModifiedDate = dir.LastWriteTimeUtc,
            Path = dir.FullName
        });
        // Reuse the DirectoryInfo already returned by the enumeration; the shared
        // accumulator makes the return value redundant here.
        RecursiveMovieFolderScan(dir, info);
    }
    foreach (var file in dirInfo.GetFiles()) {
        info.Add(new Info()
        {
            IsDirectory = false,
            CreatedDate = file.CreationTimeUtc,
            ModifiedDate = file.LastWriteTimeUtc,
            Path = file.FullName
        });
    }
    return info;
}

取决于您想为这个函数节省多少时间,直接调用Win32 API函数可能是值得的,因为现有API会执行大量额外处理来检查您可能并不关心的内容

如果您还没有这样做,并且假设您不打算为Mono项目做出贡献,我强烈建议您下载反编译工具并查看Microsoft是如何实现您当前使用的API调用的。这样您就能了解需要进行哪些调用,以及可以省略哪些调用


例如,您可以选择创建一个迭代器,该迭代器产生目录名,而不是返回列表的函数,这样您就不会在所有不同级别的代码中对同一个名称列表进行两到三次迭代。

这个需要稍加调整的实现速度提高了5-10倍

    /// <summary>
    /// Recursively lists everything under <paramref name="directory"/> through the native
    /// FindFirstFileW / FindNextFile / FindClose calls.
    /// </summary>
    /// <param name="directory">Directory to scan, with or without a trailing backslash.</param>
    /// <returns>
    /// One record per file and directory. A directory's contents are added before the
    /// directory's own record; "." and ".." are skipped.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="directory"/> is null.</exception>
    static List<Info> RecursiveScan2(string directory) {
        if (directory == null) throw new ArgumentNullException("directory");

        IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);
        WIN32_FIND_DATAW findData;
        IntPtr findHandle = INVALID_HANDLE_VALUE;

        // Normalise the separator once so the search pattern and every child path are
        // built from the same prefix (no doubled backslash, no per-entry EndsWith call).
        string prefix = directory.EndsWith("\\", StringComparison.Ordinal) ? directory : directory + "\\";

        var info = new List<Info>();
        try {
            findHandle = FindFirstFileW(prefix + "*", out findData);
            if (findHandle != INVALID_HANDLE_VALUE) {

                do {
                    // Skip the self and parent pseudo-entries; `continue` still evaluates
                    // the loop condition, so the enumeration advances.
                    if (findData.cFileName == "." || findData.cFileName == "..") continue;

                    string fullpath = prefix + findData.cFileName;

                    bool isDir = false;

                    if ((findData.dwFileAttributes & FileAttributes.Directory) != 0) {
                        isDir = true;
                        info.AddRange(RecursiveScan2(fullpath));
                    }

                    info.Add(new Info()
                    {
                        CreatedDate = findData.ftCreationTime.ToDateTime(),
                        ModifiedDate = findData.ftLastWriteTime.ToDateTime(),
                        IsDirectory = isDir,
                        Path = fullpath
                    });
                }
                while (FindNextFile(findHandle, out findData));

            }
        } finally {
            // Only a handle that was actually opened may be closed.
            if (findHandle != INVALID_HANDLE_VALUE) FindClose(findHandle);
        }
        return info;
    }

我会使用或基于这个多线程库:


目录树很浅,共有371个目录,每个目录中平均有10个文件。某些目录包含其他子目录

这只是一个评论,但你的数字似乎很高。我使用与您使用的递归方法基本相同的方法运行了下面的代码,尽管创建了字符串输出,但我的时间要少得多

    /// <summary>
    /// Appends the full name of every entry below <paramref name="dirInfo"/> to
    /// <paramref name="sb"/> while updating the directory counter, file counter and
    /// maximum-depth fields of this instance.
    /// </summary>
    /// <param name="dirInfo">Directory to walk.</param>
    /// <param name="sb">Receives one line per entry; directories are tagged with " (D)".</param>
    /// <param name="depth">Depth of <paramref name="dirInfo"/>; children are visited with depth + 1.</param>
    public void RecurseTest(DirectoryInfo dirInfo, 
                            StringBuilder sb, 
                            int depth)
    {
        _dirCounter++;
        if (_maxDepth < depth)
        {
            _maxDepth = depth;
        }

        foreach (FileSystemInfo entry in dirInfo.GetFileSystemInfos())
        {
            sb.Append(entry.FullName);

            var subDirectory = entry as DirectoryInfo;
            if (subDirectory == null)
            {
                // Anything that is not a DirectoryInfo is counted as a file.
                _fileCounter++;
            }
            else
            {
                sb.Append(" (D)");
                sb.AppendLine();

                RecurseTest(subDirectory, sb, depth + 1);
            }

            // Terminates a file's line; after a directory this adds a blank line
            // following that directory's subtree.
            sb.AppendLine();
        }
    }
我在许多不同的目录上运行了上述代码。在我的机器上,扫描目录树的第二次调用通常更快,因为运行时或文件系统都进行了缓存。请注意,这个系统没有什么特别之处,只是一个有1年历史的开发工作站

// cached call Dirs = 150, files = 420, max depth = 5 Time taken = 53 milliseconds // cached call Dirs = 1117, files = 9076, max depth = 11 Time taken = 433 milliseconds // first call Dirs = 1052, files = 5903, max depth = 12 Time taken = 11921 milliseconds // first call Dirs = 793, files = 10748, max depth = 10 Time taken = 5433 milliseconds (2nd run 363 milliseconds) //缓存调用 目录=150,文件=420,最大深度=5 所用时间=53毫秒 //缓存调用 目录=1117,文件=9076,最大深度=11 所用时间=433毫秒 //首次调用 目录=1052,文件=5903,最大深度=12 所用时间=11921毫秒 //首次调用 目录=793,文件=10748,最大深度=10 所用时间=5433毫秒(第二次运行363毫秒) 考虑到上面的代码没有获取创建日期和修改日期,我修改了代码以同时输出这些日期,耗时如下

// now grabbing last update and creation time. Dirs = 150, files = 420, max depth = 5 Time taken = 103 milliseconds (2nd run 93 milliseconds) Dirs = 1117, files = 9076, max depth = 11 Time taken = 992 milliseconds (2nd run 984 milliseconds) Dirs = 793, files = 10748, max depth = 10 Time taken = 1382 milliseconds (2nd run 735 milliseconds) Dirs = 1052, files = 5903, max depth = 12 Time taken = 936 milliseconds (2nd run 595 milliseconds) //现在抓取上次更新和创建时间。 目录=150,文件=420,最大深度=5 所用时间=103毫秒(第二次运行93毫秒) 目录=1117,文件=9076,最大深度=11 所用时间=992毫秒(第二次运行984毫秒) 目录=793,文件=10748,最大深度=10 所用时间=1382毫秒(第二次运行735毫秒) 目录=1052,文件=5903,最大深度=12 所用时间=936毫秒(第二次运行595毫秒)
注意:计时使用的是System.Diagnostics.Stopwatch类。

我刚刚遇到了这个问题。本机版本的良好实现

这个版本虽然仍然比使用
FindFirst
FindNext
的版本慢,但比原始.NET版本快很多

    /// <summary>
    /// Recursively lists everything under <paramref name="path"/> with a single
    /// <c>GetFileSystemInfos</c> call per directory.
    /// </summary>
    /// <param name="path">Directory to scan.</param>
    /// <returns>
    /// One record per entry; the contents of a sub-directory are added before the
    /// sub-directory's own record. Timestamps are the UTC creation and last-write times.
    /// </returns>
    static List<Info> RecursiveMovieFolderScan(string path)
    {
        var results = new List<Info>();
        FileSystemInfo[] entries = new DirectoryInfo(path).GetFileSystemInfos();

        for (int i = 0; i < entries.Length; i++)
        {
            FileSystemInfo current = entries[i];
            bool isFolder = (current.Attributes & FileAttributes.Directory) == FileAttributes.Directory;

            if (isFolder)
            {
                // Children first, then the folder itself.
                List<Info> below = RecursiveMovieFolderScan(current.FullName);
                results.AddRange(below);
            }

            var record = new Info();
            record.IsDirectory = isFolder;
            record.CreatedDate = current.CreationTimeUtc;
            record.ModifiedDate = current.LastWriteTimeUtc;
            record.Path = current.FullName;
            results.Add(record);
        }

        return results;
    }
同样,如果您将其更改为
GetFileSystemInfos
,它会稍微快一点(但只是稍微快一点)


就我而言,上面的第一个解决方案已经足够快了。本机版本的运行时间约为1.6秒。使用
DirectoryInfo
的版本运行约2.9秒。我想如果我经常运行这些扫描,我会改变主意。

长期以来,.NET文件枚举方法的速度很慢。问题是,没有一种可以立即枚举大型目录结构的方法。即使是这里公认的答案也有GC分配的问题

我所能做的最好的事情就是把它包装在我的库中,并在CSharpTest.Net.IO命名空间中作为FindFile类公开。此类可以枚举文件和文件夹,而无需不必要的GC分配和字符串封送处理

用法非常简单,RaiseOnAccessDenied属性将跳过目录和文件 // cached call Dirs = 150, files = 420, max depth = 5 Time taken = 53 milliseconds // cached call Dirs = 1117, files = 9076, max depth = 11 Time taken = 433 milliseconds // first call Dirs = 1052, files = 5903, max depth = 12 Time taken = 11921 milliseconds // first call Dirs = 793, files = 10748, max depth = 10 Time taken = 5433 milliseconds (2nd run 363 milliseconds) // now grabbing last update and creation time. Dirs = 150, files = 420, max depth = 5 Time taken = 103 milliseconds (2nd run 93 milliseconds) Dirs = 1117, files = 9076, max depth = 11 Time taken = 992 milliseconds (2nd run 984 milliseconds) Dirs = 793, files = 10748, max depth = 10 Time taken = 1382 milliseconds (2nd run 735 milliseconds) Dirs = 1052, files = 5903, max depth = 12 Time taken = 936 milliseconds (2nd run 595 milliseconds)
    /// <summary>
    /// Depth-first directory scan built on <c>DirectoryInfo.GetFileSystemInfos</c>, which
    /// returns files and sub-directories of one level in a single call.
    /// </summary>
    /// <param name="path">Directory to scan.</param>
    /// <returns>
    /// All entries below <paramref name="path"/>; for a sub-directory, its contents come
    /// first and its own record afterwards.
    /// </returns>
    static List<Info> RecursiveMovieFolderScan(string path)
    {
        var root = new DirectoryInfo(path);
        var found = new List<Info>();

        foreach (FileSystemInfo node in root.GetFileSystemInfos())
        {
            FileAttributes directoryBit = node.Attributes & FileAttributes.Directory;
            bool nodeIsDirectory = directoryBit != 0;

            if (nodeIsDirectory)
            {
                found.AddRange(RecursiveMovieFolderScan(node.FullName));
            }

            // The record for the entry itself is appended after any nested results.
            var item = new Info();
            item.IsDirectory = nodeIsDirectory;
            item.CreatedDate = node.CreationTimeUtc;
            item.ModifiedDate = node.LastWriteTimeUtc;
            item.Path = node.FullName;
            found.Add(item);
        }

        return found;
    }
    /// <summary>
    /// Lists everything under <paramref name="path"/> by letting
    /// <c>EnumerateFileSystemInfos</c> perform the recursion
    /// (<c>SearchOption.AllDirectories</c>) instead of recursing by hand.
    /// </summary>
    /// <param name="path">Directory to scan.</param>
    /// <returns>One record per entry, in the order the enumeration yields them.</returns>
    static List<Info> RecursiveScan3(string path)
    {
        var root = new DirectoryInfo(path);
        IEnumerable<FileSystemInfo> everything =
            root.EnumerateFileSystemInfos("*", SearchOption.AllDirectories);

        var collected = new List<Info>();
        foreach (FileSystemInfo node in everything)
        {
            var record = new Info();
            record.IsDirectory = (node.Attributes & FileAttributes.Directory) != 0;
            record.CreatedDate = node.CreationTimeUtc;
            record.ModifiedDate = node.LastWriteTimeUtc;
            record.Path = node.FullName;
            collected.Add(record);
        }

        return collected;
    }
    /// <summary>
    /// Enumerates <paramref name="directory"/> with <c>CSharpTest.Net.IO.FindFile</c>, sums
    /// the length of every non-directory entry and prints the file count, byte total and
    /// elapsed time to the console.
    /// </summary>
    /// <param name="directory">Directory handed to the <c>FindFile</c> constructor.</param>
    /// <returns>Total length, in bytes, of all files reported by the enumeration.</returns>
    private static long SizeOf(string directory)
    {
        // NOTE(review): the three boolean arguments are positional; their meaning is
        // defined by the FindFile constructor - confirm against that library.
        var fcounter = new CSharpTest.Net.IO.FindFile(directory, "*", true, true, true);
        fcounter.RaiseOnAccessDenied = false;

        long size = 0, total = 0;
        fcounter.FileFound +=
            (o, e) =>
            {
                if (!e.IsDirectory)
                {
                    // Both accumulators are updated atomically; previously only the
                    // count used Interlocked while the byte total used a plain +=.
                    Interlocked.Increment(ref total);
                    Interlocked.Add(ref size, e.Length);
                }
            };

        Stopwatch sw = Stopwatch.StartNew();
        fcounter.Find();
        Console.WriteLine("Enumerated {0:n0} files totaling {1:n0} bytes in {2:n3} seconds.",
                          total, size, sw.Elapsed.TotalSeconds);
        return size;
    }
cmd.exe /u /c dir "M:\" /s /b >"c:\flist1.txt"
The top answer: 2010100 files, time: 53023
cmd.exe method: 2010100 files, cmd time: 64907, scan output file time: 19832.
/// <summary>
/// P/Invoke declaration for the kernel32.dll <c>FindFirstFile</c> export, used to start
/// a directory search. The last Win32 error is preserved (<c>SetLastError = true</c>).
/// https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findfirstfilew
/// </summary>
/// <remarks>
/// NOTE(review): the attribute specifies no CharSet, so the runtime's default string
/// marshalling applies even though the linked page documents the wide (W) variant.
/// Confirm that WIN32_FIND_DATA (declared elsewhere) is laid out to match.
/// </remarks>
/// <param name="lpFileName">Search path or pattern passed to the native function.</param>
/// <param name="lpFindFileData">Structure that receives the data of the first match.</param>
/// <returns>The native search handle; see the linked documentation for failure values.</returns>
[DllImport("kernel32.dll", SetLastError = true)]
public static extern IntPtr FindFirstFile(
    string lpFileName,
    ref WIN32_FIND_DATA lpFindFileData
    );

/// <summary>
/// P/Invoke declaration for the kernel32.dll <c>FindNextFile</c> export, used to
/// continue a search on an existing search handle. The last Win32 error is preserved
/// (<c>SetLastError = true</c>).
/// https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findnextfilew
/// </summary>
/// <param name="hFindFile">Search handle obtained from the find-first call.</param>
/// <param name="lpFindFileData">Structure that receives the data of the next match.</param>
/// <returns>The native call's boolean result; see the linked documentation.</returns>
[DllImport("kernel32.dll", SetLastError = true)]
public static extern bool FindNextFile(
    IntPtr hFindFile,
    ref WIN32_FIND_DATA lpFindFileData
    );

/// <summary>
/// P/Invoke declaration for the kernel32.dll <c>FindClose</c> export, used to release a
/// search handle. The last Win32 error is preserved (<c>SetLastError = true</c>).
/// https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findclose
/// </summary>
/// <param name="hFindFile">Search handle to close.</param>
/// <returns>The native call's boolean result; see the linked documentation.</returns>
[DllImport("kernel32.dll", SetLastError = true)]
public static extern bool FindClose(
    IntPtr hFindFile
    );
/// <summary>
/// Counts the files and directories below <paramref name="path"/> using the native
/// FindFirstFile / FindNextFile calls, descending into sub-directories in parallel.
/// </summary>
/// <param name="path">Root directory; a trailing backslash is appended when missing.</param>
/// <param name="token">Cancellation token handed to <c>Parallel.ForEach</c>.</param>
/// <returns>
/// A tuple whose <c>Item1</c> is the number of files and <c>Item2</c> the number of
/// directories. Reparse-point directories are neither counted nor traversed. When the
/// search cannot be started the result is (0, 0). Any exception raised while scanning
/// (including cancellation) is swallowed and the counts gathered so far are returned.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="path"/> is null, empty or whitespace.
/// </exception>
public static Tuple<long, long> CountFilesDirectories(
    string path,
    CancellationToken token
    )
{
    if (String.IsNullOrWhiteSpace(path))
        throw new ArgumentNullException("path", "The provided path is NULL or empty.");

    // If the provided path doesn't end in a backslash, append one.
    if (path[path.Length - 1] != '\\')
        path += '\\';

    // FindFirstFile reports failure with INVALID_HANDLE_VALUE (-1), not with zero, so
    // that is the value used for "no handle to close".
    IntPtr invalidHandle = new IntPtr(-1);
    IntPtr hFile = invalidHandle;
    Win32.Kernel32.WIN32_FIND_DATA fd = new Win32.Kernel32.WIN32_FIND_DATA();

    long files = 0;
    long dirs = 0;

    try
    {
        List<string> directories = new List<string>();

        try
        {
            hFile = Win32.Kernel32.FindFirstFile(
                path + "*", // Discover all files/folders by ending a directory with "*", e.g. "X:\*".
                ref fd
                );

            // If we encounter an error, or there are no files/directories, we return no entries.
            if (hFile == invalidHandle)
                return Tuple.Create<long, long>(0, 0);

            do
            {
                if ((fd.dwFileAttributes & FileAttributes.Directory) != 0)
                {
                    // Count and remember real sub-directories only: skip reparse points
                    // and the "." / ".." pseudo-entries.
                    if ((fd.dwFileAttributes & FileAttributes.ReparsePoint) == 0 &&
                        fd.cFileName != "." && fd.cFileName != "..")
                    {
                        directories.Add(System.IO.Path.Combine(path, fd.cFileName));
                        dirs++;
                    }
                }
                else
                {
                    // Every non-directory entry is a file. The Archive attribute is only
                    // a backup marker and may be cleared, so it must not be used to
                    // decide whether an entry is a file.
                    files++;
                }
            }
            while (Win32.Kernel32.FindNextFile(hFile, ref fd));
        }
        finally
        {
            // Release the search handle as soon as this level has been enumerated, so it
            // is not held open for the whole parallel recursion below.
            if (hFile != invalidHandle)
                Win32.Kernel32.FindClose(hFile);
        }

        // Iterate over each discovered directory in parallel to maximize file/directory counting performance,
        // calling itself recursively to traverse each directory completely.
        Parallel.ForEach(
            directories,
            new ParallelOptions()
            {
                CancellationToken = token
            },
            directory =>
            {
                var count = CountFilesDirectories(
                    directory,
                    token
                    );

                // The totals are shared by all workers of this level.
                lock (directories)
                {
                    files += count.Item1;
                    dirs += count.Item2;
                }
            });
    }
    catch (Exception)
    {
        // Deliberate best effort: a failure (or cancellation) yields the partial counts
        // gathered so far. Handle as desired.
    }

    return Tuple.Create<long, long>(files, dirs);
}