我的C函数在系统目录上递归，在大的输入上递归。我怎样才能解决这个问题？_C_Recursion_Segmentation Fault_Stack Overflow_Directory Structure

我的C函数递归遍历系统目录，在输入较大时会发生段错误。我该如何解决这个问题？

c recursion

我的C函数在系统目录上递归，在大的输入上递归。我怎样才能解决这个问题？,c,recursion,segmentation-fault,stack-overflow,directory-structure,C,Recursion,Segmentation Fault,Stack Overflow,Directory Structure,我试图编写一个程序，搜索给定目录及其所有子目录和文件（以及子目录的子目录和文件等），并打印出具有给定权限集的所有文件（int target\u perm）它可以在较小的输入上正常工作，但当它必须在包含大量文件的目录上递归时，会返回分段错误（内核转储）。Valgrind揭示这是由于堆栈溢出造成的有没有什么方法可以修复我的函数，使它可以处理任意大的目录 void recurse_dir(struct stat *sb, struct dirent *de, DIR *dr, int target

我试图编写一个程序，搜索给定目录及其所有子目录和文件（以及子目录的子目录和文件等），并打印出具有给定权限集的所有文件（

int target_perm

）

它可以在较小的输入上正常工作，但当它必须在包含大量文件的目录上递归时，会返回

分段错误（内核转储）

。Valgrind揭示这是由于堆栈溢出造成的

有没有什么方法可以修复我的函数，使它可以处理任意大的目录

/*
 * One directory on the current descent path. The chain of these nodes is
 * consulted before descending so that a symbolic link resolving to an
 * ancestor directory cannot make the traversal loop forever.
 */
struct recurse_dir_id {
    struct stat st;                       /* stat() result of the directory */
    const struct recurse_dir_id *parent;  /* enclosing directory, or NULL */
};

/* Returns 1 if `st` names the same directory (device + inode) as any node in `chain`. */
static int recurse_dir_seen(const struct recurse_dir_id *chain, const struct stat *st)
{
    for (; chain != NULL; chain = chain->parent) {
        if (chain->st.st_dev == st->st_dev && chain->st.st_ino == st->st_ino) {
            return 1;
        }
    }
    return 0;
}

/*
 * Handles `de` and every remaining entry of `dr`, descending into
 * subdirectories. Siblings are processed by a loop; the function only calls
 * itself once per directory level, so stack use grows with directory depth
 * rather than with the number of entries.
 */
static void recurse_dir_walk(struct stat *sb, struct dirent *de, DIR *dr,
                             int target_perm, const char *curr_path,
                             const struct recurse_dir_id *chain)
{
    /* curr_path never changes at this level: measure it once. */
    const size_t path_len = strlen(curr_path);

    for (; de != NULL; de = readdir(dr)) {
        if (strcmp(".", de->d_name) == 0 || strcmp("..", de->d_name) == 0) {
            continue;
        }

        /*
         * Heap buffer instead of a VLA / PATH_MAX array keeps the frame small.
         * The two extra bytes hold a trailing '/' (added only when descending)
         * and the terminator.
         */
        const size_t name_len = strlen(de->d_name);
        char *full_file_name = malloc(path_len + name_len + 2);
        if (full_file_name == NULL) {
            fprintf(stderr, "Error: Cannot allocate memory for '%s%s'. %s\n",
                    curr_path, de->d_name, strerror(errno));
            continue;
        }
        memcpy(full_file_name, curr_path, path_len);
        memcpy(full_file_name + path_len, de->d_name, name_len + 1);

        if (stat(full_file_name, sb) < 0) {
            fprintf(stderr, "Error: Cannot stat '%s'. %s\n", full_file_name, strerror(errno));
            free(full_file_name);
            continue;
        }

        char* curr_perm_str = permission_string(sb);
        int curr_perm = permission_string_to_bin(curr_perm_str);
        free(curr_perm_str);

        if (curr_perm == target_perm) {
            printf("%s\n", full_file_name);
        }

        /*
         * stat() follows symbolic links, so a link pointing at an ancestor
         * shows up as a directory; refusing to re-enter anything already on
         * the descent path is what keeps the walk finite.
         */
        if (S_ISDIR(sb->st_mode) && !recurse_dir_seen(chain, sb)) {
            DIR *dp = opendir(full_file_name);

            if (dp != NULL) {
                struct recurse_dir_id self;
                struct stat b;   /* scratch stat buffer for the child level */

                self.st = *sb;
                self.parent = chain;

                /* Turn "<path><name>" into the child prefix "<path><name>/" in place. */
                full_file_name[path_len + name_len] = '/';
                full_file_name[path_len + name_len + 1] = '\0';

                recurse_dir_walk(&b, readdir(dp), dp, target_perm, full_file_name, &self);
                closedir(dp);
            } else {
                fprintf(stderr, "Error: Cannot open directory '%s'. %s.\n", de->d_name, strerror(errno));
            }
        }

        free(full_file_name);
    }
}

/*
 * Prints the full path of every entry reachable from an open directory whose
 * permissions equal target_perm.
 *
 * sb          scratch buffer that receives the stat() result of each entry at
 *             this level
 * de          first entry to process, already read from dr (NULL is accepted
 *             and means there is nothing to do)
 * dr          open directory stream; it is read to its end but not closed
 * target_perm value compared against permission_string_to_bin() of each entry
 * curr_path   prefix prepended to each entry name; it is concatenated as-is,
 *             so it must already end with '/'
 *
 * Entries that cannot be stat()ed and directories that cannot be opened are
 * reported on stderr and skipped. A directory that is already on the current
 * descent path (for example, reached through a symbolic link to a parent) is
 * still matched and printed but not entered again.
 */
void recurse_dir(struct stat *sb, struct dirent *de, DIR *dr, int target_perm, char* curr_path) {
    struct recurse_dir_id root;
    const struct recurse_dir_id *chain = NULL;

    /*
     * Record the starting directory so links back to it are detected as well.
     * If it cannot be stat()ed the walk simply starts with an empty chain.
     */
    if (stat(curr_path, &root.st) == 0) {
        root.parent = NULL;
        chain = &root;
    }

    recurse_dir_walk(sb, de, dr, target_perm, curr_path, chain);
}

void recurse_dir(struct stat *sb, struct dirent *de, DIR *dr, int target_perm, char* curr_path) {
    if ((strcmp(".", de->d_name) != 0) && (strcmp("..", de->d_name) != 0)) {
        char full_file_name[strlen(curr_path) + strlen(de->d_name)+1];
        strcpy(full_file_name, curr_path);
        strcpy(full_file_name + strlen(curr_path), de->d_name);
        full_file_name[strlen(curr_path) + strlen(de->d_name)] = '\0';
        if (stat(full_file_name, sb) < 0) {
            fprintf(stderr, "Error: Cannot stat '%s'. %s\n", full_file_name, strerror(errno));
        } else {
            char* curr_perm_str = permission_string(sb);
            int curr_perm = permission_string_to_bin(curr_perm_str);
            free(curr_perm_str);
            if ((curr_perm == target_perm )) {
                printf("%s\n", full_file_name);
            }
            if (S_ISDIR(sb->st_mode)) {
                DIR *dp;
                struct dirent *dent;
                struct stat b;
                dp = opendir(full_file_name);
                char new_path[PATH_MAX];
                strcpy(new_path, full_file_name);
                new_path[strlen(full_file_name)] ='/';
                new_path[strlen(full_file_name)+1] ='\0';
                if (dp != NULL) {
                    if ((dent = readdir(dp)) != NULL) {
                        recurse_dir(&b, dent, dp, target_perm, new_path);
                    }
                    closedir(dp);
                } else {
                    fprintf(stderr, "Error: Cannot open directory '%s'. %s.\n", de->d_name, strerror(errno));
                }
            }
        }
    }
    if ((de = readdir(dr)) != NULL) {
        recurse_dir(sb, de, dr, target_perm, curr_path);
    }
}

这里的问题实际上不是递归，尽管我在下面已经解决了这个特定的问题。问题是，目录层次结构可能包含符号链接，这些链接使某些目录成为其父目录的别名。Ubuntu安装的一个示例：

$ ls -ld /usr/bin/X11
lrwxrwxrwx 1 root root 1 Jan 25  2018 /usr/bin/X11 -> .

$ # Just for clarity:
$ readlink -f /usr/bin/X11
/usr/bin

因此，一旦遇到

/usr/bin/X11

，就会进入一个无限循环。这将迅速耗尽堆栈，但摆脱递归不会解决问题，因为无限循环仍然是无限循环

您需要做的是：

避免跟随符号链接，或
（更好）避免跟随解析为目录的符号链接，或
跟踪递归扫描期间遇到的所有目录，并检查以确保尚未检查任何新目录

前两种解决方案比较简单（只需检查

struct stat

中的filetype字段），但它们将无法列出您可能感兴趣的一些文件（例如，当符号链接解析为您正在检查的目录结构之外的目录时）

一旦你解决了以上问题，你可能会考虑这些建议：

在递归函数中，最好将堆栈帧的大小减小到尽可能小的程度。目录遍历期间的最大递归深度不应超过文件名中的最大路径段数（但请参见下面的第3点），该值不应太大。（例如，在我的系统中，

/usr

层次结构中文件的最大深度是16。）但使用的堆栈量是堆栈帧大小和最大递归深度的乘积，因此，如果堆栈帧较大，则递归容量较小

为了实现上述目标，您应该避免使用本地数组。例如，声明

char new_path[PATH_MAX];

将

PATH_MAX

字节添加到每个堆栈帧（在我的系统上是4K）。这还要加上VLA

full_file_name 所占的空间。值得一提的是，我在64位Linux系统上编译了您的函数，发现堆栈帧大小是4280字节加上VLA的大小（为了对齐，向上取整到16的倍数）。对于一个合理的文件层次结构，这大概不会使用超过150KB的堆栈。但是，如果您的系统的PATH_MAX值更大
（无论如何，都不能依赖该值作为文件路径的最大长度），则堆栈用量可能会显著增加
好的样式要求为这些变量使用动态分配的内存。但更好的方法是避免使用这么多不同的缓冲区

另外，您还需要了解strlen的成本。为了计算字符串的长度，strlen
函数需要扫描所有字节以查找NUL终止符。与高级语言中的字符串对象不同，C字符串不包含任何长度指示。所以当你这样做的时候：
char full_file_name[strlen(curr_path) + strlen(de->d_name)+1];
strcpy(full_file_name, curr_path);
strcpy(full_file_name + strlen(curr_path), de->d_name);
full_file_name[strlen(curr_path) + strlen(de->d_name)] = '\0';

结果是，即使这些字符串的长度不会改变，您也会扫描三次curr_path
和两次de->d_name
。与其这样做，不如将长度保存在局部变量中，以便可以重用
或者，您可以找到另一种连接字符串的方法。一个简单的可能性是动态分配内存
char* full_file_name;
asprintf(&full_file_name, "%s%s", curr_path, de->d_name);

char* newpath;
asprintf("%s/", full_file_path);

char* full_file_name;
/* "%c" with a 0 argument emits one extra NUL byte, which reserves a spare
 * byte in the allocation so that a '/' can later be appended in place.
 * (A literal "\0" inside the format would only end the format string early
 * and reserve nothing.) */
int full_file_name_len = asprintf(&full_file_name, "%s%s%c",
                                     curr_path, de->d_name, 0);
if (full_file_name_len < 0) { /* handle error */ }
--full_file_name_len; /* Bytes written includes the extra NUL from %c */

/* Much later, instead of creating new_path: */

if (dp != NULL) {
    /* Overwrite the first NUL; the spare NUL after it still terminates the string. */
    full_file_name[full_file_name_len] = '/';

    if ((dent = readdir(dp)) != NULL) {
        recurse_dir(&b, dent, dp, target_perm, full_file_name);
    }

    /* Restore the plain file name for any later use. */
    full_file_name[full_file_name_len] = '\0';

    closedir(dp);               
} 

    char* curr_perm_str = permission_string(sb);
    int curr_perm = permission_string_to_bin(curr_perm_str);
    free(curr_perm_str);

    int curr_perm = sb->st_mode & (S_IRWXU|S_IRWXG|S_IRWXO);

    int curr_perm = sb->st_mode
                    & (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO);