Linux POSIX C LibPCRE`double-free或corruption(fasttop)`crash

Linux POSIX C LibPCRE `double free or corruption (fasttop)` 崩溃,c,linux,pcre,glibc,C,Linux,Pcre,Glibc,我有以下代码(它读取进程虚拟内存并使用libpcre匹配一些字符串),它编译时没有错误,但如果我使用-Wall编译它,会出现一些警告(列在代码之后)。编译后的代码可以运行但会崩溃,并报错 *** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 ***,我怀疑问题出在 pcre_get_substring(page, vector, pairs, 0, &buff) 这一行上,因为函数的第一个参数需要'const char*'但得到'unsigned char*',我如何

我有以下代码(它读取进程虚拟内存并使用libpcre匹配一些字符串),它编译时没有错误,但如果我使用
-Wall
编译它,我会在代码后显示一些警告

编译后的代码可以运行但会崩溃,并报错
*** glibc detected *** ./readmempcreuniq: double free or corruption (fasttop): 0x097b9c80 ***
,我怀疑问题出在
pcre_get_substring(page, vector, pairs, 0, &buff) 这一行上
因为函数的第一个参数需要
'const char*'
但得到
'unsigned char*'
,我如何才能使它正确

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
#define MEM_MAX (1ULL << 48)
#else
#define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <pcre.h>
#include <locale.h>

/*
 * Scans the memory of the process whose pid is given in argv[1] for runs of
 * 20 to 22 decimal digits and prints the distinct matches found in each page.
 *
 * The process is attached with ptrace, /proc/<pid>/mem is read in 4096-byte
 * steps from offset 0 up to MEM_MAX, and every page that can be read is
 * searched with PCRE.
 *
 * Returns 0 after the scan; exits with status 1 on a usage error, when the
 * mem file cannot be opened, when ptrace attach fails, or when realloc fails.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage: %s <pid>\n", argv[0]);
        exit(1);
    }

    char buf[128];
    int pid = atoi(argv[1]);
    snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
    int fd = open(buf, O_RDONLY);
    if (fd == -1) {
        // %m is a glibc extension that prints strerror(errno).
        fprintf(stderr, "Error opening mem file: %m\n");
        exit(1);
    }

    pcre *f;
    pcre_extra *f_ext;
    char *pattern = "([0-9]{20,22})";
    const char *errstr;
    int errchar;
    int vector[50];
    int vecsize = 50;
    int pairs;
    const char *buff;
    const unsigned char *tables;
    int a;
    // count and matches live across all pages; see the NOTE at free(matches).
    int count = 0;
    const char **matches = NULL;
    const char **more_matches;

    // Query the current locale and re-apply it, then build PCRE character
    // tables for that locale.
    char *loc = setlocale(LC_ALL, 0);
    setlocale(LC_ALL, loc);
    tables = pcre_maketables();

    long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
    if (ptret == -1) {
        fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
        close(fd);
        exit(1);
    }

    unsigned char page[4096];
    unsigned long long offset = 0;


    // Walk the whole address range one page at a time; offsets that cannot be
    // read make read() fail and are simply skipped.
    while (offset < MEM_MAX) {
        lseek64(fd, offset, SEEK_SET);

        ssize_t ret;
        ret = read(fd, page, sizeof(page));

        if (ret > 0) {
            // NOTE(review): when ret == sizeof(page) this stores one byte past
            // the end of page[]; reading sizeof(page)-1 bytes would leave room
            // for the terminator.
            page[ret] = '\0';
            // The pattern is recompiled for every page that is read.
            if((f = pcre_compile(pattern, PCRE_CASELESS|PCRE_MULTILINE, &errstr, &errchar, tables)) == NULL)
            {
                printf("Error: %s\nCharacter N%i\nPattern:%s\n", errstr, errchar, pattern);
            }
            else
            {
                // NOTE(review): the study data in f_ext is never released.
                f_ext = pcre_study(f, 0, &errstr);
                a = 0;

                // NOTE(review): the subject length passed is sizeof(page), not
                // the number of bytes actually read (ret). page is unsigned
                // char while the API takes const char *, which is what the
                // -Wall signedness warnings refer to.
                while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
                {
                    // Copies the whole match (substring 0) into storage
                    // obtained by PCRE; buff is never passed to
                    // pcre_free_substring in this program.
                    pcre_get_substring(page, vector, pairs, 0, &buff);
                    //printf("%s\n", buff);
                    // Grow the match array by one slot and append the copy.
                    more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                    if (more_matches!=NULL)
                    {
                        matches=more_matches;
                        matches[count++]=buff;
                    }
                    else
                    {
                        free(matches);
                        puts("Error (re)allocating memory");
                        exit(1);
                    }
                    // Continue searching one byte past the end of this match.
                    a = vector[1] + 1;
                }
                // Collect the distinct strings among matches[0..count) with a
                // quadratic scan.
                // NOTE(review): matches_len is 0 when nothing has matched yet,
                // and a zero-length variable-length array is undefined.
                int matches_len = count;
                const char *uniques[matches_len];
                int uniques_len = 0;
                int already_exists;
                int i, j;
                for (i = 0; i < matches_len; i++)
                {
                    already_exists = 0;
                    for ( j = 0; j < uniques_len; j++)
                    {
                        if (!strcmp(matches[i], uniques[j]))
                        {
                            already_exists = 1;
                            break;
                        }
                    }
                    if (!already_exists)
                    {
                        uniques[uniques_len] = matches[i];
                        uniques_len++;
                    }
                }
                for (i = 0; i < uniques_len; i++)
                {
                    printf("%s\n", uniques[i]);
                }
                // NOTE(review): matches keeps its freed value and count keeps
                // its old total, so the next page that matches hands a dangling
                // pointer to realloc() -- the reported double free.
                free(matches);
                pcre_free(f);
            }

        }

        offset += sizeof(page);
    }

    ptrace(PTRACE_DETACH, pid, 0, 0);
    close(fd);
    return 0;
}
使用
-Wall
开关编译代码时出现警告:

xtmtrx@server:~/regex/proc$ gcc -o readmempcreuniq readmempcreuniq.c -lpcre -Wall
readmempcreuniq.c:在函数“main”中:
readmempcreuniq.c:83:警告:传递“pcre_exec”的第 3 个参数时,指针目标的符号性(signedness)不同
/usr/include/pcre.h:286:注意:应为“const char *”,但参数的类型为“unsigned char *”
readmempcreuniq.c:85:警告:传递“pcre_get_substring”的第 1 个参数时,指针目标的符号性(signedness)不同
/usr/include/pcre.h:297:注意:应为“const char *”,但参数的类型为“unsigned char *”

编辑:

根据@stdcall技巧,我使用
efence
编译了程序,然后在核心转储上使用GDB:

xtmtrx@server:~/regex/proc$ ./readmempcreuniq 6036

  Electric Fence 2.1 Copyright (C) 1987-1998 Bruce Perens.
5,
Segmentation fault (core dumped)
xtmtrx@server:~/regex/proc$ gdb ./readmempcreuniq core
GNU gdb (GDB) 7.2-ubuntu
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i686-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/regex/proc/readmempcreuniq...done.
[New Thread 6093]
Reading symbols from /lib/libpcre.so.3...(no debugging symbols found)...done.
Loaded symbols for /lib/libpcre.so.3
Reading symbols from /usr/lib/libefence.so.0...(no debugging symbols found)...done.
Loaded symbols for /usr/lib/libefence.so.0
Reading symbols from /lib/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/libc.so.6
Reading symbols from /lib/libpthread.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib/libpthread.so.0
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
Core was generated by `./readmempcreuniq 6036'.
Program terminated with signal 11, Segmentation fault.
#0  0x08048ef8 in main (argc=2, argv=0xbfe1d2d4) at readmempcreuniq.c:125
125                                                     uniques[uniques_len] = matches[i];
致:


free()
之后,您永远不会将
matches
重置为
NULL
。在循环的下一轮之前将其重置。因此,在第一轮reallocs之后,它仍然保留原始值

在进入内部处理循环之前将其设置为
NULL
(第一次循环时冗余),或者在
free(matches)
之后立即将其设置回
NULL
。或者,您可以简单地使用初始值
NULL
将其设置为下一个外部循环的包含范围的局部值,但前面提到的更改是我所能想到的最小更改

示例

// Start every page with no match array, so realloc() below allocates fresh
// storage instead of reusing the pointer freed after the previous page.
// NOTE(review): count is not reset here, so the fresh array has count+1 slots
// of which only the last one is written.
matches = NULL; // HERE
while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
{
        // Copy the whole match (substring 0) into buff.
        pcre_get_substring(page, vector, pairs, 0, &buff);
        //printf("%s\n", buff);
        // Grow the array by one slot and append the new match.
        more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
        if (more_matches!=NULL)
        {
                matches=more_matches;
                matches[count++]=buff;
        }
        else
        {
                free(matches);
                puts("Error (re)allocating memory");
                exit(1);
        }
        // Continue searching one byte past the end of this match.
        a = vector[1] + 1;
}
或者

// Print the distinct matches collected for this page.
for (i = 0; i < uniques_len; i++)
{
        printf("%s\n", uniques[i]);
}
free(matches);
// Clear the freed pointer so the next realloc() does not receive it again.
matches = NULL; // or HERE
pcre_free(f);
另外,`page[ret] = '\0';` 这一行似乎是在尝试设置空字符终止符。如果是这样,那么当缓冲区已被读满时,您就越界写入了缓冲区,这是未定义的行为。应该是这样的:

ssize_t ret = read(fd, page, sizeof(page)-1); // NOTE SPACE FOR TERM
if (ret > 0) {
        page[ret] = 0;
如果缓冲区的大小是特意选定的(即您出于某种原因选择了4K),那么数组应声明为4097字节,这样在读满整整4K数据后仍有空间存放终止符


还有另一个…

您正在阅读该页面,我不能声称它需要或不需要像我之前在代码中显示的那样被终止。但假设是,而你是我建议的(或..不是),这看起来也是错误的:

while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
这里传递的是整个缓冲区的大小;不是您读取的实际数据的大小。我是第一个告诉您我不熟悉API的人,但我相当肯定这应该是:

// notice the length of the buffer passed, ret
while((pairs = pcre_exec(f, f_ext, page, ret, a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
换句话说,在一个较小的读数上,你告诉它数据比实际长度长。同样,我对他们的API很天真,但这似乎是合理的


唯一匹配项的数量…

希望更容易阅读

// Build the list of distinct strings among matches[0..count) and print it.
int matches_len = count, uniques_len = 0;
int i = 0, j = 0;

const char *uniques[matches_len];
for (i=0; i < matches_len; ++i)
{
    // Look for matches[i] among the strings already kept.
    for (j = 0; j < uniques_len; ++j)
    {
        if (!strcmp(matches[i], uniques[j]))
            break;
    }

    // The inner loop ran to completion, so matches[i] has not been seen yet.
    if (j == uniques_len)
        uniques[uniques_len++] = matches[i];
}

for (i = 0; i < uniques_len; ++i)
    printf("%s\n", uniques[i]);

警告。我对这个API一无所知,但我在这里看到的和在网上简要回顾的内容。YMMV UAYOR。但似乎你一直都有。只需积累独立于页面的Unique(我认为这仍然是一个问题,页面边界,但这是另一天的问题)。

除了 WhozCraig 指出的问题之外:

代码定义了

  const char *uniques[matches_len];

但是循环其索引
j
,直到
警告是无害的。使用显式转换来摆脱它们,或者首先在整个过程中使用
char
。我不认为我可以只使用
char
而不是
unsigned char page[4096]
;我在 pcre_get_substring(page, vector, pairs, 0, &buff) 这一行上得到了错误,
因为此函数需要一个
unsigned char *
。tables 必须是 unsigned char,所以 page 应为普通 char。在我的计算机上
pcre_get_substring
需要普通 char 指针。我在网上也找不到任何不同的资料。好的。我将再添加一件事(这就是我认为您正在尝试做的:构建一个不重复的匹配列表)。我不能保证它是完美的,因为我不知道API,但是我知道算法(无论如何,我想是这样)紧接着
free(matches)
现在我得到了预期的
28
中的
5个
匹配项,并且我得到了一个
分段错误
@xtmtrx一个类似的堆栈转储(谢谢你,顺便说一句)很可能需要找到那个堆栈转储。然而,这确实是一个问题,在重用的
realloc()
算法中并不少见。是的,我这样做了,现在
glibc
错误被修复,仍然得到
分段错误我甚至可以删除,这是我用来欺骗POSIX正则表达式以更好地匹配的东西,因为它匹配(不像pcre),直到找到第一个'\0'。(我以前在这段代码中使用过POSIX正则表达式,但后来使用了更可靠的pcre)@xtmtrx ok,发布。希望就是这样。不管它值多少钱,都要接受它。为了加快实现速度,我在调试时刚开始硬编码了一个本地pid。你需要把pid的cmd行解析放回去,但其他的都应该可以。我完全会投这个票,因为在你之前我错过了或没有找到的东西=P@WhozCraig:我已经解决了空指针问题,您的速度更快。以及
`page[ret] = 0` 这个发现。;-)如何正确编写 `for (j = 0; j < uniques_len; j++)`
循环
,这样我就不会越界了?好的。我得打电话给wtf。将整个段归零,然后只保存一个(最后一个)。realloc保留先前的内容有点重要。换句话说,丢失循环中收集的所有先前指针如何修复strcmp调用@xtmtrx:修复了我建议的调整。
ssize_t ret = read(fd, page, sizeof(page)-1); // NOTE SPACE FOR TERM
if (ret > 0) {
        page[ret] = 0;
while((pairs = pcre_exec(f, f_ext, page, sizeof(page), a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
// notice the length of the buffer passed, ret
while((pairs = pcre_exec(f, f_ext, page, ret, a, PCRE_NOTEMPTY, vector, vecsize)) >=0)
int matches_len = count, uniques_len = 0;
int i = 0, j = 0;

const char *uniques[matches_len];
for (i=0; i < matches_len; ++i)
{
    for (j = 0; j < uniques_len; ++j)
    {
        if (!strcmp(matches[i], uniques[j]))
            break;
    }

    if (j == uniques_len)
        uniques[uniques_len++] = matches[i];
}

for (i = 0; i < uniques_len; ++i)
    printf("%s\n", uniques[i]);
#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <pcre.h>
#include <locale.h>
#include <ctype.h>

/*
 * Attaches to a process, walks the address ranges listed in /proc/<pid>/maps,
 * reads each range from /proc/<pid>/mem one 4096-byte page at a time and
 * prints every distinct string of 8 hexadecimal digits found.
 *
 * argv[1], when present, is the decimal pid to inspect; without it the
 * hard-coded debugging pid is used.
 *
 * Returns 0 after the scan, 1 for a malformed pid argument and -1 if the
 * pattern does not compile; exits with status 1 if ptrace attach fails.
 */
int main(int argc, char **argv)
{
    // Debugging default; a pid given on the command line overrides it.
    int pid = 5916;
    if (argc > 1)
    {
        char *end = NULL;
        errno = 0;
        long val = strtol(argv[1], &end, 10);
        // Reject empty input, trailing junk, overflow and non-positive pids.
        if (errno != 0 || end == argv[1] || *end != '\0' ||
            val <= 0 || val != (long)(int)val)
        {
            fprintf(stderr, "Usage: %s [pid]\n", argv[0]);
            return 1;
        }
        pid = (int)val;
    }

    setlocale(LC_ALL,"");

    const char *error = NULL;
    int erroffset = 0;
    const char **uniques = NULL;    // distinct matches, each owned by this list
    size_t uniques_len = 0;

    const char regex[] = "[0-9A-Fa-f]{8}";
    pcre* re = pcre_compile (regex,          /* the pattern */
                    PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                    &error,         /* for error message */
                    &erroffset,     /* for error offset */
                    0);             /* use default character tables */
    if (!re)
    {
        printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
    }

    // start proc trace
    long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
    if (ptret == -1)
    {
        fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
        pcre_free(re);
        exit(1);
    }

    // Open both proc files and report whichever one fails instead of silently
    // skipping the scan.
    char path[256];
    snprintf(path, sizeof(path), "/proc/%d/maps", pid);
    FILE *maps = fopen(path, "r");
    if (!maps)
        fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));

    snprintf(path, sizeof(path), "/proc/%d/mem", pid);
    int mem = open(path, O_RDONLY);
    if (mem == -1)
        fprintf(stderr, "Error opening %s: %s\n", path, strerror(errno));

    if(maps && (mem != -1))
    {
        char buf[BUFSIZ + 1];
        while(fgets(buf, BUFSIZ, maps))
        {
            // Each maps line begins with "<start>-<end>" in hexadecimal.
            long long unsigned int start, end;
            if (sscanf(buf, "%llx-%llx", &start, &end) != 2)
                break;

            printf("reading %llx - %llx\n", start, end);

            lseek64(mem, start, SEEK_SET);
            while (start < end)
            {
                char page[4096] =  {0};
                ssize_t rd = read(mem, page, sizeof(page));
                if (rd < 0)
                    break;      // unreadable region: move on to the next one

                start += sizeof(page);

                int ov[128] = {0};
                int next_off = 0;
                int rc = 0;

                // Only the rd bytes actually read are searched.
                while ((rc = pcre_exec(re, 0, page, (int)(rd), next_off, 0, ov, 128)) >= 0)
                {
                    int i = 0;
                    for(; i < rc; ++i)
                    {
                        const char *sp = NULL;
                        // A failed copy leaves sp unusable; skip it rather
                        // than hand NULL to strcmp().
                        if (pcre_get_substring(page, ov, rc, i, &sp) < 0)
                            continue;

                        // search unique list
                        size_t j=0;
                        for (;j<uniques_len;++j)
                        {
                            if (!strcmp(sp, uniques[j]))
                                break;
                        }

                        if (uniques_len == j)
                        {
                            // New string: the list takes ownership of sp.
                            const char **tmp = realloc(uniques, (uniques_len+1)*sizeof(*uniques));
                            if (tmp == NULL)
                            {
                                perror("Failed to resize uniques.");
                                pcre_free_substring(sp);
                            }
                            else
                            {
                                uniques = tmp;
                                uniques[uniques_len++] = sp;
                            }
                        }
                        else
                        {   // delete string. not needed
                            pcre_free_substring(sp);
                        }
                    }
                    // Resume one byte past the start offset of the last
                    // reported substring, so overlapping matches are found too.
                    next_off = ov[2*(rc-1)]+1;
                }
            }
        }
    }

    // Close whatever was opened, including when only one of the two succeeded.
    if (maps)
        fclose(maps);
    if (mem != -1)
        close(mem);

    size_t n = 0;
    for (; n<uniques_len; ++n)
    {
        printf("%s\n", uniques[n]);
        pcre_free_substring(uniques[n]);
    }
    // %zu is the conversion that matches size_t.
    printf("total uniques: %zu\n", uniques_len);
    free(uniques);
    pcre_free(re);

    ptrace(PTRACE_DETACH, pid, 0, 0);
    return 0;
}
  const char *uniques[matches_len];
    for ( j = 0; j < uniques_len; j++)
    {
      if (!strcmp(matches[i], uniques[j]))
      {
        already_exists = 1;
        break;
      }
    }
 if (!strcmp(matches[i], uniques[j]))
          int count = 0;
          const char ** matches = NULL;
          [...]

                    more_matches = realloc(matches, (count+1)* sizeof(*more_matches));
                    if (more_matches!=NULL)
                    {
                        matches=more_matches;
                        matches[count++]=buff;
                    }
          size_t count = 0, count_prev = 0;
          const char ** matches = NULL;
          [...]

                    more_matches = realloc(matches, (count + 1) * sizeof(*more_matches));
                    if (more_matches != NULL)
                    {
                        memset(more_matches + count_prev, 0, (count + 1 - count_prev) * sizeof(*more_matches));
                        count_prev = count;
                        matches = more_matches;
                        matches[count++] = buff;
                    }