Linux posix C regexec（）未返回所有匹配项_C_Regex_Linux_Posix

Linux posix C regexec（）未返回所有匹配项

c regex linux

Linux posix C regexec（）未返回所有匹配项,c,regex,linux,posix,C,Regex,Linux,Posix,我有下面的脚本，它解析一个进程内存，寻找字符串匹配，一切正常，但转储一个编辑器的进程（在本例中为nano）和1193可能的匹配（如果我转储内存，然后对其执行egrep，则该脚本工作），但我的代码只输出3匹配。有什么想法吗 #ifdef TARGET_64 // for 64bit target (see /proc/cpuinfo addr size virtual) #define MEM_MAX (1ULL << 48) #else #define MEM_MAX (1ULL

我有下面的脚本，它解析一个进程内存，寻找字符串匹配，一切正常，但转储一个编辑器的进程（在本例中为nano）和

可能的匹配（如果我转储内存，然后对其执行egrep，则该脚本工作），但我的代码只输出

匹配。有什么想法吗

#ifdef TARGET_64
// for 64bit target (see /proc/cpuinfo addr size virtual)
 #define MEM_MAX (1ULL << 48)
#else
 #define MEM_MAX (1ULL << 32)
#endif

#define _LARGEFILE64_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ptrace.h>
#include <regex.h>

int main(int argc, char **argv)
{
        if (argc < 2) {
                printf("Usage: %s <pid>\n", argv[0]);
                exit(1);
        }

        char buf[128];
        int pid = atoi(argv[1]);
        snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
        int fd = open(buf, O_RDONLY);
        if (fd == -1) {
                fprintf(stderr, "Error opening mem file: %m\n");
                exit(1);
        }

        int status ,i;
        int cflags = REG_EXTENDED;
        regmatch_t pmatch[1];
        const size_t nmatch=1;
        regex_t reg;
        const char *pattern="([a-zA-Z]{18,20})";
        regcomp(&reg, pattern, cflags);

        long ptret = ptrace(PTRACE_ATTACH, pid, 0, 0);
        if (ptret == -1) {
                fprintf(stderr, "Ptrace failed: %s\n", strerror(errno));
                close(fd);
                exit(1);
        }

        unsigned char page[4096];
        unsigned long long offset = 0;

        while (offset < MEM_MAX) {
                lseek64(fd, offset, SEEK_SET);

                ssize_t ret;
                ret = read(fd, page, sizeof(page));

                if (ret > 0) {
                        status = regexec(&reg, page, nmatch, pmatch, 0);
                        if(status == 0){
                                for (i=pmatch[0].rm_so; i<pmatch[0].rm_eo; ++i) {
                                        putchar(page[i]);
                                }
                                printf("\n");
                        }
                }

                offset += sizeof(page);
        }

        ptrace(PTRACE_DETACH, pid, 0, 0);
        close(fd);
        regfree(&reg);
        return 0;
}

好的，用

libpcre

：

#include <pcre.h>
#include <locale.h>

....

        const char *error;
        int   erroffset;
        pcre *re;
        int   rc;
        int   i;
        int   ovector[100];
        char *regex = "([a-zA-Z]{18,20})";
        re = pcre_compile (regex,          /* the pattern */
                        PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                        &error,         /* for error message */
                        &erroffset,     /* for error offset */
                        0);             /* use default character tables */
        if (!re)
        {
                printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
        }

....

                if (ret > 0) {
                        //
                        unsigned int offset = 0;
                        while (offset < sizeof(page) && (rc = pcre_exec(re, 0, page, sizeof(page), offset, 0, ovector, sizeof(ovector))) >= 0)
                        {
                                for(i = 0; i < rc; ++i)
                                {
                                        printf("%.*s\n", ovector[2*i+1] - ovector[2*i], page + ovector[2*i]);
                                }
                                offset = ovector[1];
                        }
                        //
                }

#包括
#包括
....
常量字符*错误；
内部误差补偿；
pcre*re；
int rc；
int i；
int OVERECTOR[100]；
char*regex=“（[a-zA-Z]{18,20}）”；
re=pcre_compile（regex，/*模式*/
PCRE_多线| PCRE_DOTALL | PCRE_新线|任何CRLF，
&错误，/*表示错误消息*/
&erroffset，/*表示错误偏移量*/
0);             /* 使用默认字符表*/
如果（！re）
{
printf（“pcre_编译失败（偏移量：%d），%s\n”，erroffset，error）；
返回-1；
}
....
如果（ret>0）{
//
无符号整数偏移量=0；
而（偏移量=0）
{
对于（i=0；i

我认为使用

libpcre

regexec

会在

\0

字符处停止解析，因此您需要将嵌入的

\0

转换为其他内容（并以

\0

结束每个页面）。此外，一次解析一个页面的方法将丢失扩展到分页符上的字符串if（ret>0）之后编码{？或者任何关于如何一次性解析所有字符串的想法，这样我就不会错过字符串了？已经制作了一个

libpcre

版本，但是我想让它与posix正则表达式一起工作。如果你有足够的时间来编码，并且你真的想加速它，你应该像gnu

grep

那样做。为你的特殊CA创建一个跳转表e不是很难。处理页面边界需要更多的工作。如果您对许可证没问题，您可能可以重用gnu

grep

中的大量实际源代码。您可以在这里找到有关实现的更多信息：更简单的方法是使用

egrep

。（不过，我不确定

egrep

是否适用于大于4GB的文件。

#include <pcre.h>
#include <locale.h>

....

        const char *error;
        int   erroffset;
        pcre *re;
        int   rc;
        int   i;
        int   ovector[100];
        char *regex = "([a-zA-Z]{18,20})";
        re = pcre_compile (regex,          /* the pattern */
                        PCRE_MULTILINE|PCRE_DOTALL|PCRE_NEWLINE_ANYCRLF,
                        &error,         /* for error message */
                        &erroffset,     /* for error offset */
                        0);             /* use default character tables */
        if (!re)
        {
                printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
        }

....

                if (ret > 0) {
                        //
                        unsigned int offset = 0;
                        while (offset < sizeof(page) && (rc = pcre_exec(re, 0, page, sizeof(page), offset, 0, ovector, sizeof(ovector))) >= 0)
                        {
                                for(i = 0; i < rc; ++i)
                                {
                                        printf("%.*s\n", ovector[2*i+1] - ovector[2*i], page + ovector[2*i]);
                                }
                                offset = ovector[1];
                        }
                        //
                }