C 正则表达式文件刮取_C_Regex

C 正则表达式文件刮取

c regex

C 正则表达式文件刮取,c,regex,C,Regex,我正在用regex删除一个文件中的电子邮件地址很遗憾，我的正则表达式规则与此字符串不匹配： ” 我找不到stackoverflow的原因，我希望有人能告诉我我的规则出了什么问题这是我测试它的代码： #include <stdio.h> #include <stdlib.h> #include <regex.h> int main (void) { int match; int err; regex_t preg; regmatch_t p

我正在用正则表达式从一个文件中抓取电子邮件地址。

很遗憾，我的正则表达式规则与此字符串不匹配：

`<font size=-1><a href=mailto:mrnours@citeweb.net>_ MR NOURS _</a></font>`

我在 Stack Overflow 上没有找到原因，希望有人能告诉我我的规则出了什么问题。

这是我测试它的代码：

#include <stdio.h>
#include <stdlib.h>
#include <regex.h>

/*
 * Test driver for a POSIX extended regular expression meant to recognise an
 * e-mail address inside a fixed HTML fragment.
 *
 * Prints "match" followed by the start/end byte offsets of the whole match,
 * or "unmatch" when regexec() reports REG_NOMATCH.  Compilation and matching
 * errors are reported on stderr through regerror() instead of being ignored.
 *
 * Returns EXIT_SUCCESS when the pattern compiled and regexec() gave a
 * definite answer (match or no match), EXIT_FAILURE otherwise.
 */
int main (void)
{
  regex_t preg;
  regmatch_t pmatch[5];
  /* Derive the slot count from the array so the two can never disagree. */
  const size_t nmatch = sizeof pmatch / sizeof pmatch[0];
  char errbuf[128];
  int status = EXIT_SUCCESS;
  const char *str_request = "         <font size=-1><a href=mailto:mrnours@citeweb.net>_ MR NOURS _</a></font>          ";

  /*
   * NOTE: the trailing '$' anchors the match to the end of the subject.
   * str_request ends with spaces (and the address is followed by '>'), so
   * this pattern yields REG_NOMATCH for the string above.
   */
  const char *str_regex = "[a-zA-Z0-9][a-zA-Z0-9_.]+@[a-zA-Z0-9_]+\\.(com|net|[a-zA-Z]{2})$";

  int err = regcomp(&preg, str_regex, REG_EXTENDED);
  if (err != 0)
    {
      /* Nothing was compiled, so there is nothing to regfree() here. */
      regerror(err, &preg, errbuf, sizeof errbuf);
      fprintf(stderr, "regcomp failed: %s\n", errbuf);
      status = EXIT_FAILURE;
    }
  else
    {
      int match = regexec(&preg, str_request, nmatch, pmatch, 0);
      if (match == 0)
        {
          printf ("match\n");
          /* regoff_t may be wider than int; widen explicitly for printf. */
          printf("%ld - %ld\n", (long) pmatch[0].rm_so, (long) pmatch[0].rm_eo);
        }
      else if (match == REG_NOMATCH)
        {
          printf("unmatch\n");
        }
      else
        {
          /* Any other code is a matcher failure (e.g. out of memory). */
          regerror(match, &preg, errbuf, sizeof errbuf);
          fprintf(stderr, "regexec failed: %s\n", errbuf);
          status = EXIT_FAILURE;
        }
      /* Release the compiled pattern only after its last use. */
      regfree(&preg);
    }

  /* Keep the console window open until the user presses Enter. */
  puts ("\nPress any key\n");
  getchar ();
  return status;
}

#包括
#包括
#包括
内部主（空）
{
整数匹配；
INTERR；
regex_t preg；
regmatch_t pmatch[5]；
尺寸匹配=5；
const char*str_request=“”；
const char*str|u regex=“[a-zA-Z0-9][a-zA-Z0-9.]+@[a-zA-Z0-9.]+\\（com | net |[a-zA-Z]{2}）$”；
err=regcomp（&preg，str_regex，REG_EXTENDED）；
如果（错误==0）
{
match=regexec（&preg，str_请求，nmatch，pmatch，0）；
nmatch=preg.re_nsub；
regfree（&preg）；
如果（匹配==0）
{
printf（“匹配\n”）；
int start=pmatch[0].rm_so；
int end=pmatch[0]。rm_eo；
printf（“%d-%d\n”，开始，结束）；
}
else if（匹配==REG\u NOMATCH）
{
printf（“不匹配\n”）；
}
}
puts（“\n按任意键\n”）；
getchar（）；
返回（退出成功）；
}

我怀疑您是想把该子字符串作为一个完整的单词来匹配，因此在模式的末尾使用了 `$`（字符串结尾）锚点。但是，您要查找的子字符串并不在输入字符串的末尾。

由于 regex.h 不支持单词边界，您可以使用一种变通方法：

const char *str_regex = "([a-zA-Z0-9][a-zA-Z0-9_.]+@[a-zA-Z0-9_]+\\.(com|net|[a-zA-Z]{2}))([^a-zA-Z]|$)";
                                                                                          ^^^^^^^^^^^^^

您需要的值将驻留在捕获组1中

见此：

#包括
#包括
#包括
内部主（空）
{
整数匹配；
INTERR；
regex_t preg；
regmatch_t pmatch[5]；
size\u t nmatch=4；//匹配结果有4个组：0-整个匹配，3个捕获组
const char*str_request=“”；
const char*str|u regex=“（[a-zA-Z0-9][a-zA-Z0-9.]+@[a-zA-Z0-9.]+\\（com | net |[a-zA-Z]{2}”）（[^a-zA-Z]\$）；
err=regcomp（&preg，str_regex，REG_EXTENDED）；
如果（错误==0）
{
match=regexec（&preg，str_请求，nmatch，pmatch，0）；
nmatch=preg.re_nsub；
regfree（&preg）；
如果（匹配==0）
{
printf（“匹配\n”）；
int start=pmatch[1].rm_so；//我怀疑您试图将子字符串作为一个单词进行匹配，因此，您在模式的末尾使用了$
（字符串结尾）锚。但是，您要查找的子字符串不在输入字符串的结尾
由于regex.h
不支持单词边界，您可以使用一种变通方法：
const char *str_regex = "([a-zA-Z0-9][a-zA-Z0-9_.]+@[a-zA-Z0-9_]+\\.(com|net|[a-zA-Z]{2}))([^a-zA-Z]|$)";
                                                                                          ^^^^^^^^^^^^^

您需要的值将驻留在捕获组1中
见此：
#包括
#包括
#包括
内部主（空）
{
整数匹配；
INTERR；
regex_t preg；
regmatch_t pmatch[5]；
size\u t nmatch=4；//匹配结果有4个组：0-整个匹配，3个捕获组
const char*str_request=“”；
const char*str|u regex=“（[a-zA-Z0-9][a-zA-Z0-9.]+@[a-zA-Z0-9.]+\\（com | net |[a-zA-Z]{2}”）（[^a-zA-Z]\$）；
err=regcomp（&preg，str_regex，REG_EXTENDED）；
如果（错误==0）
{
match=regexec（&preg，str_请求，nmatch，pmatch，0）；
nmatch=preg.re_nsub；
regfree（&preg）；
如果（匹配==0）
{
printf（“匹配\n”）；
int start=pmatch[1].rm_so；//从模式中删除$
。@AeroX:这个问题与html@AndreaCorbellini从HTML标记的示例字符串来看，我怀疑这可能最终会成为XY问题。OP提到了报废&提供了一个HTML字符串，这可能意味着他们以后会想要报废网页。因此重复将它们指向HTML解析的方向，而不是正则表达式。这不是任何类型的重复，因为这里的问题不是正则表达式，而是如何在C代码中使用它。从模式中删除$
。@AeroX:这个问题与html@AndreaCorbellini从HTML标记的示例字符串中，我怀疑最终可能会成为XY问题。OP提到了废弃并提供了一个HTML字符串，这可能意味着他们以后会想要废弃网页。因此，可能会出现重复，将它们指向HTML解析的方向，而不是正则表达式。这不是任何类型的重复，因为这里的问题不是正则表达式，而是如何在C代码中使用它。请检查答案，如果它为你工作，请考虑接受。请检查答案，如果它为你工作，请考虑接受。