C++ Boost::Regex 在较长的字符串不匹配表达式时抛出错误

C++ Boost::Regex 在较长的字符串不匹配表达式时抛出错误,c++,regex,boost,C++,Regex,Boost,我有两个正则表达式。一个匹配python样式的注释,另一个匹配文件路径 当我尝试查看注释是否与文件路径表达式匹配时,如果注释字符串超过15个字符,则会抛出错误。否则,它将按预期运行 如何修改我的正则表达式,使其不再出现此问题? 示例代码: #include <string> #include "boost/regex.hpp" using namespace std; using namespace boost; int main(int argc, char** argv) {

我有两个正则表达式。一个匹配python样式的注释,另一个匹配文件路径

当我尝试查看注释是否与文件路径表达式匹配时,如果注释字符串超过15个字符,则会抛出错误。否则,它将按预期运行

如何修改我的正则表达式,使其不再出现此问题?

示例代码:

#include <string>
#include "boost/regex.hpp"

using namespace std;
using namespace boost;

// Minimal reproduction of the problem: matching a long string that is NOT a
// path against re_path makes Boost.Regex throw a std::runtime_error
// ("The complexity of matching the regular expression exceeded predefined
// bounds"), while short non-matching strings simply return false.
int main(int argc, char** argv)
{
    // Optional leading whitespace, a '#', then everything up to the line end.
    boost::regex re_comment("\\s*#[^\\r\\n]*");
    // Optional quote, optional drive letter, optional leading separator, then
    // repeated "name + optional separator" groups and a ".ext" suffix.
    // NOTE(review): the "+" nested inside another "+" lets a run of name
    // characters be partitioned in many ways, which is the likely source of
    // the excessive backtracking on long non-matching input.
    boost::regex re_path("\"?([A-Za-z]:)?[\\\\/]?(([^(\\\\/:*?\"<>|\\r\\n)]+[\\\\/]?)+)?\\.[\\w]+\"?");

    string shortComment = " #comment ";
    string longComment  = "#123456789012345678901234567890";
    string myPath       = "C:/this/is.a/path.doc";

    regex_match(shortComment,re_comment);    //evaluates to true
    regex_match(longComment,re_comment);     //evaluates to true

    regex_match(myPath, re_path);             //evaluates to true
    regex_match(shortComment, re_path);       //evaluates to false
    // Was written as "regex.match(...)", which does not compile; the free
    // function regex_match is what actually triggers the exception.
    regex_match(longComment, re_path);        //throws error
}
#包括
#包括“boost/regex.hpp”
使用名称空间std;
使用名称空间boost;
int main(int argc,字符**argv)
{
boost::regex重新注释(“\\s*#[^\\r\\n]*”;
boost::正则表达式re\u路径(“\”?([A-Za-z]:)?[\\\\/]?([^(\\\\/:*?“\\\”\\r\\n)]+[\\\\/]?)+)?\.[\\\\\\/]?”;
字符串shortComment=“#注释”;
字符串longComment=“#1234567890012345678901234567890”;
字符串myPath=“C:/this/is.a/path.doc”;
regex_match(shortComment,re_comment);//计算结果为true
regex_match(longcoment,re_comment);//计算结果为true
regex_match(myPath,re_path);//计算结果为true
regex_match(shortComment,re_path);//计算结果为false
regex.match(longcoment,re_path);//抛出错误
}
这就是抛出的错误

terminate called after throwing an instance of
    'boost::exception_detail::clone_impl<boost::exception_detail
            ::error_info_injector<std::runtime_error> >'
what():  The complexity of matching the regular expression exceeded predefined
    bounds.  Try refactoring the regular expression to make each choice made by the
    state machine unambiguous.  This exception is thrown to prevent "eternal" matches
    that take  an indefinite period time to locate.
terminate 在抛出以下类型的实例后被调用:
    'boost::exception_detail::clone_impl<boost::exception_detail
            ::error_info_injector<std::runtime_error> >'
what():匹配正则表达式的复杂度超出了预定义的界限。
    请尝试重构正则表达式,使状态机做出的每个选择都没有歧义。
    抛出此异常是为了防止出现需要无限长时间才能完成的“永久”匹配。

我知道,用一个巨大的正则表达式来解决世界上所有的问题总是很有诱惑力,这样做也可能确实有性能上的理由,但你也必须考虑到,构建这样一个怪物会给自己带来维护上的噩梦。因此我建议把问题拆分成几个容易处理的步骤:基本思路是先处理引号,再按目录分隔符拆分字符串,最后用正则表达式分别匹配路径的每个部分。

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "boost/regex.hpp"
using namespace std;
using namespace boost;


bool my_path_match(std::string line)
{
    bool ret = true;
    string drive = "([a-zA-Z]\\:)?";
    string pathElem = "(\\w|\\.|\\s)+";
    boost::regex re_pathElem(pathElem);
    boost::regex re_drive("(" + drive + "|" + pathElem + ")");

    vector<string> split_line;
    vector<string>::iterator it;

    if ((line.front() == '"') && (line.back() == '"'))
    {
        line.erase(0, 1); // erase the first character
        line.erase(line.size() - 1); // erase the last character
    }

    split(split_line, line, is_any_of("/\\"));

    if (regex_match(split_line[0], re_drive) == false)
    {
        ret = false;
    }
    else
    {
        for (it = (split_line.begin() + 1); it != split_line.end(); it++)
        {
            if (regex_match(*it, re_pathElem) == false)
            {
                ret = false;
                break;
            }
        }
    }
    return ret;
}

int main(int argc, char** argv)
{
    boost::regex re_comment("^.*#.*$");

    string shortComment = " #comment ";
    string longComment  = "#123456789012345678901234567890";
    vector<string> testpaths;
    vector<string> paths;
    vector<string>::iterator it;
    testpaths.push_back("C:/this/is.a/path.doc");
    testpaths.push_back("C:/this/is also .a/path.doc");
    testpaths.push_back("/this/is also .a/path.doc");
    testpaths.push_back("./this/is also .a/path.doc");
    testpaths.push_back("this/is also .a/path.doc");
    testpaths.push_back("this/is 1 /path.doc");

    bool ret;
    ret = regex_match(shortComment, re_comment);    //evaluates to true
    cout<<"should be true = "<<ret<<endl;
    ret = regex_match(longComment, re_comment);     //evaluates to true
    cout<<"should be true = "<<ret<<endl;

    string quotes;
    for (it = testpaths.begin(); it != testpaths.end(); it++)
    {
        paths.push_back(*it);
        quotes = "\"" + *it + "\""; // test quoted paths
        paths.push_back(quotes);
        std::replace(it->begin(), it->end(), '/', '\\'); // test backslash paths
        std::replace(quotes.begin(), quotes.end(), '/', '\\'); // test backslash quoted paths
        paths.push_back(*it);
        paths.push_back(quotes);
    }

    for (it = paths.begin(); it != paths.end(); it++)
    {
        ret = my_path_match(*it);             //evaluates to true
        cout<<"should be true = "<<ret<<"\t"<<*it<<endl;
    }

    ret = my_path_match(shortComment);       //evaluates to false
    cout<<"should be false = "<<ret<<endl;
    ret = my_path_match(longComment);        //evaluates to false
    cout<<"should be false = "<<ret<<endl;
}
#包括
#包括“boost/regex.hpp”
#包括“boost/algorithm/string.hpp”
使用名称空间std;
使用名称空间boost;
布尔我的路径匹配(标准::字符串行)
{
bool-ret=真;
字符串驱动器=“([a-zA-Z]\\:)?”;
字符串pathElem=“(\\w|\\.\\s)+”;
boost::regex re_pathElem(pathElem);
boost::regex-re_-drive(“(“+drive+”)“|“+pathElem+”);
矢量分裂线;
向量::迭代器;
如果((line.front()=='”)&&(line.back()=='”))
{
line.erase(0,1);//删除第一个字符
line.erase(line.size()-1);//擦除最后一个字符
}
拆分(拆分行,行,是(“/\”)中的任何一行);
if(正则表达式匹配(分割线[0],重新驱动)=false)
{
ret=假;
}
其他的
{
for(it=(split_-line.begin()+1);it!=split_-line.end();it++)
{
if(regex_match(*it,re_pathElem)=false)
{
ret=假;
打破
}
}
}
返回ret;
}
int main(int argc,字符**argv)
{
boost::regex re_注释(“^..*.*$”;
字符串shortComment=“#注释”;
字符串longComment=“#1234567890012345678901234567890”;
向量测试路径;
向量路径;
向量::迭代器;
testpath.push_back(“C:/this/is.a/path.doc”);
testpath.push_back(“C:/this/is.a/path.doc”);
testpath.push_back(“/this/is.a/path.doc”);
testpath.push_back(“./this/is.a/path.doc”);
testpath.push_back(“this/is.a/path.doc”);
testpath.push_back(“this/is 1/path.doc”);
布尔-雷特;
ret=regex_match(shortComment,re_comment);//计算结果为true

你能解释一下你的正则表达式吗?这个
"\"?([A-Za-z]:)?[\\\\/]?(([^(\\\\/:*?\"<>|\\r\\n)]+[\\\\/]?)+)?\\.[\\w]+\"?"
特别是这一段。我感觉你可能并不清楚自己在匹配什么。很可能你是对的,我是正则表达式的新手。基本上,我想做的是:\"? 可选引号;([A-Za-z]:)?[\\/]? 可选驱动器号;[^(\\/:*?\"<>|\r\n)]+ 文件或文件夹名称;[\\/]? 可选文件夹分隔符;\.[\w]+ 文件扩展名。我甚至没有想到要将其拆分。如果我理解正确,
string pathElem = "(\\w|\\.|\\s)+";
将匹配由一个或多个单词字符、“.”或空白字符组成的一组字符。“|”是做什么用的?我想 (\w\.\s)+ 也会做同样的事情?管道符是“或”运算符。我相信 (\w\.\s)+ 匹配的是一个单词字符,后跟一个句点,再后跟一个空白字符,并重复一次或多次。换句话说,它会匹配 "w. " 这样的序列,而不会匹配 "word"。