C++ 如何使我的拆分只在一条实线上工作，并且能够跳过字符串的引用部分？_C++_String_Parsing_Boost_Split

C++ 如何使我的拆分只在一条实线上工作，并且能够跳过字符串的引用部分？
c++ string parsing boost
C++ 如何使我的拆分只在一条实线上工作，并且能够跳过字符串的引用部分？,c++,string,parsing,boost,split,C++,String,Parsing,Boost,Split,因此，我们有一个：我们想介绍拆分的结构和结束解析过程的charecter，这些结构应保持为非文字化。怎么做这样的事如果您的语法包含转义序列，我相信您将无法使用简单的拆分技术你需要一个状态机下面是一些示例代码，让您了解我的意思。此解决方案既不是完全指定的，也不是暗示正确的。我相当肯定它有一次性的错误，只有通过彻底的测试才能发现 std::vector<std::string> result; std::string str; size_t i = 0, last = 0; f
因此，我们有一个：
我们想介绍拆分的
结构和结束解析过程的字符（character），这些结构应保持不被拆分。怎么做这样的事？
 如果您的语法包含转义序列，我相信您将无法使用简单的拆分技术
你需要一个状态机
下面是一些示例代码，让您了解我的意思。此解决方案既不完整，也不保证正确。我相当肯定它有差一（off-by-one）错误，只有通过彻底的测试才能发现
// Splits `str` at single spaces, except for spaces that sit inside a
// double-quoted section. Quote characters are kept as part of the token.
// Consecutive spaces yield empty tokens, and an empty `str` yields one empty
// token. An opening quote without a closing partner is reported as an error.
std::vector<std::string> result;
std::string str;
size_t i = 0, last = 0;

for (;;) {
  last = i;  // index of the first character of the current token
  bool in_quotes = false;

  // Advance to the next unquoted space or to the end of the input.
  // The bounds check in the loop header replaces the throwing str.at(i).
  for (; i < str.size(); ++i) {
    if (str[i] == '"')
      in_quotes = !in_quotes;
    else if (str[i] == ' ' && !in_quotes)
      break;
  }

  // Reaching the end while still inside quotes means the quotes do not pair up.
  if (in_quotes)
    throw std::runtime_error("invalid format, mismatched quotes");

  result.push_back(str.substr(last, i - last));
  if (i >= str.size())
    break;
  ++i;  // step over the delimiting space
}

std:：向量结果；
std：：字符串str；
尺寸i=0，最后一个=0；
对于（；；）{
下一个令牌：
last=i；
对于（；；）{
开关（str.at（i））{
案例''：转到处理_quote；
案例“”：转到处理\u令牌；
}
i++；
如果（i>=str.size（））
去处理你的代币；
}
处理报价：
对于（；；）{
开关（str.at（i））{
案例''：转到句柄\u令牌；
}
i++；
如果（i>=str.size（））
std:：runtime_错误（“格式无效，引号不匹配”）；
}
handle_令牌：
results.push_back（std:：string.substr（last，i-last））；
如果（i>=str.size（））
打破
i++；
}

这种代码很难推理和维护。然而，当人们设计出蹩脚的语法时，就会发生这种情况。制表符（Tab）本来就是用来分隔字段的，应尽可能鼓励使用它们
我会欣喜若狂地选择另一个更面向对象的解决方案。
以下代码：
/**
 * Finds the first entry of `symbols` that occurs in `s` at position `i`.
 *
 * Symbols are tested in order and the first match wins. Empty symbols never
 * match. A symbol longer than the remaining input simply does not match.
 *
 * @param s        the string being scanned
 * @param i        a position inside `s`; s.end() is allowed and matches nothing
 * @param symbols  the candidate symbols
 * @return iterator to the matching symbol, or symbols.end() when none matches
 */
std::vector<std::string>::const_iterator matchSymbol(const std::string & s, std::string::const_iterator i, const std::vector<std::string> & symbols)
{
    const std::string::size_type pos = static_cast<std::string::size_type>(i - s.begin());

    for (std::vector<std::string>::const_iterator testSymbol = symbols.begin(); testSymbol != symbols.end(); ++testSymbol) {
        if (testSymbol->empty()) {
            continue;
        }
        // compare(pos, len, str) clamps len to the characters left in s, so it
        // never reads past the end of the input (comparing against the raw
        // pointer &(*i) could) and it does not allocate like substr would.
        if (0 == s.compare(pos, testSymbol->size(), *testSymbol)) {
            return testSymbol;
        }
    }

    return symbols.end();
}

vector<string> split(const string& s, const vector<string> & delims, const vector<string> & terms, const bool keep_empty = true)
{
    vector<string> result;
    if (delims.empty()) {
        result.push_back(s);
        return result;
    }

    bool checkForDelim=true;

    string temp;
    string::const_iterator i=s.begin();
    while (i!=s.end()) {
        vector<string>::const_iterator testTerm=terms.end();
        vector<string>::const_iterator testDelim=delims.end();

        if (checkForDelim) {
            testTerm=matchSymbol(s,i,terms);
            testDelim=matchSymbol(s,i,delims);
        }

        if (testTerm!=terms.end()) {
            i=s.end();
        } else if (testDelim!=delims.end()) {
            if (!temp.empty() || keep_empty) {
                result.push_back(temp);
                temp.clear();
            }
            string::const_iterator j=testDelim->begin();
            while (i!=s.end() && j!=testDelim->end()) {
                ++i;
                ++j;
            }
        } else if ('"'==*i) {
            if (checkForDelim) {
                string::const_iterator j=i;
                do {
                    ++j;
                } while (j!=s.end() && '"'!=*j);
                checkForDelim=(j==s.end());
                if (!checkForDelim && !temp.empty() || keep_empty) {
                    result.push_back(temp);
                    temp.clear();
                }
                temp.push_back('"');
                ++i;
            } else {
                //matched end quote
                checkForDelim=true;
                temp.push_back('"');
                ++i;
                result.push_back(temp);
                temp.clear();
            }
        } else if ('\n'==*i) {
            temp+="\\n";
            ++i;
        } else {
            temp.push_back(*i);
            ++i;
        }
    }

    if (!temp.empty() || keep_empty) {
        result.push_back(temp);
    }
    return result;
}

int runTest()
{
    vector<string> delims;
    delims.push_back(" ");
    delims.push_back("\t");
    delims.push_back("\n");
    delims.push_back("split_here");

    vector<string> terms;
    terms.push_back(">");
    terms.push_back("end_here");

    const vector<string> words = split("close no \"\n end_here matter\" how \n far testsplit_heretest\"another split_here test\"with some\"mo>re", delims, terms, false);

    copy(words.begin(), words.end(), ostream_iterator<string>(cout, "\n"));
}

根据您给出的示例，您似乎希望当新行出现在引号外时，它们被视为分隔符，而当出现在引号内时，它们由文本\n
表示，所以这就是它的作用。它还增加了支持多个分隔符的功能，例如我在测试中使用的 split_here

我不确定您是否希望不匹配的引号像匹配的引号那样拆分，因为您给出的示例中不匹配的引号用空格分隔。这段代码将不匹配的引号视为任何其他字符，但如果这不是您想要的行为，则应该很容易修改
该行：
if (0==testSymbol->compare(0,testSymbol->size(),&(*i),testSymbol->size())) {

在大多数（即使不是全部）STL 实现上都能工作，但标准并不保证它一定能工作。可以用更安全但速度较慢的版本替换：
if (*testSymbol==s.substr(i-s.begin(),testSymbol->size())) {

更新了通过“谢谢你”奖励奖金的方式，我去实施了4项功能，最初我跳过了这些功能，因为“你不需要它”
现在支持部分引用的列
这就是您报告的问题：例如，使用分隔符，
仅测试，“一，二”，三将是有效的，而不是测试，一，“二”，“三”。现在两者都被接受了

现在支持自定义分隔符表达式

只能将单个字符指定为分隔符。现在，您可以将任何语法分析器表达式指定为分隔符规则。例如
注意这会改变默认重载的行为
旧版本默认情况下将重复空格视为单个分隔符。如果需要，现在必须显式指定（第二个示例）

现在支持引号内的引号（“”）（而不仅仅是使它们消失）

请参阅代码示例。当然很简单。请注意，引用构造外部的序列“”
仍然表示空字符串（为了与冗余引用空字符串的现有CSV输出格式兼容）

除了作为输入的容器（例如char[]）之外，还支持增强范围

好吧，您不需要它（但它对我来说非常方便，因为我可以将拆分为（“一个字符数组”，…）
：）

正如我一半的预期，您将需要部分引用的字段（请参阅您的注释1。好的，这里就是了（瓶颈是让它在不同版本的Boost中一致工作））
介绍
读者随机记录和观察：

splitInto
template函数愉快地支持您对其抛出的任何内容：

来自向量、std::string 或 std::wstring 的输入
输出到--演示中显示的某些组合--

向量
（所有行变平）
vector
（每行标记）
列表
（如果您愿意）
set
（唯一的行标记集）
。。。任何你梦寐以求的容器


出于演示目的，展示karma输出生成（特别是处理嵌套容器）

注意：\n
在输出中显示为？
，以便于理解（safechars
）

为新的Spirit用户提供方便的管道（易读的规则命名，注释调试定义，以防您想玩东西）
可以指定任何与分隔符匹配的Spirit解析表达式。这意味着通过传递 +qi::lit(' ')
而不是默认值（' '
），您将跳过空字段（即重复的分隔符）

需要/测试的版本
这是使用

GCC 4.4.5
GCC 4.5.1 和
GCC 4.6.1

它可以（经过测试）对抗

从 boost 1.42.0（也可能是更早的版本）一直到
boost 1.47.0

注意：输出容器的扁平化似乎只适用于Spirit V2.5（boost 1.47.0）。

（这可能很简单，因为旧版本需要额外的包含？）
代码！
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 80
// YAGNI #4 - 除了作为输入的容器外，还支持 Boost 范围（例如 char[]）
#define SUPPORT_BOOST_RANGE // 我们自己定义的宏，供 splitInto 使用
#包括
#包括
#包括
#包括//仅适用于1.47.0之前的版本
#包括
#包括
名称空间/
/**
 * Finds the first entry of `symbols` that occurs in `s` at position `i`.
 *
 * Symbols are tested in order and the first match wins. Empty symbols never
 * match. A symbol longer than the remaining input simply does not match.
 *
 * @param s        the string being scanned
 * @param i        a position inside `s`; s.end() is allowed and matches nothing
 * @param symbols  the candidate symbols
 * @return iterator to the matching symbol, or symbols.end() when none matches
 */
std::vector<std::string>::const_iterator matchSymbol(const std::string & s, std::string::const_iterator i, const std::vector<std::string> & symbols)
{
    const std::string::size_type pos = static_cast<std::string::size_type>(i - s.begin());

    for (std::vector<std::string>::const_iterator testSymbol = symbols.begin(); testSymbol != symbols.end(); ++testSymbol) {
        if (testSymbol->empty()) {
            continue;
        }
        // compare(pos, len, str) clamps len to the characters left in s, so it
        // never reads past the end of the input (comparing against the raw
        // pointer &(*i) could) and it does not allocate like substr would.
        if (0 == s.compare(pos, testSymbol->size(), *testSymbol)) {
            return testSymbol;
        }
    }

    return symbols.end();
}

vector<string> split(const string& s, const vector<string> & delims, const vector<string> & terms, const bool keep_empty = true)
{
    vector<string> result;
    if (delims.empty()) {
        result.push_back(s);
        return result;
    }

    bool checkForDelim=true;

    string temp;
    string::const_iterator i=s.begin();
    while (i!=s.end()) {
        vector<string>::const_iterator testTerm=terms.end();
        vector<string>::const_iterator testDelim=delims.end();

        if (checkForDelim) {
            testTerm=matchSymbol(s,i,terms);
            testDelim=matchSymbol(s,i,delims);
        }

        if (testTerm!=terms.end()) {
            i=s.end();
        } else if (testDelim!=delims.end()) {
            if (!temp.empty() || keep_empty) {
                result.push_back(temp);
                temp.clear();
            }
            string::const_iterator j=testDelim->begin();
            while (i!=s.end() && j!=testDelim->end()) {
                ++i;
                ++j;
            }
        } else if ('"'==*i) {
            if (checkForDelim) {
                string::const_iterator j=i;
                do {
                    ++j;
                } while (j!=s.end() && '"'!=*j);
                checkForDelim=(j==s.end());
                if (!checkForDelim && !temp.empty() || keep_empty) {
                    result.push_back(temp);
                    temp.clear();
                }
                temp.push_back('"');
                ++i;
            } else {
                //matched end quote
                checkForDelim=true;
                temp.push_back('"');
                ++i;
                result.push_back(temp);
                temp.clear();
            }
        } else if ('\n'==*i) {
            temp+="\\n";
            ++i;
        } else {
            temp.push_back(*i);
            ++i;
        }
    }

    if (!temp.empty() || keep_empty) {
        result.push_back(temp);
    }
    return result;
}

int runTest()
{
    vector<string> delims;
    delims.push_back(" ");
    delims.push_back("\t");
    delims.push_back("\n");
    delims.push_back("split_here");

    vector<string> terms;
    terms.push_back(">");
    terms.push_back("end_here");

    const vector<string> words = split("close no \"\n end_here matter\" how \n far testsplit_heretest\"another split_here test\"with some\"mo>re", delims, terms, false);

    copy(words.begin(), words.end(), ostream_iterator<string>(cout, "\n"));
}

close
no
"\n end_here matter"
how
far
test
test
"another split_here test"
with
some"mo

if (0==testSymbol->compare(0,testSymbol->size(),&(*i),testSymbol->size())) {

if (*testSymbol==s.substr(i-s.begin(),testSymbol->size())) {

  // Usage examples: the third argument is the delimiter rule. `qi` is a
  // namespace (see qi::double_), so its members are reached with `::`.
  // NOTE(review): presumably `qi` aliases boost::spirit::qi - confirm.
  splitInto(input, output, ' ');                           // single space
  splitInto(input, output, +qi::lit(' '));                 // one or more spaces
  splitInto(input, output, +qi::char_(" \t"));             // one or more spaces or tabs
  splitInto(input, output, qi::double_ >> !qi::lit('#'));  // -- any parse expression

actually|on|two|lines
set['columns', 'partially', 'qouted']
set['am', 'custom', 'delimiters']
set['', '03', '10', '13', '14', '1997']
set['like', 'nested', 'quotes like "?" that', 'would']

--server=127.0.0.1:4774/|--username=robota|--userdescr=robot A ? I am cool robot ||--robot|>|echo.txt