C++ 如何使用转义空格拆分句子？_C++_Boost_Split_Whitespace_Delimiter

C++ 如何使用转义空格拆分句子？

c++ boost

C++ 如何使用转义空格拆分句子？,c++,boost,split,whitespace,delimiter,C++,Boost,Split,Whitespace,Delimiter,我想用空格作为分隔符分割句子，除了转义的空格。使用boost:：split和regex，如何拆分它？如果不可能，还有什么办法例如： std::string sentence = "My dog Fluffy\\ Cake likes to jump"; 结果：我的狗蓬松的\蛋糕喜欢到跳转三种实现：振奋精神使用Boost正则表达式手写解析器振奋精神下面是我如何用Boost Spirit做到这一点的。这似乎有些过分，但经验告诉我，一旦拆分输入文本，可能需要更多的解析逻辑当

我想用空格作为分隔符来拆分句子，但转义的空格除外。如何使用 boost::split 和正则表达式来实现？如果做不到，还有什么别的办法？

例如：

std::string sentence = "My dog Fluffy\\ Cake likes to jump";

结果：
My
dog
Fluffy\ Cake
likes
to
jump

三种实现：

使用 Boost Spirit

使用Boost正则表达式

手写解析器

Boost Spirit：下面是我用 Boost Spirit 实现的方式。这看起来可能有些小题大做，但经验告诉我，一旦开始拆分输入文本，很可能还需要更多的解析逻辑。

当您从“仅拆分标记”扩展到具有产生式规则的真正语法时，Boost Spirit将大放异彩

#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;

// Demo: tokenizes a sample sentence with a Boost.Spirit Qi grammar and prints
// the resulting words, followed by any input the parser did not consume.
int main() {
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
    using It = std::string::const_iterator;
    // f is passed to phrase_parse and compared against l afterwards to detect
    // input that was left unparsed.
    It f = sentence.begin(), l = sentence.end();

    std::vector<std::string> words;

    // Grammar: zero or more words, where a word is one or more of either
    // "a backslash followed by any character" or "a graphic character".
    // qi::lexeme wraps each word so the skipper is not applied inside it.
    bool ok = qi::phrase_parse(f, l,
            *qi::lexeme [ +('\\' >> qi::char_ | qi::graph) ], // words
            qi::space - "\\ ", // skipper: qi::space minus the literal backslash-space sequence
            words);

    if (ok) {
        std::cout << "Parsed:\n";
        for (auto& w : words)
            std::cout << "\t'" << w << "'\n";
    } else {
        std::cout << "Parse failed\n";
    }

    // Report whatever lies between the final parse position and the end.
    if (f != l)
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}

#include <iostream>
#include <boost/regex.hpp>
#include <boost/algorithm/string_regex.hpp>
#include <vector>

int main() {
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";

    std::vector<std::string> words;
    boost::algorithm::split_regex(words, sentence, boost::regex("(?<!\\\\)\\s"), boost::match_default);

    for (auto& w : words)
        std::cout << " '" << w << "'\n";
}

#include <iostream>
#include <iterator>
#include <vector>

/// Splits the character range [f, l) into words and writes each word to `out`.
///
/// Words are separated by space, tab, carriage return or line feed. A backslash
/// immediately followed by a space is an escaped space: it contributes a single
/// literal ' ' to the current word and does not end it. A backslash followed by
/// anything else (or at the end of input) is kept as a literal backslash, and
/// the following character is then processed normally.
///
/// Empty words are never emitted, so runs of separators collapse.
///
/// @param f   iterator to the first character
/// @param l   iterator one past the last character
/// @param out output iterator accepting std::string values
/// @return    the output iterator after the last word written
template <typename It, typename Out>
Out tokens(It f, It l, Out out) {
    std::string accum;

    // Emit the accumulated word (if any) and start a new one.
    auto flush = [&] {
        if (!accum.empty()) {
            *out++ = accum;
            accum.clear();
        }
    };

    while (f != l) {
        switch (*f) {
            case '\\':
                if (++f != l && *f == ' ') {
                    accum += ' ';
                    // Consume the escaped space; otherwise the next iteration
                    // would treat it as a separator and split the word here.
                    ++f;
                } else {
                    // Not an escape: keep the backslash and leave f on the
                    // following character so it is handled on the next pass.
                    accum += '\\';
                }
                break;
            case ' ': case '\t': case '\r': case '\n':
                ++f;
                flush();
                break;
            default:
                accum += *f++;
        }
    }
    flush(); // the last word has no trailing separator
    return out;
}

// Demo driver: runs tokens() over a sample sentence and prints each word.
int main() {
    std::string const input = "My dog Fluffy\\ Cake likes to jump";

    std::vector<std::string> parts;
    tokens(input.begin(), input.end(), std::back_inserter(parts));

    for (std::string const& part : parts) {
        std::cout << "\t'" << part << "'\n";
    }
}

#包括
名称空间qi=boost:：spirit:：qi；
int main（）{
std:：string const句子=“我的狗毛茸茸的\\蛋糕喜欢跳”；
使用它=std:：string:：const_迭代器；
它f=句子.begin（），l=句子.end（）；
向量词；
bool ok=qi：：短语解析（f，l，
*qi:：lexeme[+（'\\'>>qi:：char\u124; qi:：graph）]，//单词
qi：：空格-“\\”，//skipper
文字）；
如果（确定）{
标准：：cout
手写解析器
这有点乏味，但就像Spirit语法一样，它是完全通用的，并且允许很好的性能
然而，一旦你开始增加语法的复杂性，它就无法像 Spirit 方案那样优雅地扩展。它的一个优点是编译代码所花的时间比 Spirit 版本少。

#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;

// Demo: tokenizes a sample sentence with a Boost.Spirit Qi grammar and prints
// the resulting words, followed by any input the parser did not consume.
int main() {
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
    using It = std::string::const_iterator;
    // f is passed to phrase_parse and compared against l afterwards to detect
    // input that was left unparsed.
    It f = sentence.begin(), l = sentence.end();

    std::vector<std::string> words;

    // Grammar: zero or more words, where a word is one or more of either
    // "a backslash followed by any character" or "a graphic character".
    // qi::lexeme wraps each word so the skipper is not applied inside it.
    bool ok = qi::phrase_parse(f, l,
            *qi::lexeme [ +('\\' >> qi::char_ | qi::graph) ], // words
            qi::space - "\\ ", // skipper: qi::space minus the literal backslash-space sequence
            words);

    if (ok) {
        std::cout << "Parsed:\n";
        for (auto& w : words)
            std::cout << "\t'" << w << "'\n";
    } else {
        std::cout << "Parse failed\n";
    }

    // Report whatever lies between the final parse position and the end.
    if (f != l)
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}

#include <iostream>
#include <boost/regex.hpp>
#include <boost/algorithm/string_regex.hpp>
#include <vector>

int main() {
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";

    std::vector<std::string> words;
    boost::algorithm::split_regex(words, sentence, boost::regex("(?<!\\\\)\\s"), boost::match_default);

    for (auto& w : words)
        std::cout << " '" << w << "'\n";
}

#include <iostream>
#include <iterator>
#include <vector>

/// Splits the character range [f, l) into words and writes each word to `out`.
///
/// Words are separated by space, tab, carriage return or line feed. A backslash
/// immediately followed by a space is an escaped space: it contributes a single
/// literal ' ' to the current word and does not end it. A backslash followed by
/// anything else (or at the end of input) is kept as a literal backslash, and
/// the following character is then processed normally.
///
/// Empty words are never emitted, so runs of separators collapse.
///
/// @param f   iterator to the first character
/// @param l   iterator one past the last character
/// @param out output iterator accepting std::string values
/// @return    the output iterator after the last word written
template <typename It, typename Out>
Out tokens(It f, It l, Out out) {
    std::string accum;

    // Emit the accumulated word (if any) and start a new one.
    auto flush = [&] {
        if (!accum.empty()) {
            *out++ = accum;
            accum.clear();
        }
    };

    while (f != l) {
        switch (*f) {
            case '\\':
                if (++f != l && *f == ' ') {
                    accum += ' ';
                    // Consume the escaped space; otherwise the next iteration
                    // would treat it as a separator and split the word here.
                    ++f;
                } else {
                    // Not an escape: keep the backslash and leave f on the
                    // following character so it is handled on the next pass.
                    accum += '\\';
                }
                break;
            case ' ': case '\t': case '\r': case '\n':
                ++f;
                flush();
                break;
            default:
                accum += *f++;
        }
    }
    flush(); // the last word has no trailing separator
    return out;
}

// Demo driver: runs tokens() over a sample sentence and prints each word.
int main() {
    std::string const input = "My dog Fluffy\\ Cake likes to jump";

    std::vector<std::string> parts;
    tokens(input.begin(), input.end(), std::back_inserter(parts));

    for (std::string const& part : parts) {
        std::cout << "\t'" << part << "'\n";
    }
}

#包括
#包括
#包括
模板
Out令牌（It f、It l、Out）{
std：：字符串累加器；
自动刷新=[&]{
如果（！acum.empty（））{
*out++=acum；
累计调整大小（0）；
}
};
while（f！=l）{
开关（*f）{
案例“\\”：
如果（++f！=l&&f==''）
累计+=''；
其他的
累计+=“\\”；
打破
案例“”：案例“\t”：案例“\r”：案例“\n”：
++f；
冲洗（）；
打破
违约：
累计+=*f++；
}
}
冲洗（）；
返回；
}
int main（）{
std:：string const句子=“我的狗毛茸茸的\\蛋糕喜欢跳”；
向量词；
标记（句子.开始（），句子.结束（），背向插入器（单词））；
for（自动和自动：文字）
std:：stringstream或regex@BarmakShemirani可以使用吗？你会如何处理转义空间？@sehe，你可以使用Boost Spirit、Boost regex或手写解析器。@BarmakShemirani Lol。我想这是一种恭维：）我使用了你提供的Boost regex，它工作得很好。非常感谢。@AppleJuice你意识到了吗你没有选择丑陋的继子：）这是唯一一个有链接依赖性的，需要在你的人寿保险中豁免，并且需要你手动删除转义，即使在它被解析后：）（幸运的是，它不需要一个处女牺牲来编译，比如#1；和#3引起嫉妒）。干杯