C++ 如何使用转义空格拆分句子?
我想用空格作为分隔符分割句子,但转义的空格除外。如何使用 boost::split 和正则表达式来拆分?如果做不到,还有什么其他办法?(标签:c++, boost, split, whitespace, delimiter)例如: std::string sentence = "My dog Fluffy\\ Cake likes to jump"; 结果: My / dog / Fluffy\ Cake / likes / to / jump。三种实现:Boost Spirit、使用 Boost 正则表达式、手写解析器。Boost Spirit:下面是我如何用 Boost Spirit 做到这一点的。这看起来有些小题大做,但经验告诉我,一旦开始拆分输入文本,很可能就需要更多的解析逻辑。当……
std::string sentence = "My dog Fluffy\\ Cake likes to jump";
结果:My
dog
Fluffy\ Cake
likes
to
jump
三种实现:
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Demo: split a sentence into words with Boost Spirit Qi and print them.
// A backslash followed by any character is parsed as part of the current
// word, so an escaped space does not end the word.
int main() {
    using Iter = std::string::const_iterator;

    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
    std::vector<std::string> words;

    Iter first = sentence.begin();
    Iter last  = sentence.end();

    // Word grammar: one or more of (backslash + any char) or a graphic
    // character, wrapped in lexeme[] so the skipper is not applied mid-word.
    bool const parsed = qi::phrase_parse(first, last,
            *qi::lexeme [ +('\\' >> qi::char_ | qi::graph) ], // words
            qi::space - "\\ ",                                 // skipper
            words);

    if (!parsed) {
        std::cout << "Parse failed\n";
    } else {
        std::cout << "Parsed:\n";
        for (auto const& word : words)
            std::cout << "\t'" << word << "'\n";
    }

    // `first` is passed by reference to phrase_parse; if it did not reach
    // `last`, report the input that was left over.
    if (first != last)
        std::cout << "Remaining unparsed: '" << std::string(first, last) << "'\n";
}
#include <iostream>
#include <boost/regex.hpp>
#include <boost/algorithm/string_regex.hpp>
#include <vector>
int main() {
std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
std::vector<std::string> words;
boost::algorithm::split_regex(words, sentence, boost::regex("(?<!\\\\)\\s"), boost::match_default);
for (auto& w : words)
std::cout << " '" << w << "'\n";
}
#include <iostream>
#include <iterator>
#include <vector>
/// Splits the character range [f, l) into whitespace-separated tokens and
/// writes each non-empty token to `out`.
///
/// Separators are ' ', '\t', '\r' and '\n'. A backslash immediately followed
/// by a space is an escape: a literal space is appended to the current token
/// and the token is NOT ended. A backslash followed by anything else (or at
/// the end of input) is kept as a literal backslash.
///
/// @param f   iterator to the first character
/// @param l   iterator one past the last character
/// @param out output iterator receiving std::string tokens
/// @return    the output iterator after the last token written
template <typename It, typename Out>
Out tokens(It f, It l, Out out) {
    std::string accum;

    // Emit the token collected so far (if any) and start a new one.
    auto flush = [&] {
        if (!accum.empty()) {
            *out++ = accum;
            accum.resize(0);
        }
    };

    while (f != l) {
        switch (*f) {
            case '\\':
                if (++f != l && *f == ' ') {
                    accum += ' ';
                    // Consume the escaped space; otherwise the next iteration
                    // would see it again and treat it as a separator.
                    ++f;
                } else {
                    // Not an escape sequence: keep the backslash; the
                    // following character (if any) is handled next iteration.
                    accum += '\\';
                }
                break;
            case ' ': case '\t': case '\r': case '\n':
                ++f;
                flush();
                break;
            default:
                accum += *f++;
        }
    }

    flush(); // emit the trailing token
    return out;
}
// Demo: run the hand-written tokenizer over a sample sentence and print
// each token on its own line, quoted.
int main() {
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";

    std::vector<std::string> words;
    auto sink = std::back_inserter(words);
    tokens(sentence.begin(), sentence.end(), sink);

    for (auto const& word : words)
        std::cout << "\t'" << word << "'\n";
}
#包括
名称空间qi=boost::spirit::qi;
int main(){
std::string const句子=“我的狗毛茸茸的\\蛋糕喜欢跳”;
使用它=std::string::const_迭代器;
它f=句子.begin(),l=句子.end();
向量词;
bool ok=qi::短语解析(f,l,
*qi::lexeme[+('\\'>>qi::char\u124; qi::graph)],//单词
qi::空格-“\\”,//skipper
文字);
如果(确定){
标准::cout
手写解析器
这有点繁琐,但和 Spirit 语法一样,它是完全通用的,并且可以获得很好的性能。
然而,一旦你开始增加语法的复杂度,它就无法像 Spirit 方法那样优雅地扩展。它的一个优点是,编译代码所花的时间比 Spirit 版本少。
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Demo: tokenize a sentence with Boost Spirit Qi and print the words.
// The word rule accepts a backslash followed by any character, so an
// escaped space stays inside the word instead of ending it.
int main() {
    using Iter = std::string::const_iterator;

    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
    Iter cursor = sentence.begin();
    Iter finish = sentence.end();

    std::vector<std::string> words;

    // lexeme[] keeps the skipper from being applied inside a single word.
    bool const success = qi::phrase_parse(cursor, finish,
            *qi::lexeme [ +('\\' >> qi::char_ | qi::graph) ], // words
            qi::space - "\\ ",                                 // skipper
            words);

    if (success) {
        std::cout << "Parsed:\n";
        for (auto const& token : words)
            std::cout << "\t'" << token << "'\n";
    } else {
        std::cout << "Parse failed\n";
    }

    // `cursor` is passed by reference to phrase_parse; anything between it
    // and `finish` is reported as leftover input.
    bool const has_leftover = (cursor != finish);
    if (has_leftover)
        std::cout << "Remaining unparsed: '" << std::string(cursor, finish) << "'\n";
}
#include <iostream>
#include <boost/regex.hpp>
#include <boost/algorithm/string_regex.hpp>
#include <vector>
int main() {
std::string const sentence = "My dog Fluffy\\ Cake likes to jump";
std::vector<std::string> words;
boost::algorithm::split_regex(words, sentence, boost::regex("(?<!\\\\)\\s"), boost::match_default);
for (auto& w : words)
std::cout << " '" << w << "'\n";
}
#include <iostream>
#include <iterator>
#include <vector>
/// Tokenizes the character range [f, l) on whitespace and writes every
/// non-empty token to `out`.
///
/// ' ', '\t', '\r' and '\n' end the current token. The two-character
/// sequence backslash + space is an escaped space: it contributes a single
/// literal space to the current token without ending it. A backslash that is
/// not followed by a space (including one at the very end of the input) is
/// copied through as a literal backslash.
///
/// @param f   start of the input range
/// @param l   end of the input range
/// @param out output iterator that accepts std::string values
/// @return    `out` advanced past the last token written
template <typename It, typename Out>
Out tokens(It f, It l, Out out) {
    std::string accum;

    // Write out the pending token, if there is one, and reset the buffer.
    auto flush = [&] {
        if (!accum.empty()) {
            *out++ = accum;
            accum.resize(0);
        }
    };

    while (f != l) {
        switch (*f) {
            case '\\':
                if (++f != l && *f == ' ') {
                    accum += ' ';
                    // Step past the escaped space so the separator case
                    // below does not see it and split the token here.
                    ++f;
                } else {
                    // Lone backslash: keep it; the character after it (if
                    // any) is processed by the next loop iteration.
                    accum += '\\';
                }
                break;
            case ' ': case '\t': case '\r': case '\n':
                ++f;
                flush();
                break;
            default:
                accum += *f++;
        }
    }

    flush(); // the input may end without a trailing separator
    return out;
}
// Demo: feed a sample sentence through tokens() and print the collected
// words, one per line, each wrapped in single quotes.
int main() {
    std::vector<std::string> words;
    std::string const sentence = "My dog Fluffy\\ Cake likes to jump";

    auto const begin = sentence.begin();
    auto const end   = sentence.end();
    tokens(begin, end, std::back_inserter(words));

    for (auto const& entry : words)
        std::cout << "\t'" << entry << "'\n";
}
#包括
#包括
#包括
模板
Out令牌(It f、It l、Out){
std::字符串累加器;
自动刷新=[&]{
如果(!acum.empty()){
*out++=acum;
累计调整大小(0);
}
};
while(f!=l){
开关(*f){
案例“\\”:
如果(++f!=l&&f=='')
累计+='';
其他的
累计+=“\\”;
打破
案例“”:案例“\t”:案例“\r”:案例“\n”:
++f;
冲洗();
打破
违约:
累计+=*f++;
}
}
冲洗();
返回;
}
int main(){
std::string const句子=“我的狗毛茸茸的\\蛋糕喜欢跳”;
向量词;
标记(句子.开始(),句子.结束(),背向插入器(单词));
for(自动和自动:文字)
评论:
std::stringstream 或 regex 可以使用。
@BarmakShemirani 可以使用吗?你会如何处理转义的空格?
@sehe,你可以使用 Boost Spirit、Boost regex 或手写解析器。
@BarmakShemirani Lol。我想这是一种恭维 :)
我使用了你提供的 Boost regex,它工作得很好。非常感谢。
@AppleJuice 你意识到了吗,你没有选择丑陋的继子 :) 这是唯一一个有链接依赖的方案,需要在你的人寿保险中签豁免条款,并且即使在解析之后也需要你手动去掉转义符 :)(幸运的是,它不需要用处女献祭才能编译,比如 #1;而 #3 会引起嫉妒)。干杯