C++ Spirit Lex:哪个令牌定义生成了这个令牌?
如果这是一个新手问题,很抱歉,但我需要知道是哪个令牌定义产生了某个令牌。当我打印令牌ID时,只得到一个整数,而我需要知道是哪个正则表达式生成了这个令牌。
标签:c++, boost, boost-spirit, boost-spirit-lex
编辑:以下是我定义令牌的方式:
/// Token table for a Boost.Spirit.Lex lexer.
///
/// Defines two skipped token kinds (whitespace and comments) and four literal
/// token kinds (integer, float, symbol, string). While skipping, the newlines
/// inside each skipped match are counted so that lineNo() tracks the current
/// line of the input.
///
/// @tparam LexerT  Lexer implementation forwarded to lex::lexer<>.
template <typename LexerT>
class Tokens: public lex::lexer<LexerT>
{
public:
    /// Builds every token definition and registers it with the lexer.
    ///
    /// The string argument is accepted so existing call sites keep compiling,
    /// but the constructor never reads it.
    Tokens(const std::string& /*input*/):
        lineNo_(1)
    {
        using boost::spirit::lex::_start;
        using boost::spirit::lex::_end;
        using boost::spirit::lex::_pass;
        using boost::phoenix::ref;

        // Named sub-patterns; token definitions below refer to them as {NAME}.
        this->self.add_pattern
            ("EXP", "(e|E)(\\+|-)?\\d+")
            ("SUFFIX", "[yzafpnumkKMGTPEZY]")
            ("INTEGER", "-?\\d+")
            ("FLOAT", "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
            ("SYMBOL", "[a-zA-Z_?@](\\w|\\?|@)*")
            ("STRING", "\\\"([^\\\"]|\\\\\\\")*\\\"");

        // Skipped input: runs of whitespace, ';' line comments and /* ... */ comments.
        whitespaces_ = "\\s+";
        comments_ = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

        // Literals.
        integer_ = "{INTEGER}";
        float_ = "{FLOAT}";
        symbol_ = "{SYMBOL}";
        string_ = "{STRING}";

        // Operators. They are defined here, but the visible code never adds
        // them to this->self (the original marks further tokens as elided).
        quote_ = "'";
        backquote_ = '`';
        // ... other tokens

        // Skipped tokens: add the number of '\n' characters in the match to the
        // line counter, then flag the match as ignored.
        // NOTE(review): `count` is not declared in this block; presumably a lazy
        // Phoenix function provided elsewhere -- confirm.
        this->self += whitespaces_ [ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];
        this->self += comments_ [ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

        // Literal tokens. The FLOAT pattern also accepts bare digits, so
        // integer_ is listed first.
        // NOTE(review): relies on the lexer preferring the earlier definition
        // when two patterns match the same text -- confirm.
        this->self += integer_ | float_ | string_ | symbol_;
        // this->self += ... other tokens
    }

    /// @return The current line number: starts at 1 and grows by one for every
    ///         newline found inside a skipped whitespace or comment match.
    size_t lineNo() const { return lineNo_; }

private:
    // ignored tokens
    lex::token_def<lex::omit> whitespaces_, comments_;
    // literal tokens
    lex::token_def<int> integer_;
    lex::token_def<std::string> float_, symbol_, string_;
    // operator tokens
    lex::token_def<> quote_, backquote_;
    // ... other token definitions of type lex::token_def<>
    // current line number
    size_t lineNo_;
};
模板
类标记:public lex::lexer
{
公众:
令牌(常量std::字符串和输入):
第11行(1)
{
使用boost::spirit::lex::_start;
使用boost::spirit::lex::\u end;
使用boost::spirit::lex::_pass;
使用boost::phoenix::ref;
使用boost::phoenix::construct;
//宏
此->self.add\u模式
(“EXP”,“e | e”(\\+\124;-)?\\ d+))
(“后缀“,[yzafpnumkKMGTPEZY]”)
(“整数”、“-?\\d+”)
(“FLOAT”、“-?(((\\d+)(\\d*\\\.\\d+)(\\d+\\.\\d*)(\\d+\\.\\d*)({EXP}{SUFFIX}))
(“符号“,”[a-zA-Z.@](\\w.\\?\\?\))*)
(“字符串”、“\\\”([^\\\”]|;
//空白和注释
空格\=“\\s+”;
注释\=“(;[^\\n]*\\n)\(\\/\*[^*]*\*+([^/*][^*]*\*+)*\\/)”;
//文字
整型函数“{integer}”;
浮点数=“{float}”;
symbol_u389;=“{symbol}”;
字符串=“{string}”;
//操作员
引用“=””;
倒引号_uquote='''';
//…其他代币
//空白和注释规则
这->self+=whitespaces\uRef[lineNo\uRef+=count(\u start,\u end,'\n'),\u pass=lex::pass\u flags::pass\u ignore];
这->self+=注释\uref(lineNo)+=计数(\u start,\u end,'\n'),\u pass=lex::pass\u flags::pass\u ignore];
//文字规则
这个->self+=integer_124;float_124;string_124;symbol_124;;
//此->自我+=…其他令牌
}
~Tokens(){}
size\u t lineNo(){return lineNo\u;}
私人:
//忽略标记
lex::token_def whitespaces,comments;
//文字标记
lex::令牌定义整数;
lex::token_def float_uu,symbol_uu,string_uu;
//操作员令牌
lex::token_def quote,backquote;
//…lex::token_def类型的其他令牌定义
//当前行号
尺寸线号;
};
谢谢,
海瑟姆

摘自文档:
为了确保为每个令牌分配一个id,Spirit.Lex库会在内部为令牌定义分配唯一的编号,从boost::spirit::lex::min_token_id开始。
因此,您实际上可以增量地获得令牌id。但是,为了使事情变得更友好/健壮,我建议使用一个helper函数来确定令牌的名称,这样您就可以执行以下操作:
while (iter != end && token_is_valid(*iter))
{
std::cout << "Token: " <<
(iter->id() - lex::min_token_id) << ": " <<
toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
++iter;
}
if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }
将打印:
Token: 5: symbol_ ('symbol')
Token: 4: string_ ('"string"')
Token: 5: symbol_ ('this')
Token: 3: float_ ('31415926E-7')
Token: 2: integer_ ('123')
lineNo: 3
注释
- 我认为不可能识别到模式表达式,因为一旦lexer返回令牌,信息就不会公开,也不再可用
- 我记得见过带有调试信息的令牌(类似于 qi::rule<>::name()?),但目前找不到相关文档。如果可以重用调试名称,Tokens::nameof(It) 函数的实现将大大简化。
#define BOOST_RESULT_OF_USE_DECLTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/function/adapt_callable.hpp>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;
///////////////////////////////////////////////////////////////////////////
// irrelevant for question: needed this locally to make it work with my boost
// version
namespace detail {
    /// Function object that counts how many elements of [f, l) compare equal
    /// to x; a thin wrapper around std::count.
    struct count {
        /// Fixed return type. boost::result_of picks this typedef up in its
        /// TR1 mode, so the functor no longer depends on
        /// BOOST_RESULT_OF_USE_DECLTYPE being defined.
        typedef std::ptrdiff_t result_type;

        /// Legacy three-parameter metafunction from the original code. It is
        /// not the `result<Sig>` shape that boost::result_of expects; it is
        /// kept only so that existing references to it still compile.
        template<class It1, class It2, class T> struct result { typedef result_type type; };

        /// @param f  Start of the range.
        /// @param l  End of the range (std::count requires it to have the same
        ///           type as f).
        /// @param x  Value to compare each element against.
        /// @return   Number of elements equal to x.
        template<class It1, class It2, class T>
        result_type operator()(It1 f, It2 l, T const& x) const {
            return std::count(f, l, x);
        }
    };
}
// Wrap detail::count as a three-argument Boost.Phoenix callable named `count`,
// which is the name the lexer semantic actions below invoke as
// count(_start, _end, '\n').
BOOST_PHOENIX_ADAPT_CALLABLE(count, detail::count, 3);
///////////////////////////////////////////////////////////////////////////
/// Token table for a Boost.Spirit.Lex lexer.
///
/// Registers, in this order: whitespace, comments (both flagged as ignored
/// after their newlines have been counted), then integer, float, string and
/// symbol literals. nameof() maps a lexed token back to the name of the
/// definition that produced it.
///
/// @tparam LexerT  Lexer implementation forwarded to lex::lexer<>.
template <typename LexerT>
class Tokens: public lex::lexer<LexerT>
{
public:
    /// Builds every token definition and registers it with the lexer.
    Tokens():
        lineNo_(1)
    {
        using lex::_start;
        using lex::_end;
        using lex::_pass;

        // Named sub-patterns; token definitions below refer to them as {NAME}.
        this->self.add_pattern
            ("EXP", "(e|E)(\\+|-)?\\d+")
            ("SUFFIX", "[yzafpnumkKMGTPEZY]")
            ("INTEGER", "-?\\d+")
            ("FLOAT", "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
            ("SYMBOL", "[a-zA-Z_?@](\\w|\\?|@)*")
            ("STRING", "\\\"([^\\\"]|\\\\\\\")*\\\"");

        // Skipped input: runs of whitespace, ';' line comments and /* ... */ comments.
        whitespaces_ = "\\s+";
        comments_ = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

        // Literals.
        integer_ = "{INTEGER}";
        float_ = "{FLOAT}";
        symbol_ = "{SYMBOL}";
        string_ = "{STRING}";

        // Operators. They are defined here, but the visible code never adds
        // them to this->self (the original marks further tokens as elided).
        quote_ = "'";
        backquote_ = '`';
        // ... other tokens

        // whitespace and comment rules
        //this->self.add(whitespaces_, 1001)
        //(comments_, 1002);
        // Each skipped match adds its number of '\n' characters to lineNo_ and
        // is then flagged as ignored.
        this->self = whitespaces_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore]
                   | comments_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

        // Literal tokens. The FLOAT pattern also accepts bare digits, so
        // integer_ is listed first.
        // NOTE(review): relies on the lexer preferring the earlier definition
        // when two patterns match the same text -- confirm.
        this->self += integer_ | float_ | string_ | symbol_;
        // this->self += ... other tokens
    }

    /// Names the token definition that produced a token.
    ///
    /// @param it  Iterator (or pointer-like object) to a lexed token; only
    ///            it->id() is read.
    /// @return    The member name of the first definition whose id() equals
    ///            it->id(), or "other" when none matches.
    template <typename TokIter>
    std::string nameof(TokIter it) const
    {
        if (it->id() == whitespaces_.id()) return "whitespaces_";
        if (it->id() == comments_.id()) return "comments_";
        if (it->id() == integer_.id()) return "integer_";
        if (it->id() == float_.id()) return "float_";
        if (it->id() == symbol_.id()) return "symbol_";
        if (it->id() == string_.id()) return "string_";
        if (it->id() == quote_.id()) return "quote_";
        if (it->id() == backquote_.id()) return "backquote_";
        return "other";
    }

    /// @return The current line number: starts at 1 and grows by one for every
    ///         newline found inside a skipped whitespace or comment match.
    size_t lineNo() const { return lineNo_; }

private:
    // ignored tokens (the lex::omit attribute was commented out by the author)
    lex::token_def</*lex::omit*/> whitespaces_, comments_;
    // literal tokens
    lex::token_def<int> integer_;
    lex::token_def<std::string> float_, symbol_, string_;
    // operator tokens
    lex::token_def<> quote_, backquote_;
    // ... other token definitions of type lex::token_def<>
    // current line number
    size_t lineNo_;
};
int main()
{
const std::string str = "symbol \"string\" \n"
"this /* is a comment */\n"
"31415926E-7 123";
typedef lex::lexertl::token<char const*> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
Tokens<lexer_type> toklexer;
char const* first = str.c_str();
char const* last = &first[str.size()];
lexer_type::iterator_type iter = toklexer.begin(first, last);
lexer_type::iterator_type end = toklexer.end();
while (iter != end && token_is_valid(*iter))
{
std::cout << "Token: " <<
(iter->id() - lex::min_token_id) << ": " <<
toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
++iter;
}
if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
#定义BOOST_RESULT_OF_USE_DECLTYPE
#定义增强\u精神\u使用\u凤凰\u V3
#包括
#包括
#包括
#包括
名称空间qi=boost::spirit::qi;
名称空间lex=boost::spirit::lex;
名称空间phx=boost::phoenix;
///////////////////////////////////////////////////////////////////////////
//与问题无关:需要在本地使用此功能才能使其与我的boost一起工作
//版本
名称空间详细信息{
结构计数{
模板结构结果{typedef ptrdiff_t type;};
模板
typename结果::类型运算符()(IT1F、IT2L、T常量和x)常量{
返回标准::计数(f,l,x);
}
};
}
BOOST_PHOENIX_ADAPT_CALLABLE(计数,细节::计数,3);
///////////////////////////////////////////////////////////////////////////
模板
类标记:public lex::lexer
{
公众:
令牌():
第11行(1)
{
使用lex::_start;
使用lex::end;
使用lex::_pass;
使用phx::ref;
//宏
此->self.add\u模式
(“EXP”,“e | e”(\\+\124;-)?\\ d+))
(“后缀“,[yzafpnumkKMGTPEZY]”)
(“整数”、“-?\\d+”)
(“FLOAT”、“-?(((\\d+)(\\d*\\\.\\d+)(\\d+\\.\\d*)(\\d+\\.\\d*)({EXP}{SUFFIX}))
(“符号“,”[a-zA-Z.@](\\w.\\?\\?\))*)
(“字符串”、“\\\”([^\\\”]|;
//空白和注释
空格\=“\\s+”;
注释\=“(;[^\\n]*\\n)\(\\/\*[^*]*\*+([^/*][^*]*\*+)*\\/)”;
//文字
整型函数“{integer}”;
浮点数=“{float}”;
symbol_u389;=“{symbol}”;
字符串=“{string}”;
//操作员
引用“=””;
倒引号_uquote='''';
//…其他代币
//空白和注释规则
//此->self.add(空白,1001)
//(评论,1002);
this->self=whitespaces\ux:[phx::ref(lineNo)+=count(\u start,\u end,'\n'),\u pass=lex::pass\u flags::pass\u ignore]
|注释\[phx::ref(lineNo)+=count(_start,_end,'\n'),_pass=lex::pass\u flags::pass\u ignore];
//文字规则
这个->self+=integer_124;float_124;string_124;symbol_124;;
//此->自我+=…其他令牌
#define BOOST_RESULT_OF_USE_DECLTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/function/adapt_callable.hpp>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;
///////////////////////////////////////////////////////////////////////////
// irrelevant for question: needed this locally to make it work with my boost
// version
namespace detail {
    /// Function object that counts how many elements of [f, l) compare equal
    /// to x; a thin wrapper around std::count.
    struct count {
        /// Fixed return type. boost::result_of picks this typedef up in its
        /// TR1 mode, so the functor no longer depends on
        /// BOOST_RESULT_OF_USE_DECLTYPE being defined.
        typedef std::ptrdiff_t result_type;

        /// Legacy three-parameter metafunction from the original code. It is
        /// not the `result<Sig>` shape that boost::result_of expects; it is
        /// kept only so that existing references to it still compile.
        template<class It1, class It2, class T> struct result { typedef result_type type; };

        /// @param f  Start of the range.
        /// @param l  End of the range (std::count requires it to have the same
        ///           type as f).
        /// @param x  Value to compare each element against.
        /// @return   Number of elements equal to x.
        template<class It1, class It2, class T>
        result_type operator()(It1 f, It2 l, T const& x) const {
            return std::count(f, l, x);
        }
    };
}
// Wrap detail::count as a three-argument Boost.Phoenix callable named `count`,
// which is the name the lexer semantic actions below invoke as
// count(_start, _end, '\n').
BOOST_PHOENIX_ADAPT_CALLABLE(count, detail::count, 3);
///////////////////////////////////////////////////////////////////////////
/// Token table for a Boost.Spirit.Lex lexer.
///
/// Registers, in this order: whitespace, comments (both flagged as ignored
/// after their newlines have been counted), then integer, float, string and
/// symbol literals. nameof() maps a lexed token back to the name of the
/// definition that produced it.
///
/// @tparam LexerT  Lexer implementation forwarded to lex::lexer<>.
template <typename LexerT>
class Tokens: public lex::lexer<LexerT>
{
public:
    /// Builds every token definition and registers it with the lexer.
    Tokens():
        lineNo_(1)
    {
        using lex::_start;
        using lex::_end;
        using lex::_pass;

        // Named sub-patterns; token definitions below refer to them as {NAME}.
        this->self.add_pattern
            ("EXP", "(e|E)(\\+|-)?\\d+")
            ("SUFFIX", "[yzafpnumkKMGTPEZY]")
            ("INTEGER", "-?\\d+")
            ("FLOAT", "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
            ("SYMBOL", "[a-zA-Z_?@](\\w|\\?|@)*")
            ("STRING", "\\\"([^\\\"]|\\\\\\\")*\\\"");

        // Skipped input: runs of whitespace, ';' line comments and /* ... */ comments.
        whitespaces_ = "\\s+";
        comments_ = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

        // Literals.
        integer_ = "{INTEGER}";
        float_ = "{FLOAT}";
        symbol_ = "{SYMBOL}";
        string_ = "{STRING}";

        // Operators. They are defined here, but the visible code never adds
        // them to this->self (the original marks further tokens as elided).
        quote_ = "'";
        backquote_ = '`';
        // ... other tokens

        // whitespace and comment rules
        //this->self.add(whitespaces_, 1001)
        //(comments_, 1002);
        // Each skipped match adds its number of '\n' characters to lineNo_ and
        // is then flagged as ignored.
        this->self = whitespaces_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore]
                   | comments_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

        // Literal tokens. The FLOAT pattern also accepts bare digits, so
        // integer_ is listed first.
        // NOTE(review): relies on the lexer preferring the earlier definition
        // when two patterns match the same text -- confirm.
        this->self += integer_ | float_ | string_ | symbol_;
        // this->self += ... other tokens
    }

    /// Names the token definition that produced a token.
    ///
    /// @param it  Iterator (or pointer-like object) to a lexed token; only
    ///            it->id() is read.
    /// @return    The member name of the first definition whose id() equals
    ///            it->id(), or "other" when none matches.
    template <typename TokIter>
    std::string nameof(TokIter it) const
    {
        if (it->id() == whitespaces_.id()) return "whitespaces_";
        if (it->id() == comments_.id()) return "comments_";
        if (it->id() == integer_.id()) return "integer_";
        if (it->id() == float_.id()) return "float_";
        if (it->id() == symbol_.id()) return "symbol_";
        if (it->id() == string_.id()) return "string_";
        if (it->id() == quote_.id()) return "quote_";
        if (it->id() == backquote_.id()) return "backquote_";
        return "other";
    }

    /// @return The current line number: starts at 1 and grows by one for every
    ///         newline found inside a skipped whitespace or comment match.
    size_t lineNo() const { return lineNo_; }

private:
    // ignored tokens (the lex::omit attribute was commented out by the author)
    lex::token_def</*lex::omit*/> whitespaces_, comments_;
    // literal tokens
    lex::token_def<int> integer_;
    lex::token_def<std::string> float_, symbol_, string_;
    // operator tokens
    lex::token_def<> quote_, backquote_;
    // ... other token definitions of type lex::token_def<>
    // current line number
    size_t lineNo_;
};
int main()
{
const std::string str = "symbol \"string\" \n"
"this /* is a comment */\n"
"31415926E-7 123";
typedef lex::lexertl::token<char const*> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
Tokens<lexer_type> toklexer;
char const* first = str.c_str();
char const* last = &first[str.size()];
lexer_type::iterator_type iter = toklexer.begin(first, last);
lexer_type::iterator_type end = toklexer.end();
while (iter != end && token_is_valid(*iter))
{
std::cout << "Token: " <<
(iter->id() - lex::min_token_id) << ": " <<
toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
++iter;
}
if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}