C++ Spirit Lex：哪个令牌定义生成了这个令牌？_C++_Boost_Boost Spirit_Boost Spirit Lex

C++ Spirit Lex：哪个令牌定义生成了这个令牌？

c++ boost

C++ Spirit Lex：哪个令牌定义生成了这个令牌？,c++,boost,boost-spirit,boost-spirit-lex,C++,Boost,Boost Spirit,Boost Spirit Lex,如果这是一个新手问题，很抱歉，但我需要知道哪个令牌定义产生了某个令牌。当我打印令牌ID时，我只得到一个整数。我需要知道哪个正则表达式生成了这个令牌编辑：以下是我如何定义我的令牌： template <typename LexerT> class Tokens: public lex::lexer<LexerT> { public: Tokens(const std::string& input):

如果这是一个新手问题，很抱歉，但我需要知道哪个令牌定义产生了某个令牌。当我打印令牌ID时，我只得到一个整数。我需要知道哪个正则表达式生成了这个令牌

编辑：

以下是我如何定义我的令牌：

   // Token definitions for a Spirit.Lex based lexer. LexerT is forwarded to
   // lex::lexer<> as the underlying lexer implementation.
   template <typename LexerT>
   class Tokens: public lex::lexer<LexerT>
   {
      public:
         // Declares the regex macros and token patterns and registers the
         // tokens with this lexer. The line counter starts at 1.
         // NOTE(review): `input` is not referenced anywhere in the visible
         // constructor body.
         Tokens(const std::string& input):
            lineNo_(1)
         {
            using boost::spirit::lex::_start;
            using boost::spirit::lex::_end;
            using boost::spirit::lex::_pass;
            using boost::phoenix::ref;
            // NOTE(review): `construct` is not used in the visible code.
            using boost::phoenix::construct;

            // macros
            // Named sub-patterns; they are referenced below as {NAME}.
            this->self.add_pattern
               ("EXP",     "(e|E)(\\+|-)?\\d+")
               ("SUFFIX",  "[yzafpnumkKMGTPEZY]")
               ("INTEGER", "-?\\d+")
               ("FLOAT",    "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
               ("SYMBOL",  "[a-zA-Z_?@](\\w|\\?|@)*")
               ("STRING",  "\\\"([^\\\"]|\\\\\\\")*\\\"");

            // whitespaces and comments
            // Comments are either ';' up to and including the newline, or a
            // C-style /* ... */ block.
            whitespaces_ = "\\s+";
            comments_    = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

            // literals
            integer_ = "{INTEGER}";
            float_   = "{FLOAT}";
            symbol_  = "{SYMBOL}";
            string_  = "{STRING}";

            // operators
            // NOTE(review): quote_ and backquote_ are assigned here but are not
            // added to this->self in the visible code.
            quote_         = "'";
            backquote_     = '`';

            // ... other tokens

            // whitespace and comment rules
            // Each match adds the number of '\n' characters in the matched range
            // [_start, _end) to lineNo_ and is then flagged pass_ignore.
            // NOTE(review): `count` is not declared in this snippet; presumably
            // a Phoenix lazy version of std::count - confirm.
            this->self += whitespaces_ [ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];
            this->self += comments_    [ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

            // literal rules
            this->self += integer_ | float_ | string_ | symbol_;
            // this->self += ... other tokens
         }

         ~Tokens() {}

         // Returns the current value of the line counter.
         size_t lineNo() { return lineNo_; }


      private:
         // ignored tokens
         lex::token_def<lex::omit> whitespaces_, comments_;

         // literal tokens
         lex::token_def<int>          integer_;
         lex::token_def<std::string>  float_, symbol_, string_;

         // operator tokens
         lex::token_def<> quote_, backquote_;
         // ... other token definitions of type lex::token_def<>

         // current line number
         size_t lineNo_;
   };

模板
类标记：public lex:：lexer
{
公众：
令牌（常量std:：字符串和输入）：
第11行（1）
{
使用boost:：spirit:：lex:：_start；
使用boost:：spirit:：lex:：\u end；
使用boost:：spirit:：lex:：_pass；
使用boost:：phoenix:：ref；
使用boost:：phoenix:：construct；
//宏
此->self.add\u模式
（“EXP”，“e | e”（\\+\124;-）？\\ d+））
（“后缀“，[yzafpnumkKMGTPEZY]”）
（“整数”、“-？\\d+”）
（“FLOAT”、“-？（（（\\d+）（\\d*\\\.\\d+）（\\d+\\.\\d*）（\\d+\\.\\d*）（{EXP}{SUFFIX}））
（“符号“，”[a-zA-Z.@]（\\w.\\？\\？\））*）
（“字符串”、“\\\”（[^\\\”]|；
//空白和注释
空格\=“\\s+”；
注释\=“（；[^\\n]*\\n）\（\\/\*[^*]*\*+（[^/*][^*]*\*+）*\\/）”；
//文字
整型函数“{integer}”；
浮点数=“{float}”；
symbol_u389;=“{symbol}”；
字符串=“{string}”；
//操作员
引用“=””；
倒引号_uquote=''''；
//…其他代币
//空白和注释规则
这->self+=whitespaces\uRef[lineNo\uRef+=count（\u start，\u end，'\n'），\u pass=lex:：pass\u flags:：pass\u ignore]；
这->self+=注释\uref（lineNo）+=计数（\u start，\u end，'\n'），\u pass=lex:：pass\u flags:：pass\u ignore]；
//文字规则
这个->self+=integer_124;float_124;string_124;symbol_124;；
//此->自我+=…其他令牌
}
~Tokens（）{}
size\u t lineNo（）{return lineNo\u；}
私人：
//忽略标记
lex:：token_def whitespaces，comments；
//文字标记
lex：：令牌定义整数；
lex:：token_def float_uu，symbol_uu，string_uu；
//操作员令牌
lex:：token_def quote，backquote；
//…lex:：token_def类型的其他令牌定义
//当前行号
尺寸线号；
};

谢谢，海瑟姆

引自文档：

为了确保每个令牌都能分配到一个 id，Spirit.Lex 库会在内部为令牌定义分配唯一的编号，编号从常量 `boost::spirit::lex::min_token_id` 开始。

因此，您实际上可以增量地获得令牌id。但是，为了使事情变得更友好/健壮，我建议使用一个helper函数来确定令牌的名称，这样您就可以执行以下操作：

while (iter != end && token_is_valid(*iter))
{
    std::cout << "Token: " << 
       (iter->id() - lex::min_token_id) << ": " << 
       toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
    ++iter;
}
if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }

将打印：

Token: 5: symbol_ ('symbol')
Token: 4: string_ ('"string"')
Token: 5: symbol_ ('this')
Token: 3: float_ ('31415926E-7')
Token: 2: integer_ ('123')
lineNo: 3

注释

我认为不可能识别到模式表达式，因为一旦lexer返回令牌，信息就不会公开，也不再可用
我记得曾见过带有调试信息的令牌（类似于 `qi::rule::name()`？），但我目前找不到相关文档。如果可以重用调试名称，那么 `Tokens::nameof(It)` 函数的实现将大大简化。

完整可运行的演示代码（针对 Boost 1_49 至 1_57、GCC -std=c++0x 略作调整）：

#define BOOST_RESULT_OF_USE_DECLTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/function/adapt_callable.hpp>

namespace qi  = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;

///////////////////////////////////////////////////////////////////////////
// irrelevant for question: needed this locally to make it work with my boost
// version
namespace detail {
    // Function object that forwards to std::count. It is adapted further down
    // in this file into a Phoenix lazy function.
    struct count {
        // Result-type metafunction: the call always yields a ptrdiff_t.
        template<class First, class Last, class Value>
        struct result {
            typedef ptrdiff_t type;
        };

        // Returns how many elements of [begin, end) compare equal to `needle`.
        template<class First, class Last, class Value>
        typename result<First, Last, Value>::type
        operator()(First begin, Last end, Value const& needle) const {
            typename result<First, Last, Value>::type const hits =
                std::count(begin, end, needle);
            return hits;
        }
    };
}

// Adapt detail::count as a three-argument Phoenix callable named `count`,
// which is the name used inside the lexer semantic actions below.
BOOST_PHOENIX_ADAPT_CALLABLE(count, detail::count, 3);
///////////////////////////////////////////////////////////////////////////

/// Spirit.Lex token definitions for the demo.
///
/// Registers whitespace, comment and literal tokens with the lexer and keeps
/// a running line counter. `nameof()` maps a token back to the name of the
/// token definition that produced it.
///
/// @tparam LexerT underlying lexer type, forwarded to lex::lexer<>.
template <typename LexerT>
   class Tokens: public lex::lexer<LexerT>
   {

      public:
         /// Builds all patterns and registers the tokens; line counting starts at 1.
         Tokens():
            lineNo_(1)
         {
            using lex::_start;
            using lex::_end;
            using lex::_pass;

            // macros: named sub-patterns, referenced below as {NAME}
            this->self.add_pattern
               ("EXP",     "(e|E)(\\+|-)?\\d+")
               ("SUFFIX",  "[yzafpnumkKMGTPEZY]")
               ("INTEGER", "-?\\d+")
               ("FLOAT",    "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
               ("SYMBOL",  "[a-zA-Z_?@](\\w|\\?|@)*")
               ("STRING",  "\\\"([^\\\"]|\\\\\\\")*\\\"");

            // whitespaces and comments
            whitespaces_ = "\\s+";
            comments_    = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

            // literals
            integer_ = "{INTEGER}";
            float_   = "{FLOAT}";
            symbol_  = "{SYMBOL}";
            string_  = "{STRING}";

            // operators
            quote_         = "'";
            backquote_     = '`';

            // ... other tokens

            // whitespace and comment rules
            // Each match adds the number of '\n' characters in [_start, _end)
            // to lineNo_ and is then flagged pass_ignore.
            // NOTE(review): the order in which definitions are added to
            // this->self presumably determines the ids shown by the demo
            // output, so the statement order is kept as-is.
            //this->self.add(whitespaces_, 1001)
                          //(comments_,    1002);
            this->self = whitespaces_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore]
                       | comments_    [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

            // literal rules
            this->self += integer_ | float_ | string_ | symbol_;
            // this->self += ... other tokens
         }

         /// Returns the member name of the token definition whose id equals
         /// the id of the token `it` points at, or "other" if none matches.
         ///
         /// Takes the iterator by const reference so it is not copied on each
         /// call, and is const because it only reads the stored ids.
         template <typename TokIter>
         std::string nameof(TokIter const& it) const
         {
             // Read the token id once instead of once per comparison.
             const auto id = it->id();

             if (id == whitespaces_.id()) return "whitespaces_";
             if (id == comments_.id())    return "comments_";
             if (id == integer_.id())     return "integer_";
             if (id == float_.id())       return "float_";
             if (id == symbol_.id())      return "symbol_";
             if (id == string_.id())      return "string_";

             // NOTE(review): quote_ and backquote_ are not added to
             // this->self in the visible code - confirm they are registered
             // among the elided "other tokens".
             if (id == quote_.id())       return "quote_";
             if (id == backquote_.id())   return "backquote_";
             return "other";
         }

         /// Current value of the line counter.
         size_t lineNo() const { return lineNo_; }


      private:
         // ignored tokens
         lex::token_def</*lex::omit*/> whitespaces_, comments_;

         // literal tokens
         lex::token_def<int>          integer_;
         lex::token_def<std::string>  float_, symbol_, string_;

         // operator tokens
         lex::token_def<> quote_, backquote_;
         // ... other token definitions of type lex::token_def<>

         // current line number
         size_t lineNo_;
   };

int main()
{
    const std::string str = "symbol \"string\" \n"
        "this /* is a comment */\n"
        "31415926E-7 123";

    typedef lex::lexertl::token<char const*> token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;

    Tokens<lexer_type> toklexer;

    char const* first = str.c_str();
    char const* last = &first[str.size()];

    lexer_type::iterator_type iter = toklexer.begin(first, last);
    lexer_type::iterator_type end = toklexer.end();

    while (iter != end && token_is_valid(*iter))
    {
        std::cout << "Token: " << 
           (iter->id() - lex::min_token_id) << ": " << 
           toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
        ++iter;
    }

    if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
            << rest << "\"\n";
    }
    return 0;
}

#定义BOOST_RESULT_OF_USE_DECLTYPE
#定义增强\u精神\u使用\u凤凰\u V3
#包括
#包括
#包括
#包括
名称空间qi=boost:：spirit:：qi；
名称空间lex=boost:：spirit:：lex；
名称空间phx=boost:：phoenix；
///////////////////////////////////////////////////////////////////////////
//与问题无关：需要在本地使用此功能才能使其与我的boost一起工作
//版本
名称空间详细信息{
结构计数{
模板结构结果{typedef ptrdiff_t type；}；
模板
typename结果：：类型运算符（）（IT1F、IT2L、T常量和x）常量{
返回标准：：计数（f，l，x）；
}
};
}
BOOST_PHOENIX_ADAPT_CALLABLE（计数，细节：：计数，3）；
///////////////////////////////////////////////////////////////////////////
模板
类标记：public lex:：lexer
{
公众：
令牌（）：
第11行（1）
{
使用lex:：_start；
使用lex:：end；
使用lex:：_pass；
使用phx：：ref；
//宏
此->self.add\u模式
（“EXP”，“e | e”（\\+\124;-）？\\ d+））
（“后缀“，[yzafpnumkKMGTPEZY]”）
（“整数”、“-？\\d+”）
（“FLOAT”、“-？（（（\\d+）（\\d*\\\.\\d+）（\\d+\\.\\d*）（\\d+\\.\\d*）（{EXP}{SUFFIX}））
（“符号“，”[a-zA-Z.@]（\\w.\\？\\？\））*）
（“字符串”、“\\\”（[^\\\”]|；
//空白和注释
空格\=“\\s+”；
注释\=“（；[^\\n]*\\n）\（\\/\*[^*]*\*+（[^/*][^*]*\*+）*\\/）”；
//文字
整型函数“{integer}”；
浮点数=“{float}”；
symbol_u389;=“{symbol}”；
字符串=“{string}”；
//操作员
引用“=””；
倒引号_uquote=''''；
//…其他代币
//空白和注释规则
//此->self.add（空白，1001）
//（评论，1002）；
this->self=whitespaces\ux:[phx:：ref（lineNo）+=count（\u start，\u end，'\n'），\u pass=lex:：pass\u flags:：pass\u ignore]
|注释\[phx:：ref（lineNo）+=count（_start，_end，'\n'），_pass=lex:：pass\u flags:：pass\u ignore]；
//文字规则
这个->self+=integer_124;float_124;string_124;symbol_124;；
//此->自我+=…其他令牌
#define BOOST_RESULT_OF_USE_DECLTYPE
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/function/adapt_callable.hpp>

namespace qi  = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;

///////////////////////////////////////////////////////////////////////////
// irrelevant for question: needed this locally to make it work with my boost
// version
namespace detail {
    // Function object that forwards to std::count. It is adapted further down
    // in this file into a Phoenix lazy function.
    struct count {
        // Result-type metafunction: the call always yields a ptrdiff_t.
        template<class First, class Last, class Value>
        struct result {
            typedef ptrdiff_t type;
        };

        // Returns how many elements of [begin, end) compare equal to `needle`.
        template<class First, class Last, class Value>
        typename result<First, Last, Value>::type
        operator()(First begin, Last end, Value const& needle) const {
            typename result<First, Last, Value>::type const hits =
                std::count(begin, end, needle);
            return hits;
        }
    };
}

// Adapt detail::count as a three-argument Phoenix callable named `count`,
// which is the name used inside the lexer semantic actions below.
BOOST_PHOENIX_ADAPT_CALLABLE(count, detail::count, 3);
///////////////////////////////////////////////////////////////////////////

/// Spirit.Lex token definitions for the demo.
///
/// Registers whitespace, comment and literal tokens with the lexer and keeps
/// a running line counter. `nameof()` maps a token back to the name of the
/// token definition that produced it.
///
/// @tparam LexerT underlying lexer type, forwarded to lex::lexer<>.
template <typename LexerT>
   class Tokens: public lex::lexer<LexerT>
   {

      public:
         /// Builds all patterns and registers the tokens; line counting starts at 1.
         Tokens():
            lineNo_(1)
         {
            using lex::_start;
            using lex::_end;
            using lex::_pass;

            // macros: named sub-patterns, referenced below as {NAME}
            this->self.add_pattern
               ("EXP",     "(e|E)(\\+|-)?\\d+")
               ("SUFFIX",  "[yzafpnumkKMGTPEZY]")
               ("INTEGER", "-?\\d+")
               ("FLOAT",    "-?(((\\d+)|(\\d*\\.\\d+)|(\\d+\\.\\d*))({EXP}|{SUFFIX})?)")
               ("SYMBOL",  "[a-zA-Z_?@](\\w|\\?|@)*")
               ("STRING",  "\\\"([^\\\"]|\\\\\\\")*\\\"");

            // whitespaces and comments
            whitespaces_ = "\\s+";
            comments_    = "(;[^\\n]*\\n)|(\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/)";

            // literals
            integer_ = "{INTEGER}";
            float_   = "{FLOAT}";
            symbol_  = "{SYMBOL}";
            string_  = "{STRING}";

            // operators
            quote_         = "'";
            backquote_     = '`';

            // ... other tokens

            // whitespace and comment rules
            // Each match adds the number of '\n' characters in [_start, _end)
            // to lineNo_ and is then flagged pass_ignore.
            // NOTE(review): the order in which definitions are added to
            // this->self presumably determines the ids shown by the demo
            // output, so the statement order is kept as-is.
            //this->self.add(whitespaces_, 1001)
                          //(comments_,    1002);
            this->self = whitespaces_ [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore]
                       | comments_    [phx::ref(lineNo_) += count(_start, _end, '\n'), _pass = lex::pass_flags::pass_ignore];

            // literal rules
            this->self += integer_ | float_ | string_ | symbol_;
            // this->self += ... other tokens
         }

         /// Returns the member name of the token definition whose id equals
         /// the id of the token `it` points at, or "other" if none matches.
         ///
         /// Takes the iterator by const reference so it is not copied on each
         /// call, and is const because it only reads the stored ids.
         template <typename TokIter>
         std::string nameof(TokIter const& it) const
         {
             // Read the token id once instead of once per comparison.
             const auto id = it->id();

             if (id == whitespaces_.id()) return "whitespaces_";
             if (id == comments_.id())    return "comments_";
             if (id == integer_.id())     return "integer_";
             if (id == float_.id())       return "float_";
             if (id == symbol_.id())      return "symbol_";
             if (id == string_.id())      return "string_";

             // NOTE(review): quote_ and backquote_ are not added to
             // this->self in the visible code - confirm they are registered
             // among the elided "other tokens".
             if (id == quote_.id())       return "quote_";
             if (id == backquote_.id())   return "backquote_";
             return "other";
         }

         /// Current value of the line counter.
         size_t lineNo() const { return lineNo_; }


      private:
         // ignored tokens
         lex::token_def</*lex::omit*/> whitespaces_, comments_;

         // literal tokens
         lex::token_def<int>          integer_;
         lex::token_def<std::string>  float_, symbol_, string_;

         // operator tokens
         lex::token_def<> quote_, backquote_;
         // ... other token definitions of type lex::token_def<>

         // current line number
         size_t lineNo_;
   };

int main()
{
    const std::string str = "symbol \"string\" \n"
        "this /* is a comment */\n"
        "31415926E-7 123";

    typedef lex::lexertl::token<char const*> token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;

    Tokens<lexer_type> toklexer;

    char const* first = str.c_str();
    char const* last = &first[str.size()];

    lexer_type::iterator_type iter = toklexer.begin(first, last);
    lexer_type::iterator_type end = toklexer.end();

    while (iter != end && token_is_valid(*iter))
    {
        std::cout << "Token: " << 
           (iter->id() - lex::min_token_id) << ": " << 
           toklexer.nameof(iter) << " ('" << iter->value() << "')\n";
        ++iter;
    }

    if (iter == end) { std::cout << "lineNo: " << toklexer.lineNo() << "\n"; }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
            << rest << "\"\n";
    }
    return 0;
}