使用 Boost.Spirit 解析复杂日志

使用boostspirit解析复杂日志,boost,boost-spirit,Boost,Boost Spirit,使用boost::spirit解析复杂日志时遇到问题。我不能得到我想要的数据,主要是因为空格跳过器把一切都搞糟了 我有下一个名为log.txt的文本文件: 1:[2017-Feb-18 01:57:55.341100] <INFO, SIMULATING> => CPU | Name: CAR (ID: 0) - ID: 1 2:[2017-Feb-18 01:57:55.344100] <INFO, SENDING_DATA> => IO | Io_out

我在使用 boost::spirit 解析复杂日志时遇到了问题。我无法得到想要的数据,主要是因为空白跳过器(skipper)把一切都搞乱了。

我有下面这个名为 log.txt 的文本文件:

1:[2017-Feb-18 01:57:55.341100] <INFO, SIMULATING> => CPU | Name: CAR (ID: 0) - ID: 1
2:[2017-Feb-18 01:57:55.344100] <INFO, SENDING_DATA> => IO | Io_out - ABS: 1
3:[2017-Feb-18 01:57:55.344100] <INFO, SIMULATING> => CPU | Status: Ok
4:[2017-Feb-18 01:57:55.346100] <INFO, SIMULATING> => MSS | Random Number: 0x4D080020
5:[2017-Feb-18 01:57:55.346100] <INFO, SIMULATING> => CPU | Entering mode: AUTO
6:[2017-Feb-18 01:57:59.583342] <INFO, SENDING_DATA> => IO | Io_in - BRK: 1
7:[2017-Feb-18 01:58:24.604773] <INFO, RECEIVING_DATA> => DET | Point: 004811
8:[2017-Feb-18 01:58:24.844787] <INFO, SENDING_DATA> => PC | Send msg 1: 0101000000000000
9:[2017-Feb-18 01:58:26.204865] <INFO, RECEIVING_DATA> => PC2 | Receive msg 8: 0801000000000000
10:[2017-Feb-18 01:58:28.706008] <INFO, RECEIVING_DATA> => PC1 | Receive msg 2: 0201000000000000
11:[2017-Feb-18 01:58:29.345045] <INFO, SENDING_DATA> => PC | Send msg 3: 0301000000000000
12:[2017-Feb-18 01:58:29.706065] <INFO, RECEIVING_DATA> => PC1 | Receive msg 4: 04010000F8B8C1A7
13:[2017-Feb-18 01:58:29.846073] <INFO, SENDING_DATA> => PC | Send msg 5: 05010000F8B8C1A7
14:[2017-Feb-18 01:58:32.206208] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A8
15:[2017-Feb-18 01:58:32.366217] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1A8
17:[2017-Feb-18 01:58:32.406220] <INFO, RECEIVING_DATA> => PC2 | Receive msg 6: 06010001F8B8C1A8
18:[2017-Feb-18 01:58:32.875246] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1A9
19:[2017-Feb-18 01:58:32.906248] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A9
20:[2017-Feb-18 01:58:33.386276] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1AA
1:[2017-Feb-18 01:57:55.341100]=>CPU |名称:汽车(ID:0)-ID:1
2:[2017-Feb-18 01:57:55.344100]=>IO | IO | out-ABS:1
3:[2017-Feb-18 01:57:55.344100]=>CPU |状态:正常
4:[2017-Feb-18 01:57:55.346100]=>MSS |随机数:0x4D08020
5:[2017-Feb-18 01:57:55.346100]=>CPU |进入模式:自动
6:[2017-Feb-18 01:57:59.583342]=>IO | IO|U in-BRK:1
7:[2017-Feb-18 01:58:24.604773]=>详细点:004811
8:[2017-Feb-18 01:58:24.844787]=>PC |发送消息1:01010000000000
9:[2017-Feb-18 01:58:26.204865]=>PC2 |接收消息8:08010000000
10:[2017-Feb-18 01:58:28.706008]=>PC1 |接收消息2:02010000000000
11:[2017-Feb-18 01:58:29.345045]=>PC |发送消息3:03010000000000
12:[2017-Feb-18 01:58:29.706065]=>PC1 |接收消息4:04010000F8B8C1A7
13:[2017-Feb-18 01:58:29.846073]=>PC |发送消息5:05010000F8B8C1A7
14:[2017-Feb-18 01:58:32.206208]=>PC1 |接收消息6:06010001F8B8C1A8
15:[2017-Feb-18 01:58:32.366217]=>PC |发送消息7:07010001F8B8C1A8
17:[2017-Feb-18 01:58:32.406220]=>PC2 |接收消息6:06010001F8B8C1A8
18:[2017-Feb-18 01:58:32.875246]=>PC |发送消息7:07010001F8B8C1A9
19:[2017-Feb-18 01:58:32.906248]=>PC1 |接收消息6:06010001F8B8C1A9
20:[2017-Feb-18 01:58:33.386276]=>PC |发送消息7:07010001F8B8C1AA
我正在使用下面这段代码,将其解析为经 Boost.Fusion 适配(adapt)的结构体:

#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <boost/config/warning_disable.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>


/// One "Receive msg 6" record extracted from the log.
///
/// The member order must stay in sync with the BOOST_FUSION_ADAPT_STRUCT
/// declaration for this type, because the parser fills the members
/// positionally.
struct Message
{
    std::string line;  ///< Digits that precede ":[" at the start of the log line.
    std::string date;  ///< Date part of the timestamp, e.g. "2017-Feb-18".
    std::string time;  ///< Time part of the timestamp, e.g. "01:58:32.206208".
    char id = '\0';    ///< Character following "PC" ('1' or '2'); '\0' until parsed.
    std::string hex;   ///< Hexadecimal payload that follows "Receive msg 6:".
};

// Adapts Message as a Boost.Fusion sequence whose elements are, in order,
// line, date, time, id and hex. The element order listed here is the order
// in which a sequence parser's sub-attributes are assigned to the struct.
BOOST_FUSION_ADAPT_STRUCT(
    Message,
    (std::string, Message::line)
    (std::string, Message::date)
    (std::string, Message::time)
    (char, Message::id)
    (std::string, Message::hex)
)

// Collection that main() fills with the parsed log records.
std::vector<Message> messages;

// Short aliases for the Boost.Spirit namespaces used by the parser.
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;

void main()
{
    std::ifstream in("C:/log.txt", std::ios_base::in);
    in >> std::noskipws;//No white space skipping

    if (!in)
    {
        std::cerr << "Error: Could not open input file: " << std::endl;
        return;
    }//if

    boost::spirit::istream_iterator first(in);
    boost::spirit::istream_iterator last;

    bool result = qi::phrase_parse(first, last, 
        *repo::seek[qi::eol
            >> +ascii::char_("0-9") 
            >> ":["
            >> +ascii::char_("0-9a-fA-F-")
            >> +ascii::char_("0-9.:")
            >> "] <INFO, RECEIVING_DATA> => PC"
            >> ascii::char_('1', '2')
            >> "| Receive msg 6:"
            >> +ascii::char_("0-9a-fA-F")
            >> qi::eol],
        ascii::blank,
        messages);

    return;
}
#包括
#包括
#包括
#包括
#包括
结构消息
{
std::字符串行;
std::字符串日期;
std::字符串时间;
字符id;
字符串十六进制;
};
增强融合适应结构(
消息
(标准::字符串,消息::行)
(标准::字符串,消息::日期)
(标准::字符串,消息::时间)
(字符,消息::id)
(标准::字符串,消息::十六进制)
)
std::向量消息;
名称空间qi=boost::spirit::qi;
名称空间repo=boost::spirit::repository;
名称空间ascii=boost::spirit::ascii;
void main()
{
std::ifstream in(“C:/log.txt”,std::ios\u base::in);
在>>std::noskipws;//不跳过空白
如果(!in)
{
标准::cerr+ascii::字符(“0-9”)
>> ":["
>>+ascii::字符(0-9a-fA-F-)
>>+ascii::字符(0-9.)
>>“]=>PC”
>>ascii::char_('1','2')
>>“|接收消息6:”
>>+ascii::字符(0-9a-fA-F)
>>qi::eol],
ascii::空白,
信息);
返回;
}

执行代码后,结构体中的数据不正确。有人能帮我解决这个问题吗?

在我添加
lexeme
后,我能够解析日志文件中的以下这一行:

14:[2017-Feb-18 01:58:32.206208] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A8

您是否尝试使用
qi::lexeme
?结果中似乎多出了id。回答很好,也很准确,但仍然无法按我的需要工作。我需要解析整个日志文件,以取出所有msg 6行中包含的信息:行号、日期、时间、cpu id和十六进制消息。为了找到所有msg 6行,需要*repo::seek指令,以及开头的qi::eol和末尾的>> qi::eol。这样做之后,对于日志第17行和第19行,我在消息列表中得到了正确的信息,但第一条被解析的消息(第14行)包含格式不正确的数据,因为它前面所有行的字段都被追加了进去。还有其他修复此问题的想法吗?啊,我明白了。有没有关于
repo::seek
的文档?这对我来说是全新的东西,所以我把它删掉了。至于id,第2、3、4、5……14行的字符串似乎被连接在了一起,于是产生了这个奇怪的“234567…14”id字符串。看起来,将我们感兴趣的行的属性强制设为
Message
(通过使用额外的规则)可以解决不同字符串被合并成一个字符串的问题。很抱歉,但它仍然无法按预期工作。在您精简版的日志中,第一个非msg 6行的开头没有eol,因此紧随其后的msg 6行被正确解析了。但是,如果您在std::string的开头添加“12:[2017-Feb-18 01:58:29.706065] <INFO, RECEIVING_DATA> => PC1 | Receive msg 4: 04010000BF88C1A7”,程序仍会像以前一样出错。
#include <fstream>

#include <boost/config/warning_disable.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

/// One "Receive msg 6" record extracted from the log.
///
/// The member order must stay in sync with the BOOST_FUSION_ADAPT_STRUCT
/// declaration for this type, because the parser fills the members
/// positionally.
struct Message
{
    std::string line;  ///< Digits that precede ":[" at the start of the log line.
    std::string date;  ///< Date part of the timestamp, e.g. "2017-Feb-18".
    std::string time;  ///< Time part of the timestamp, e.g. "01:58:32.206208".
    char id = '\0';    ///< Character following "PC" ('1' or '2'); '\0' until parsed.
    std::string hex;   ///< Hexadecimal payload that follows "Receive msg 6:".
};

// Adapts Message as a Boost.Fusion sequence whose elements are, in order,
// line, date, time, id and hex. The element order listed here is the order
// in which a sequence parser's sub-attributes are assigned to the struct.
BOOST_FUSION_ADAPT_STRUCT(
    Message,
    (std::string, Message::line)
    (std::string, Message::date)
    (std::string, Message::time)
    (char, Message::id)
    (std::string, Message::hex)
)

// Collection that main() fills with the parsed log records and then prints.
std::vector<Message> messages;

// Short aliases for the Boost.Spirit and Boost.Phoenix namespaces.
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
namespace ph=boost::phoenix;

void main()
{
    std::ifstream in("C:/temp/log2.txt", std::ios_base::in);
    in >> std::noskipws;//No white space skipping

    if (!in)
    {
        std::cerr << "Error: Could not open input file: " << std::endl;
        return;
    }//if
    Message msg;
    boost::spirit::istream_iterator first(in);
    boost::spirit::istream_iterator last;
    bool result = qi::phrase_parse(first, last,
//      *repo::seek[
        (+ascii::char_("0-9")
        >> qi::lexeme[":[" >> +ascii::char_("0-9a-fA-F-")]
        >> +ascii::char_("0-9.:")
        >> "] <INFO, RECEIVING_DATA> => PC"
        >> ascii::char_('1', '2')
        >> "| Receive msg 6:"
        >> +ascii::char_("0-9a-fA-F") )
        % qi::eol,
/*      >> qi::eol],*/
        ascii::blank,
        messages);
        for (auto msg : messages) {
            std::cout << msg.line << ", " << msg.date << ", " << msg.time << ", " << msg.id << ", " << msg.hex << std::endl;
        }
    return;
}