Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/144.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 如何使用Boost.Spirit.Qi增量解析(并处理)一个大文件?_C++_Parsing_Boost_Boost Spirit_Boost Spirit Qi - Fatal编程技术网

C++ 如何使用Boost.Spirit.Qi增量解析(并处理)一个大文件?

C++ 如何使用Boost.Spirit.Qi增量解析(并处理)一个大文件?,c++,parsing,boost,boost-spirit,boost-spirit-qi,C++,Parsing,Boost,Boost Spirit,Boost Spirit Qi,我已经为自定义文本文件格式创建了一个Qi解析器。有成千上万个条目需要处理,每个条目通常有1-10个子条目。我给出了一个经过精简的解析器工作示例 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 #包括 使用std::string; 使用std::vector; 使用std::cout; 使用std::endl; 名称空间模型 { 名称空间qi=boost::spirit::qi; 结构谱 { 字符串注释; 字符串文件; 弦本地; 双前兆MZ; 国际

我已经为自定义文本文件格式创建了一个Qi解析器。有成千上万个条目需要处理,每个条目通常有1-10个子条目。我给出了一个经过精简的解析器工作示例

#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
使用std::string;
使用std::vector;
使用std::cout;
使用std::endl;
名称空间模型
{
名称空间qi=boost::spirit::qi;
结构谱
{
字符串注释;
字符串文件;
弦本地;
双前兆MZ;
国际前兆电荷;
双前兆;
};
结构簇
{
字符串id;
矢量谱;
};
结构聚类
{
字符串名;
向量簇;
};
}
//告诉fusion有关数据结构的信息,使其成为一流的fusion公民。
//必须在全局范围内。
增强融合适应结构(
模型::频谱,
(字符串,注释)
(字符串,文件)
(字符串,nativeId)
(双倍,前兆MZ)
(国际,前兆电荷)
(双倍,前兆强度)
)
增强融合适应结构(
模型::集群,
(字符串,id)
(标准::矢量,光谱)
)
增强融合适应结构(
模型::集群,
(字符串、名称)
(标准::向量,簇)
)
名称空间{
结构报告错误
{
模板结构结果{typedef void type;};
//将字符串压缩到周围的新行字符
模板
void运算符()(Iter first_Iter,Iter last_Iter,
Iter错误\u Iter,常数boost::spirit::qi::info&what)常数
{
std::字符串优先(第一个iter,错误iter);
std::字符串last(错误,last);
auto first_pos=first.rfind('\n');
auto last_pos=last.find('\n');
自动错误行=((第一个位置==标准::字符串::npos)?第一个
:std::字符串(第一个,第一个位置+1))
+标准::字符串(最后,0,最后位置);
//自动错误位置=(错误位置-第一个位置)+1;
/*自动错误\u pos=错误
如果(第一个位置!=std::string::npos)
错误_pos-=(第一个_pos+1)*/
标准:cerr
“NativeID:”>引用的字符串>
布尔值>双值>整数>双值;
群集\u开始%=
“=集群=“>eol>
“id=“>+(字符-下线)>下线>
频谱开始%eol;
群集%=
“name=“>+(字符-下线)>下线>
下线>
群集启动%eol;
BOOST_SPIRIT_DEBUG_节点((集群)(集群启动)(引用字符串)(频谱启动))
//关于_错误(集群,报告_错误(_1,_2,_3,_4));
//on_错误(集群_开始,报告_错误(_1,_2,_3,_4));
//on_错误(频谱_开始,报告_错误(_1,_2,_3,_4));
//on_错误(引用的_字符串,报告_错误(_1,_2,_3,_4));
//成功时(群集开始,量化群集(_1,_2,_3,_4))??
}
qi::规则引用的字符串;
qi::规则集群启动;
qi::规则谱\u开始;
qi:规则簇;
};
}
int main()
{
使用名称空间模型;
cluster_parser g;//我们的语法
字符串str;
//std::ifstream输入(“c:/test/Mo_tai.clustering”);
std::istringstream输入(“name=GreedyClustering\u 0.99\n”
“\n”
“=群集=\n”
“id=9c8c5830-5841-4f77-b819-64180509615b\n”
“SPEC\t\35; file=w:\\test\\mou Tai\u iTRAQ\u f4.mgf\id=index=219\35title=mou Tai\u iTRAQ\u f4.1254.1254.2文件:\“mou Tai\u iTRAQ\u f4.raw\”,NativeID:“controllerType=0 controllerNumber=1 scan=1254\”\ttrue\t\t300.1374\t2\t\t0.0\n”
“=群集=\n”
“id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342\n”
“SPEC\t\35; file=w:\\test\\mou Tai\u iTRAQ\u f9.mgf\id=index=560\35title=mou Tai\u iTRAQ\u f9.1666.1666.3文件:\“mou Tai\u iTRAQ\u f9.raw\”,NativeID:“controllerType=0 controllerNumber=1 scan=1666\”\ttrue\t\t300.14413\t3\t\t0.0\n”
“SPEC\t\35; file=w:\\test\\mou Tai\u iTRAQ\u f9.mgf\id=index=520\35title=mou Tai\u iTRAQ\u f9.1621.1621.3文件:\“mou Tai\u iTRAQ\u f9.raw\”,NativeID:“controllerType=0 controllerNumber=1 scan=1621\”\ttrue\t\t300.14197\t3\t\t\t0.0\n”
“=群集=\n”
“id=b84b79e1-44bc-44c0-a9af-5391ca02582d\n”
“SPEC\t\35; file=w:\\test\\mou-Tai\u-iTRAQ\u f2.mgf\35id=index=7171\35title=mou-Tai\u-iTRAQ\u f2.12729.12729.2文件:\“mou-Tai\u-iTRAQ\u f2.raw\”,NativeID:“controllerType=0 controllerNumber=1 scan=12729\”\ttrue\t\t300.15695\t2\t\t0.0”);
input.unset(std::ios::skipws);
boost::spirit::istream_迭代器开始(输入);
boost::spirit::istream_迭代器end;
聚类结果;
bool r=短语解析(开始、结束、g、qi::空白、聚类结果);
if(r&&begin==end)
{

可以使用流式迭代器(例如下面代码中使用的 boost::spirit::istream_iterator)

或对内存映射文件进行操作

在处理端,从语义操作内部将操作推送到队列上

注意:您可能会遇到一个疑似的错误——回溯缓冲区没有被正确清除;建议您检查是否存在此问题,并采取相关回答中所述的预防措施(原文此处的链接在抓取时已丢失)。

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/io.hpp>

namespace model
{
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;

    /// One "SPEC" record of a cluster.
    ///
    /// The numeric members carry default initializers so that a
    /// default-constructed spectrum never holds indeterminate values.
    struct spectrum {
        std::string comment;                   ///< free text following '#'
        std::string file;                      ///< quoted value after "File:"
        std::string nativeId;                  ///< quoted value after "NativeID:"
        double      precursorMz{0.0};
        int         precursorCharge{0};
        double      precursorIntensity{0.0};
    };

    /// One "=Cluster=" section: its id line plus the spectra listed under it.
    struct cluster {
        std::string           id;
        std::vector<spectrum> spectra;
    };
}

// Adapt the model structs as Boost.Fusion sequences so that Qi can fill them
// member by member, in the order listed here. Must stay at global scope.
BOOST_FUSION_ADAPT_STRUCT(
    model::spectrum,
    comment,
    file,
    nativeId,
    precursorMz,
    precursorCharge,
    precursorIntensity)
BOOST_FUSION_ADAPT_STRUCT(
    model::cluster,
    id,
    spectra)

namespace model
{
    /// Qi grammar for a clustering file: a "name=" header line, a blank line,
    /// then one or more "=Cluster=" sections separated by end-of-line.
    ///
    /// The grammar exposes no attribute. Instead, every cluster is passed to
    /// the handler given to the constructor as soon as it has been parsed,
    /// together with the name read from the header line.
    ///
    /// Most sequences use the expectation operator `>`, so a mismatch after
    /// the first element raises qi::expectation_failure<Iterator> rather than
    /// returning false.
    template <typename Iterator>
    struct cluster_parser : qi::grammar<Iterator>
    {
        /// @param handler callback invoked once per parsed cluster with
        ///                (clustering name, cluster).
        cluster_parser(std::function<void(std::string const&, model::cluster const&)> handler)
            :   cluster_parser::base_type(start),
                submit_(handler)
        {
            using namespace qi;

            // lexeme[] disables the skipper so blanks inside quotes are kept.
            quoted_string %= lexeme['"' > +(char_ - '"') > '"'];

            // model::spectrum has six members and none of them is a bool, so
            // the boolean column is matched but not propagated (omit[]). This
            // keeps the remaining double/int/double columns lined up with
            // precursorMz / precursorCharge / precursorIntensity.
            spectrum_start %=
                lit("SPEC") >
                "#" > +(char_ - "File:") >
                "File:" > quoted_string > lit(",") >
                "NativeID:" > quoted_string >
                omit[bool_] > double_ > int_ > double_;

            cluster_start %=
                "=Cluster=" > eol >
                "id=" > +(char_ - eol) > eol >
                spectrum_start % eol;

            // The header name is stored in the rule's local (_a) so it can be
            // handed to the callback for every cluster that follows.
            clusters %=
                "name=" > qi::as_string[ +(char_ - eol) ][ name_ = _1 ] > eol > eol >
                cluster_start [ submit_(name_, _1) ] % eol;

            // The blank skipper is installed here, so callers do not need to
            // supply one.
            start = skip(blank) [clusters];

            BOOST_SPIRIT_DEBUG_NODES((start)(clusters)(cluster_start)(quoted_string)(spectrum_start))
        }
      private:
        qi::_a_type name_;  // placeholder for the `clusters` rule local
        px::function<std::function<void(std::string const&, model::cluster const&)> > submit_;

        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cluster(), qi::blank_type> cluster_start;
        qi::rule<Iterator, spectrum(), qi::blank_type> spectrum_start;
        qi::rule<Iterator, qi::locals<std::string>, qi::blank_type> clusters;
        qi::rule<Iterator> start;
    };
}

int main()
{
    using namespace model;

    cluster_parser<boost::spirit::istream_iterator> g([&](auto const&...){std::cout << "handled\n";}); // Our grammar
    std::string str;
    //std::ifstream input("c:/test/Mo_tai.clustering");

    std::istringstream input(R"(name=GreedyClustering_0.99

=Cluster=
id=9c8c5830-5841-4f77-b819-64180509615b
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f4.mgf#id=index=219#title=Mo_Tai_iTRAQ_f4.1254.1254.2 File:"Mo_Tai_iTRAQ_f4.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1254"   true        300.1374    2           0.0
=Cluster=
id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=560#title=Mo_Tai_iTRAQ_f9.1666.1666.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1666"   true        300.14413   3           0.0
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=520#title=Mo_Tai_iTRAQ_f9.1621.1621.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1621"   true        300.14197   3           0.0
=Cluster=
id=b84b79e1-44bc-44c0-a9af-5391ca02582d
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f2.mgf#id=index=7171#title=Mo_Tai_iTRAQ_f2.12729.12729.2 File:"Mo_Tai_iTRAQ_f2.raw", NativeID:"controllerType=0 controllerNumber=1 scan=12729"   true        300.15695   2           0.0)");
    input.unsetf(std::ios::skipws);
    boost::spirit::istream_iterator begin(input);
    boost::spirit::istream_iterator end;

    bool r = phrase_parse(begin, end, g, qi::blank);

    if (r && begin == end) {
        std::cout << "Parsing succeeded\n";
    }
    else {
        std::cout << "Parsing failed\n";
    }

    if (begin!=end) {
        std::cout << "Unparsed remaining input: '" << std::string(begin, end) << "\n";
    }

    return (r && begin==end)? 0 : 1;
}
#include <boost/asio.hpp>
#include <boost/thread.hpp>
namespace ba = boost::asio;

/// Small worker pool: runs submitted clusters on background threads that all
/// service one io_service.
struct Processing {
    /// Starts one worker per hardware thread, and always at least one.
    Processing() {
        // hardware_concurrency() may report 0 when the value is unknown;
        // without a worker, posted jobs would never be executed.
        unsigned workers = boost::thread::hardware_concurrency();
        if (workers == 0)
            workers = 1;

        for (unsigned i = 0; i < workers; ++i)
            _threads.create_thread([this] { _svc.run(); });
    }

    /// Releases the work guard so run() returns once the queue is drained,
    /// then waits for every worker to finish.
    ~Processing() {
        _work.reset();
        _threads.join_all();
    }

    /// Queues a cluster for asynchronous processing.
    /// Both arguments are copied into the job because the caller's objects
    /// may be gone by the time a worker picks the job up.
    void submit(std::string const& name, model::cluster const& cluster) {
        _svc.post([this, name, cluster] { do_processing(name, cluster); });
    }

  private:
    /// Placeholder workload: reports the cluster and sleeps to simulate work.
    void do_processing(std::string const& name, model::cluster const& cluster) {
        std::cout << "Thread " << boost::this_thread::get_id() << ": " << name << " cluster of " << cluster.spectra.size() << " spectra\n";
        boost::this_thread::sleep_for(boost::chrono::milliseconds(950));
    }

    ba::io_service _svc;
    // Keeps run() from returning while the queue is momentarily empty.
    boost::optional<ba::io_service::work> _work = ba::io_service::work(_svc);
    boost::thread_group _threads;
};
附加内容:线程池工作者。下面是一个版本,它把解析出的集群调度到线程池上进行异步处理。

请注意,submit方法向服务发布lambda。lambda按值捕获,因为参数的生存期应在处理过程中延长

#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/io.hpp>

namespace model
{
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;

    /// One "SPEC" record of a cluster.
    ///
    /// The numeric members carry default initializers so that a
    /// default-constructed spectrum never holds indeterminate values.
    struct spectrum {
        std::string comment;                   ///< free text following '#'
        std::string file;                      ///< quoted value after "File:"
        std::string nativeId;                  ///< quoted value after "NativeID:"
        double      precursorMz{0.0};
        int         precursorCharge{0};
        double      precursorIntensity{0.0};
    };

    /// One "=Cluster=" section: its id line plus the spectra listed under it.
    struct cluster {
        std::string           id;
        std::vector<spectrum> spectra;
    };
}

// Adapt the model structs as Boost.Fusion sequences so that Qi can fill them
// member by member, in the order listed here. Must stay at global scope.
BOOST_FUSION_ADAPT_STRUCT(
    model::spectrum,
    comment,
    file,
    nativeId,
    precursorMz,
    precursorCharge,
    precursorIntensity)
BOOST_FUSION_ADAPT_STRUCT(
    model::cluster,
    id,
    spectra)

namespace model
{
    /// Qi grammar for a clustering file: a "name=" header line, a blank line,
    /// then one or more "=Cluster=" sections separated by end-of-line.
    ///
    /// The grammar exposes no attribute. Instead, every cluster is passed to
    /// the handler given to the constructor as soon as it has been parsed,
    /// together with the name read from the header line.
    ///
    /// Most sequences use the expectation operator `>`, so a mismatch after
    /// the first element raises qi::expectation_failure<Iterator> rather than
    /// returning false.
    template <typename Iterator>
    struct cluster_parser : qi::grammar<Iterator>
    {
        /// @param handler callback invoked once per parsed cluster with
        ///                (clustering name, cluster).
        cluster_parser(std::function<void(std::string const&, model::cluster const&)> handler)
            :   cluster_parser::base_type(start),
                submit_(handler)
        {
            using namespace qi;

            // lexeme[] disables the skipper so blanks inside quotes are kept.
            quoted_string %= lexeme['"' > +(char_ - '"') > '"'];

            // model::spectrum has six members and none of them is a bool, so
            // the boolean column is matched but not propagated (omit[]). This
            // keeps the remaining double/int/double columns lined up with
            // precursorMz / precursorCharge / precursorIntensity.
            spectrum_start %=
                lit("SPEC") >
                "#" > +(char_ - "File:") >
                "File:" > quoted_string > lit(",") >
                "NativeID:" > quoted_string >
                omit[bool_] > double_ > int_ > double_;

            cluster_start %=
                "=Cluster=" > eol >
                "id=" > +(char_ - eol) > eol >
                spectrum_start % eol;

            // The header name is stored in the rule's local (_a) so it can be
            // handed to the callback for every cluster that follows.
            clusters %=
                "name=" > qi::as_string[ +(char_ - eol) ][ name_ = _1 ] > eol > eol >
                cluster_start [ submit_(name_, _1) ] % eol;

            // The blank skipper is installed here, so callers do not need to
            // supply one.
            start = skip(blank) [clusters];

            BOOST_SPIRIT_DEBUG_NODES((start)(clusters)(cluster_start)(quoted_string)(spectrum_start))
        }
      private:
        qi::_a_type name_;  // placeholder for the `clusters` rule local
        px::function<std::function<void(std::string const&, model::cluster const&)> > submit_;

        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cluster(), qi::blank_type> cluster_start;
        qi::rule<Iterator, spectrum(), qi::blank_type> spectrum_start;
        qi::rule<Iterator, qi::locals<std::string>, qi::blank_type> clusters;
        qi::rule<Iterator> start;
    };
}

int main()
{
    using namespace model;

    cluster_parser<boost::spirit::istream_iterator> g([&](auto const&...){std::cout << "handled\n";}); // Our grammar
    std::string str;
    //std::ifstream input("c:/test/Mo_tai.clustering");

    std::istringstream input(R"(name=GreedyClustering_0.99

=Cluster=
id=9c8c5830-5841-4f77-b819-64180509615b
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f4.mgf#id=index=219#title=Mo_Tai_iTRAQ_f4.1254.1254.2 File:"Mo_Tai_iTRAQ_f4.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1254"   true        300.1374    2           0.0
=Cluster=
id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=560#title=Mo_Tai_iTRAQ_f9.1666.1666.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1666"   true        300.14413   3           0.0
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=520#title=Mo_Tai_iTRAQ_f9.1621.1621.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1621"   true        300.14197   3           0.0
=Cluster=
id=b84b79e1-44bc-44c0-a9af-5391ca02582d
SPEC    #file=w:\test\Mo_Tai_iTRAQ_f2.mgf#id=index=7171#title=Mo_Tai_iTRAQ_f2.12729.12729.2 File:"Mo_Tai_iTRAQ_f2.raw", NativeID:"controllerType=0 controllerNumber=1 scan=12729"   true        300.15695   2           0.0)");
    input.unsetf(std::ios::skipws);
    boost::spirit::istream_iterator begin(input);
    boost::spirit::istream_iterator end;

    bool r = phrase_parse(begin, end, g, qi::blank);

    if (r && begin == end) {
        std::cout << "Parsing succeeded\n";
    }
    else {
        std::cout << "Parsing failed\n";
    }

    if (begin!=end) {
        std::cout << "Unparsed remaining input: '" << std::string(begin, end) << "\n";
    }

    return (r && begin==end)? 0 : 1;
}
#include <boost/asio.hpp>
#include <boost/thread.hpp>
namespace ba = boost::asio;

/// Small worker pool: runs submitted clusters on background threads that all
/// service one io_service.
struct Processing {
    /// Starts one worker per hardware thread, and always at least one.
    Processing() {
        // hardware_concurrency() may report 0 when the value is unknown;
        // without a worker, posted jobs would never be executed.
        unsigned workers = boost::thread::hardware_concurrency();
        if (workers == 0)
            workers = 1;

        for (unsigned i = 0; i < workers; ++i)
            _threads.create_thread([this] { _svc.run(); });
    }

    /// Releases the work guard so run() returns once the queue is drained,
    /// then waits for every worker to finish.
    ~Processing() {
        _work.reset();
        _threads.join_all();
    }

    /// Queues a cluster for asynchronous processing.
    /// Both arguments are copied into the job because the caller's objects
    /// may be gone by the time a worker picks the job up.
    void submit(std::string const& name, model::cluster const& cluster) {
        _svc.post([this, name, cluster] { do_processing(name, cluster); });
    }

  private:
    /// Placeholder workload: reports the cluster and sleeps to simulate work.
    void do_processing(std::string const& name, model::cluster const& cluster) {
        std::cout << "Thread " << boost::this_thread::get_id() << ": " << name << " cluster of " << cluster.spectra.size() << " spectra\n";
        boost::this_thread::sleep_for(boost::chrono::milliseconds(950));
    }

    ba::io_service _svc;
    // Keeps run() from returning while the queue is momentarily empty.
    boost::optional<ba::io_service::work> _work = ba::io_service::work(_svc);
    boost::thread_group _threads;
};

评论:多亏了快速的支持,coliru 已经恢复,我已把之前缺少的在线演示链接补充到答案中。
评论:非常令人印象深刻的答案!谢谢!
Processing processing;
auto handler = [&processing](auto&... args) { processing.submit(args...); };

cluster_parser<boost::spirit::istream_iterator> g(handler); // Our grammar
Thread 7f0144a5b700: GreedyClustering_0.99 cluster of 1 spectra
Thread 7f014425a700: GreedyClustering_0.99 cluster of 2 spectra
Parsing succeeded
Thread 7f0143a59700: GreedyClustering_0.99 cluster of 1 spectra