C++ 如何处理读取到ASIO streambuf中的额外字符?

C++ 如何处理读取到ASIO streambuf中的额外字符?,c++,c++11,boost,boost-asio,C++,C++11,Boost,Boost Asio,大多数库的解析器只能在std::istream或单个连续缓冲区上工作。这些解析器读取istream直到eof,而不是文档结束。即使有一个很好的boost::asio::streambuf,可以与istream一起使用,但是读取并向其提交一个帧也存在问题。像read\u直到这样的函数正在提交它们读取的任何内容,如果它们读取下一帧的片段,解析填充将失败 这个嘲弄的显示了问题所在 假设我们需要一个高效的解决方案,而不复制缓冲区,我需要确保流的结尾是文档的正确结尾。我目前的解决方案是扫描数据,并在一个准

大多数库的解析器只能在
std::istream
或单个连续缓冲区上工作。这些解析器读取istream直到eof,而不是文档结束。即使有一个很好的
boost::asio::streambuf
,可以与
istream
一起使用,但是读取并向其提交一个帧也存在问题。像
read_until
这样的函数会提交它们读取到的所有内容;如果它们读到了下一帧的片段,解析就会失败。

下面这个模拟示例展示了问题所在:

假设我们需要一个高效的解决方案,而不复制缓冲区,我需要确保流的结尾是文档的正确结尾。我目前的解决方案是扫描数据,并在一个准备好的缓冲区上进行多次提交/消耗:

/// Reads one chunk (up to 1024 bytes) into the output area of `strbuf`, lets
/// scanForFrames() split it on '\0', and feeds every complete frame to
/// `parser` through a stream layered on `strbuf`.
///
/// For each frame the pair is logged as (frame size, skip): `first` bytes are
/// committed to the input sequence and `second` bytes are consumed before the
/// parser runs. The trailing incomplete frame is committed/consumed the same
/// way but not parsed.
///
/// @param strbuf  buffer that receives the data and backs the parser's stream
/// @param parser  callback invoked once per complete frame
/// @return number of bytes obtained from bad_case_of_read_some()
size_t read_some_frames( boost::asio::streambuf& strbuf, 
                         std::function< void(istream&) > parser ) {
    using extent = std::pair< size_t, size_t >;

    auto writable = strbuf.prepare( 1024 );
    size_t const received = bad_case_of_read_some( writable );

    // Locate the frame boundaries inside the freshly written bytes.
    vector< extent > complete;
    extent partial = scanForFrames( buffers_begin(writable),
                                    buffers_begin(writable) + received,
                                    complete, '\0' );

    // Moves `first` bytes into the readable area, then drops `second` bytes.
    auto const expose = [&strbuf]( extent const& e ) {
        strbuf.commit( e.first );
        strbuf.consume( e.second );
    };

    for( extent const& current : complete ) {
        cout << "Frame size: " << current.first 
             << " skip: " << current.second << endl;
        expose( current );
        iostream framed( &strbuf );
        parser( framed );
    }

    cout << "Unfinished frame size: " << partial.first 
         << " skip:" << partial.second << endl;
    expose( partial );
    return received;
}
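size_t read_some_frames( boost::asio::streambuf& strbuf,
                         std::function< void(istream&) > parser ) {
        auto buffers= strbuf.prepare( 1024 );
        size_t read= bad_case_of_read_some( buffers );
        vector< std::pair< size_t, size_t > > frames;
        std::pair< size_t, size_t > leftover= scanForFrames(
                    buffers_begin(buffers),
                    buffers_begin(buffers)+read,
                    frames, '\0' );
        for( auto const& frame: frames ) {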

除了"一直读取到连接关闭之后才使用流"这种情况之外,我确实认为这样的 strbuf + istream 组合没有任何用处。

问题很简单:istream 的提取操作在解析失败或只解析了一部分时,不会以原子方式更新流(已被读走的字符不会还原),从而导致输入丢失/损坏。

这是您的模拟示例,修正后如下:

#include <iostream>
#include <utility>
#include <algorithm>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>

namespace asio = boost::asio;

std::istream &parseDocument(std::istream &is, int &data) {
    namespace qi = boost::spirit::qi;
    return is >> qi::match(qi::int_ >> '\0', data);
}

/// Stand-in for a network read: copies a canned 8-byte payload -- the complete
/// frame "12345\0" followed by "23", the beginning of the next frame -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    // Adjacent literals keep the NUL from merging with the digits after it.
    static char const payload[] = "12345" "\0" "23";
    auto const source = asio::buffer(payload, 8);
    return asio::buffer_copy(outbuf, source);
}

/// Stand-in for a network read: copies the canned 4-byte payload "456\0" --
/// the remainder of the frame that fake_read1 left unfinished -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = "456" "\0";
    auto const source = asio::buffer(payload, 4);
    return asio::buffer_copy(outbuf, source);
}

/// Simulates one read into `strbuf`: reserves up to 1024 bytes of output
/// space, lets `fake_read` fill it, logs the byte count together with the
/// reader's name, and commits exactly that many bytes to the input sequence.
///
/// The body is wrapped in `do { ... } while (0)` so the expansion is a single
/// statement: `if (c) READ_UNTIL(sb, f); else ...` compiles, which a bare
/// `{ ... };` expansion does not. The buffer argument is parenthesized so any
/// expression can be passed for it.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do { \
    auto buffers = (strbuf).prepare(1024); \
    size_t read = fake_read(buffers); \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n"; \
    (strbuf).commit(read); \
} while (0)

/// Demonstrates parsing NUL-terminated integer documents through a
/// std::istream layered on an asio::streambuf: first with both frames fully
/// buffered, then with the second frame arriving in two pieces.
int main() {
    // this is the easy scenario: both reads are committed before parsing
    {
        asio::streambuf strbuf;

        READ_UNTIL(strbuf, fake_read1);
        READ_UNTIL(strbuf, fake_read2);

        // Zero-initialized so that a failed parse prints a defined value
        // instead of reading an indeterminate int.
        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);
        parseDocument(stream, data2);

        std::cout << "Yo: " << data1 << "\n";
        std::cout << "Yo: " << data2 << "\n";
    }

    // this is the tricky scenario:
    {
        asio::streambuf strbuf;

        READ_UNTIL(strbuf, fake_read1);
        //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);

        // Each failed attempt only resets the error flags and appends more
        // data; nothing here restores what the failed extraction already read.
        while (!parseDocument(stream, data2)) {
            stream.clear();
            READ_UNTIL(strbuf, fake_read2);
        }

        std::cout << "Oops: " << data1 << "\n";
        std::cout << "Oops: " << data2 << "\n";
    }
}
#include <iostream>
#include <boost/asio.hpp>

int main() {
    std::cout << boost::asio::ip::tcp::iostream("127.0.0.1", "6769").rdbuf();
}
#include <iostream>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>

using namespace std;
namespace asio = boost::asio;

using asio::buffers_begin;
using asio::buffers_end;

/// Tries to parse one document -- an integer followed by a NUL byte, with
/// whitespace skipped -- directly from a buffer sequence.
///
/// @param buffers  readable buffer sequence to parse from (left untouched)
/// @param data     receives the parsed integer
/// @return number of bytes the successful parse covered, or 0 when no complete
///         document is available, so the caller consumes only on success
template <typename ConstBuffers>
size_t parseDocument(ConstBuffers const& buffers, int &data) {
    namespace qi = boost::spirit::qi;

    auto const start = buffers_begin(buffers);
    auto const stop  = buffers_end(buffers);
    auto cursor = start;

    if (!qi::phrase_parse(cursor, stop, qi::int_ >> '\0', qi::space, data))
        return 0; // only optionally consume

    return cursor - start;
}

/// Stand-in for a network read: copies a canned 8-byte payload -- the complete
/// frame "12345\0" followed by "23", the beginning of the next frame -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    // Adjacent literals keep the NUL from merging with the digits after it.
    static char const payload[] = "12345" "\0" "23";
    auto const source = asio::buffer(payload, 8);
    return asio::buffer_copy(outbuf, source);
}

/// Stand-in for a network read: copies the canned 4-byte payload "456\0" --
/// the remainder of the frame that fake_read1 left unfinished -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = "456" "\0";
    auto const source = asio::buffer(payload, 4);
    return asio::buffer_copy(outbuf, source);
}

/// Simulates one read into `strbuf`: reserves up to 1024 bytes of output
/// space, lets `fake_read` fill it, logs the byte count together with the
/// reader's name, and commits exactly that many bytes to the input sequence.
///
/// The body is wrapped in `do { ... } while (0)` so the expansion is a single
/// statement: `if (c) READ_UNTIL(sb, f); else ...` compiles, which a bare
/// `{ ... };` expansion does not. The buffer argument is parenthesized so any
/// expression can be passed for it.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do { \
    auto buffers = (strbuf).prepare(1024); \
    size_t read = fake_read(buffers); \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n"; \
    (strbuf).commit(read); \
} while (0)

/// Fake asynchronous receive: the first six calls report 0 bytes, the seventh
/// appends fake_read2's payload to `strbuf` and returns its size.
///
/// The counter keeps decrementing past zero, so calls after that delivery
/// return 0 again.
size_t readuntil2(boost::asio::streambuf &strbuf) {
    static int delay_fake_async_receive = 6;

    std::cout << __PRETTY_FUNCTION__ << "\n";

    // Test the old value, then decrement unconditionally (post-decrement).
    bool const still_delayed = (delay_fake_async_receive != 0);
    --delay_fake_async_receive;
    if (still_delayed)
        return 0;

    size_t const received = fake_read2(strbuf.prepare(1024));
    std::cout << "read2: " << received << " bytes\n";
    strbuf.commit(received);
    return received;
}

#include <boost/range/algorithm.hpp>

int main() {
    // this is the tricky scenario:
    asio::streambuf strbuf;

    READ_UNTIL(strbuf, fake_read1);
    //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

    int data1=0, data2=0;

    strbuf.consume(parseDocument(strbuf.data(), data1));

    size_t consumed = 0;
    while (!(consumed = parseDocument(strbuf.data(), data2))) {
        READ_UNTIL(strbuf, fake_read2);
    }

    std::cout << "Yay: " << data1 << "\n";
    std::cout << "Yay: " << data2 << "\n";

    //asio::ip::tcp::iostream networkstream("localhost", "6767");
    std::cout << asio::ip::tcp::iostream("localhost", "6767").rdbuf();
}
您还可以看到我在
parseDocument()
函数中改用了我偏爱的、用于编写小型临时(ad-hoc)解析器的框架:Boost Spirit。请参阅下文,了解如何使其更通用。

1. 处理下溢的流缓冲区:相反,您可能需要的是这样一种流缓冲区实现——当缓冲区下溢时,它会直接等待更多数据到来。

我相信,例如,
asio::ip::tcp::iostream
就是这样的实现:

#include <iostream>
#include <utility>
#include <algorithm>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>

namespace asio = boost::asio;

std::istream &parseDocument(std::istream &is, int &data) {
    namespace qi = boost::spirit::qi;
    return is >> qi::match(qi::int_ >> '\0', data);
}

/// Stand-in for a network read: copies a canned 8-byte payload -- the complete
/// frame "12345\0" followed by "23", the beginning of the next frame -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    // Adjacent literals keep the NUL from merging with the digits after it.
    static char const payload[] = "12345" "\0" "23";
    auto const source = asio::buffer(payload, 8);
    return asio::buffer_copy(outbuf, source);
}

/// Stand-in for a network read: copies the canned 4-byte payload "456\0" --
/// the remainder of the frame that fake_read1 left unfinished -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = "456" "\0";
    auto const source = asio::buffer(payload, 4);
    return asio::buffer_copy(outbuf, source);
}

/// Simulates one read into `strbuf`: reserves up to 1024 bytes of output
/// space, lets `fake_read` fill it, logs the byte count together with the
/// reader's name, and commits exactly that many bytes to the input sequence.
///
/// The body is wrapped in `do { ... } while (0)` so the expansion is a single
/// statement: `if (c) READ_UNTIL(sb, f); else ...` compiles, which a bare
/// `{ ... };` expansion does not. The buffer argument is parenthesized so any
/// expression can be passed for it.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do { \
    auto buffers = (strbuf).prepare(1024); \
    size_t read = fake_read(buffers); \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n"; \
    (strbuf).commit(read); \
} while (0)

/// Demonstrates parsing NUL-terminated integer documents through a
/// std::istream layered on an asio::streambuf: first with both frames fully
/// buffered, then with the second frame arriving in two pieces.
int main() {
    // this is the easy scenario: both reads are committed before parsing
    {
        asio::streambuf strbuf;

        READ_UNTIL(strbuf, fake_read1);
        READ_UNTIL(strbuf, fake_read2);

        // Zero-initialized so that a failed parse prints a defined value
        // instead of reading an indeterminate int.
        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);
        parseDocument(stream, data2);

        std::cout << "Yo: " << data1 << "\n";
        std::cout << "Yo: " << data2 << "\n";
    }

    // this is the tricky scenario:
    {
        asio::streambuf strbuf;

        READ_UNTIL(strbuf, fake_read1);
        //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);

        // Each failed attempt only resets the error flags and appends more
        // data; nothing here restores what the failed extraction already read.
        while (!parseDocument(stream, data2)) {
            stream.clear();
            READ_UNTIL(strbuf, fake_read2);
        }

        std::cout << "Oops: " << data1 << "\n";
        std::cout << "Oops: " << data2 << "\n";
    }
}
#include <iostream>
#include <boost/asio.hpp>

int main() {
    std::cout << boost::asio::ip::tcp::iostream("127.0.0.1", "6769").rdbuf();
}
#include <iostream>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>

using namespace std;
namespace asio = boost::asio;

using asio::buffers_begin;
using asio::buffers_end;

/// Tries to parse one document -- an integer followed by a NUL byte, with
/// whitespace skipped -- directly from a buffer sequence.
///
/// @param buffers  readable buffer sequence to parse from (left untouched)
/// @param data     receives the parsed integer
/// @return number of bytes the successful parse covered, or 0 when no complete
///         document is available, so the caller consumes only on success
template <typename ConstBuffers>
size_t parseDocument(ConstBuffers const& buffers, int &data) {
    namespace qi = boost::spirit::qi;

    auto const start = buffers_begin(buffers);
    auto const stop  = buffers_end(buffers);
    auto cursor = start;

    if (!qi::phrase_parse(cursor, stop, qi::int_ >> '\0', qi::space, data))
        return 0; // only optionally consume

    return cursor - start;
}

/// Stand-in for a network read: copies a canned 8-byte payload -- the complete
/// frame "12345\0" followed by "23", the beginning of the next frame -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    // Adjacent literals keep the NUL from merging with the digits after it.
    static char const payload[] = "12345" "\0" "23";
    auto const source = asio::buffer(payload, 8);
    return asio::buffer_copy(outbuf, source);
}

/// Stand-in for a network read: copies the canned 4-byte payload "456\0" --
/// the remainder of the frame that fake_read1 left unfinished -- into
/// `outbuf` and returns the number of bytes copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = "456" "\0";
    auto const source = asio::buffer(payload, 4);
    return asio::buffer_copy(outbuf, source);
}

/// Simulates one read into `strbuf`: reserves up to 1024 bytes of output
/// space, lets `fake_read` fill it, logs the byte count together with the
/// reader's name, and commits exactly that many bytes to the input sequence.
///
/// The body is wrapped in `do { ... } while (0)` so the expansion is a single
/// statement: `if (c) READ_UNTIL(sb, f); else ...` compiles, which a bare
/// `{ ... };` expansion does not. The buffer argument is parenthesized so any
/// expression can be passed for it.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do { \
    auto buffers = (strbuf).prepare(1024); \
    size_t read = fake_read(buffers); \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n"; \
    (strbuf).commit(read); \
} while (0)

/// Fake asynchronous receive: the first six calls report 0 bytes, the seventh
/// appends fake_read2's payload to `strbuf` and returns its size.
///
/// The counter keeps decrementing past zero, so calls after that delivery
/// return 0 again.
size_t readuntil2(boost::asio::streambuf &strbuf) {
    static int delay_fake_async_receive = 6;

    std::cout << __PRETTY_FUNCTION__ << "\n";

    // Test the old value, then decrement unconditionally (post-decrement).
    bool const still_delayed = (delay_fake_async_receive != 0);
    --delay_fake_async_receive;
    if (still_delayed)
        return 0;

    size_t const received = fake_read2(strbuf.prepare(1024));
    std::cout << "read2: " << received << " bytes\n";
    strbuf.commit(received);
    return received;
}

#include <boost/range/algorithm.hpp>

int main() {
    // this is the tricky scenario:
    asio::streambuf strbuf;

    READ_UNTIL(strbuf, fake_read1);
    //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

    int data1=0, data2=0;

    strbuf.consume(parseDocument(strbuf.data(), data1));

    size_t consumed = 0;
    while (!(consumed = parseDocument(strbuf.data(), data2))) {
        READ_UNTIL(strbuf, fake_read2);
    }

    std::cout << "Yay: " << data1 << "\n";
    std::cout << "Yay: " << data2 << "\n";

    //asio::ip::tcp::iostream networkstream("localhost", "6767");
    std::cout << asio::ip::tcp::iostream("localhost", "6767").rdbuf();
}
输出:

READ_UNTIL fake_read1: 8 bytes
READ_UNTIL fake_read2: 4 bytes
Yay: 12345
Yay: 23456
总结:集成第三方解析器。如果必须使用需要
std::istream&
进行解析的第三方库,但不能依靠传输与帧边界对齐,则可以使用混合方法:

auto n = find_frame_boundary(buffers_begin(sb.data()), buffers_end(sb.data()));
然后可以在检测到的、已缩小的区域上使用
boost::iostreams::array_source

当 read_until() 操作将读取的所有数据提交到 streambuf 的输入序列时,它们会返回一个
bytes_transferred
值,其中包含直到并包括第一个分隔符在内的字节数。本质上,它给出了帧的大小,您可以通过以下任一方式限制
istream
仅读取
streambuf
输入序列的一部分:

  • 使用自定义的
    istream
来限制从streambuf读取的字节数。实现这一点的一个简单方法是使用Boost.IOStream并实现其Source概念的模型
  • 创建从Boost.Asio的
    streambuf
    派生的自定义
    streambuf
    。要限制从可用输入序列读取的字节数,自定义函数需要操纵输入序列的结尾。此外,自定义
    streambuf
    需要处理下溢

Boost.IOStream的自定义
Source
Boost.IOStream的
boost::iostreams::stream
对象将I/O操作委托给设备(Device)。设备是由用户编写的、实现了各种Boost.IOStream概念模型的代码。在这种情况下,只需要Source概念,它提供对字符序列的读取访问。此外,当
boost::iostreams::stream
使用Source设备时,它将从
std::basic_istream
继承

在以下代码中,
asio_streambuf_input_device
是从Boost.Asio streambuf读取数据的Source概念模型。当已读取给定数量的字节时,
asio_streambuf_input_device
会指示下溢,即使底层streambuf的输入序列中仍有数据

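/// Type that implements a model of the Boost.IOStream Source concept
/// for reading data from a Boost.Asio streambuf.
class asio_streambuf_input_device
  : public boost::iostreams::source // Use convenience class.
{
public:
  explicit
  asio_streambuf_input_device(
      boost::asio::streambuf& streambuf,
      std::streamsize bytes_transferred
  )
    : streambuf_(streambuf),
      bytes_remaining_(bytes_transferred)
  {}
  std::streamsize read(char_type* buffer, std::streamsize buffer_size)
  {
    // Determine max amount of bytes to copy.
    auto bytes_to_copy =
      std::min(bytes_remaining_, std::min(
          static_cast<std::streamsize>(streambuf_.size()), buffer_size));
    // If there is no more data to be read, indicate end-of-sequence per
    // Source concept.
    if (0 == bytes_to_copy)
    {
      return -1; // Indicate end-of-sequence, per Source concept.
    }
    // Copy from the streambuf into the provided buffer.
    std::copy_n(buffers_begin(streambuf_.data()), bytes_to_copy, buffer);
    // Update bytes remaining.
    bytes_remaining_ -= bytes_to_copy;
    // Consume from the streambuf.
    streambuf_.consume(bytes_to_copy);
    return bytes_to_copy;
  }
private:
  boost::asio::streambuf& streambuf_;
  std::streamsize bytes_remaining_;
};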
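// ...
// Create a custom iostream that sets a limit on the amount of bytes
// that will be read from the streambuf.
boost::iostreams::stream<asio_streambuf_input_device> input(streambuf, n);
此方法: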

auto n = find_frame_boundary(buffers_begin(sb.data()), buffers_end(sb.data()));
#包括
#包括
#包括
#包括
#include <boost/iostreams/concepts.hpp>  // boost::iostreams::source
#包括
///实现Boost.IOStream的源概念模型的类型
///为了阅读da