C++ 如何处理读取到ASIO streambuf中的额外字符?
大多数库的解析器只能在C++ 如何处理读取到ASIO streambuf中的额外字符?,c++,c++11,boost,boost-asio,C++,C++11,Boost,Boost Asio,大多数库的解析器只能在std::istream或单个连续缓冲区上工作。这些解析器读取istream直到eof,而不是文档结束。即使有一个很好的boost::asio::streambuf,可以与istream一起使用,但是读取并向其提交一个帧也存在问题。像read\u直到这样的函数正在提交它们读取的任何内容,如果它们读取下一帧的片段,解析填充将失败 这个嘲弄的显示了问题所在 假设我们需要一个高效的解决方案,而不复制缓冲区,我需要确保流的结尾是文档的正确结尾。我目前的解决方案是扫描数据,并在一个准
std::istream
或单个连续缓冲区上工作。这些解析器读取istream直到eof,而不是文档结束。即使有一个很好的boost::asio::streambuf
,可以与istream
一起使用,但是只读取一帧并将其提交到该缓冲区仍然存在问题。像 read_until 这样的函数会提交它们读取到的全部内容;如果它们读到了下一帧的片段,解析就会失败。
下面这个模拟(mock)示例展示了问题所在。
假设我们需要一个高效的解决方案,而不复制缓冲区,我需要确保流的结尾是文档的正确结尾。我目前的解决方案是扫描数据,并在一个准备好的缓冲区上进行多次提交/消耗:
/// Performs one read into `strbuf` and hands every complete frame found in
/// the freshly read bytes to `parser`.
///
/// 1024 bytes of output space are prepared, filled by bad_case_of_read_some(),
/// and scanned by scanForFrames() using '\0' as the delimiter argument. For
/// each reported (size, skip) pair the function commits `size` bytes, consumes
/// `skip` bytes, and then calls `parser` on a stream bound to `strbuf`. The
/// trailing (size, skip) pair returned by scanForFrames() is committed and
/// consumed the same way, but without invoking `parser`.
///
/// @param strbuf  buffer that receives the data and backs the parser's stream
/// @param parser  callback invoked once per complete frame
/// @return the byte count reported by bad_case_of_read_some()
size_t read_some_frames( boost::asio::streambuf& strbuf,
                         std::function< void(istream&) > parser ) {
    typedef std::pair< size_t, size_t > SizeAndSkip;

    auto prepared = strbuf.prepare( 1024 );
    size_t received = bad_case_of_read_some( prepared );

    vector< SizeAndSkip > complete;
    SizeAndSkip partial = scanForFrames(
        buffers_begin(prepared),
        buffers_begin(prepared) + received,
        complete, '\0' );

    for( auto it = complete.begin(); it != complete.end(); ++it ) {
        cout << "Frame size: " << it->first
             << " skip: " << it->second << endl;
        // Order matters: expose the frame, drop the skipped bytes, then parse.
        strbuf.commit( it->first );
        strbuf.consume( it->second );
        iostream frame_stream( &strbuf );
        parser( frame_stream );
    }

    cout << "Unfinished frame size: " << partial.first
         << " skip:" << partial.second << endl;
    strbuf.commit( partial.first );
    strbuf.consume( partial.second );

    return received;
}
size\u t read\u一些帧(boost::asio::streambuf和strbuf,
std::function解析器{
自动缓冲区=strbuf.prepare(1024);
大小读取=一些(缓冲区)读取的坏情况;
向量>帧;
std::pairleftover=scanForFrames(
缓冲区\u开始(缓冲区),
缓冲区\开始(缓冲区)+读取,
帧,'\0');
用于(自动常量和帧:帧){
除了在一直读取到连接关闭之后才使用流的情况之外,我确实认为这样的 strbuf + istream 组合没有什么用处。
简单的问题是,istream提取不会在失败/部分解析时自动更新流,从而导致输入丢失/损坏
下面是您的模拟示例,经过修正后如下:
#include <iostream>
#include <utility>
#include <algorithm>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>
namespace asio = boost::asio;
/// Extracts one document -- an integer followed by a NUL byte -- from `is`
/// into `data` using the Boost.Spirit Qi stream manipulator.
///
/// @return `is`, so the caller can test the stream state after the attempt.
std::istream &parseDocument(std::istream &is, int &data) {
    namespace qi = boost::spirit::qi;
    is >> qi::match(qi::int_ >> '\0', data);
    return is;
}
/// Simulated first network read: copies 8 bytes into `outbuf` -- the frame
/// "12345" with its NUL terminator, followed by "23", the beginning of the
/// next frame. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    static char const payload[] = {'1', '2', '3', '4', '5', '\0', '2', '3'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
/// Simulated second network read: copies 4 bytes into `outbuf` -- "456" and a
/// NUL terminator. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = {'4', '5', '6', '\0'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
// Simulates one completed read into `strbuf`: prepares 1024 bytes of output
// space, lets `fake_read` fill it, logs the byte count together with the
// spelled-out name of `fake_read`, and commits exactly that many bytes.
// The body is wrapped in do { } while (0) so that `READ_UNTIL(a, b);` expands
// to a single statement and stays valid inside an unbraced if/else.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do {   \
    auto buffers = (strbuf).prepare(1024);                                \
    size_t read = fake_read(buffers);                                     \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n";     \
    (strbuf).commit(read);                                                \
} while (0)
// Demonstrates istream-based parsing on top of an asio::streambuf in two
// situations: all data present up front, and a frame split across two reads.
int main() {
    // this is the easy scenario: both reads complete before parsing starts
    {
        asio::streambuf strbuf;
        READ_UNTIL(strbuf, fake_read1);
        READ_UNTIL(strbuf, fake_read2);

        // Zero-initialised so that a failed parse still prints a defined value.
        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);
        parseDocument(stream, data2);

        std::cout << "Yo: " << data1 << "\n";
        std::cout << "Yo: " << data2 << "\n";
    }

    // this is the tricky scenario: only the first read has happened, so the
    // buffer ends with the first bytes of the second frame
    {
        asio::streambuf strbuf;
        READ_UNTIL(strbuf, fake_read1);
        //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);

        // Retry until a document parses: reset the stream's error flags and
        // pull in the second read before each new attempt.
        // NOTE(review): the failed attempt has presumably already extracted
        // the partial bytes from the stream, so the retry no longer sees
        // them -- which would explain the "Oops" label. Confirm by running.
        while (!parseDocument(stream, data2)) {
            stream.clear();
            READ_UNTIL(strbuf, fake_read2);
        }

        std::cout << "Oops: " << data1 << "\n";
        std::cout << "Oops: " << data2 << "\n";
    }
}
#include <iostream>
#include <boost/asio.hpp>
int main() {
std::cout << boost::asio::ip::tcp::iostream("127.0.0.1", "6769").rdbuf();
}
#include <iostream>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
using namespace std;
namespace asio = boost::asio;
using asio::buffers_begin;
using asio::buffers_end;
/// Tries to parse one document -- an integer followed by a NUL byte, with
/// whitespace skipped via qi::space -- directly from a buffer sequence.
///
/// Nothing is consumed from the underlying buffer here; the caller decides
/// what to do with the returned count.
///
/// @param buffers  buffer sequence to parse from
/// @param data     receives the parsed integer
/// @return how far the parser advanced from the start of `buffers` on
///         success, 0 when the parse fails.
template <typename ConstBuffers>
size_t parseDocument(ConstBuffers const& buffers, int &data) {
    namespace qi = boost::spirit::qi;

    auto const first = buffers_begin(buffers);
    auto const last = buffers_end(buffers);
    auto cursor = first;

    if (!qi::phrase_parse(cursor, last, qi::int_ >> '\0', qi::space, data))
        return 0; // only optionally consume

    return cursor - first;
}
/// Simulated first network read: copies 8 bytes into `outbuf` -- the frame
/// "12345" with its NUL terminator, followed by "23", the beginning of the
/// next frame. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    static char const payload[] = {'1', '2', '3', '4', '5', '\0', '2', '3'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
/// Simulated second network read: copies 4 bytes into `outbuf` -- "456" and a
/// NUL terminator. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = {'4', '5', '6', '\0'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
// Simulates one completed read into `strbuf`: prepares 1024 bytes of output
// space, lets `fake_read` fill it, logs the byte count together with the
// spelled-out name of `fake_read`, and commits exactly that many bytes.
// The body is wrapped in do { } while (0) so that `READ_UNTIL(a, b);` expands
// to a single statement and stays valid inside an unbraced if/else.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do {   \
    auto buffers = (strbuf).prepare(1024);                                \
    size_t read = fake_read(buffers);                                     \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n";     \
    (strbuf).commit(read);                                                \
} while (0)
/// Fake asynchronous receive. Every call logs the function signature. The
/// first six calls return 0 without touching `strbuf`; the seventh prepares
/// 1024 bytes, fills them via fake_read2(), commits what was copied and
/// returns that count. The counter keeps decrementing past zero, so calls
/// after the seventh return 0 again.
size_t readuntil2(boost::asio::streambuf &strbuf) {
    std::cout << __PRETTY_FUNCTION__ << "\n";

    static int polls_before_data = 6;
    int const before = polls_before_data;
    --polls_before_data;
    if (before != 0)
        return 0;

    auto space = strbuf.prepare(1024);
    size_t const received = fake_read2(space);
    std::cout << "read2: " << received << " bytes\n";
    strbuf.commit(received);
    return received;
}
#include <boost/range/algorithm.hpp>
// Demonstrates parsing frames directly from the streambuf's buffer sequence,
// consuming bytes only after a frame has parsed completely.
int main() {
    // this is the tricky scenario: only the first read has happened, so the
    // buffer ends with the first bytes of the second frame
    asio::streambuf strbuf;
    READ_UNTIL(strbuf, fake_read1);
    //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

    int data1 = 0;
    int data2 = 0;

    // Remove exactly as many bytes as the parser matched for the first frame.
    strbuf.consume(parseDocument(strbuf.data(), data1));

    // A failed parse returns 0 and leaves the buffer untouched, so the next
    // attempt starts again from the beginning of the partial frame.
    size_t consumed = 0;
    while (!(consumed = parseDocument(strbuf.data(), data2))) {
        READ_UNTIL(strbuf, fake_read2);
    }
    // Drop the second frame as well, mirroring the handling of the first one.
    strbuf.consume(consumed);

    std::cout << "Yay: " << data1 << "\n";
    std::cout << "Yay: " << data2 << "\n";

    //asio::ip::tcp::iostream networkstream("localhost", "6767");
    std::cout << asio::ip::tcp::iostream("localhost", "6767").rdbuf();
}
您还可以看到我在parseDocument()
函数中改用了我喜欢的、适合编写小型临时(ad-hoc)解析器的框架:Boost Spirit。请参阅下文,了解如何让它更通用。
1. 下溢(underflow)时的流缓冲区
相反,您可能会寻找一个流缓冲区实现,当缓冲区下溢时,它只会等待更多的数据
我相信,例如,asio::ip::tcp::iostream
就是:
#include <iostream>
#include <utility>
#include <algorithm>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>
namespace asio = boost::asio;
/// Extracts one document -- an integer followed by a NUL byte -- from `is`
/// into `data` using the Boost.Spirit Qi stream manipulator.
///
/// @return `is`, so the caller can test the stream state after the attempt.
std::istream &parseDocument(std::istream &is, int &data) {
    namespace qi = boost::spirit::qi;
    is >> qi::match(qi::int_ >> '\0', data);
    return is;
}
/// Simulated first network read: copies 8 bytes into `outbuf` -- the frame
/// "12345" with its NUL terminator, followed by "23", the beginning of the
/// next frame. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    static char const payload[] = {'1', '2', '3', '4', '5', '\0', '2', '3'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
/// Simulated second network read: copies 4 bytes into `outbuf` -- "456" and a
/// NUL terminator. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = {'4', '5', '6', '\0'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
// Simulates one completed read into `strbuf`: prepares 1024 bytes of output
// space, lets `fake_read` fill it, logs the byte count together with the
// spelled-out name of `fake_read`, and commits exactly that many bytes.
// The body is wrapped in do { } while (0) so that `READ_UNTIL(a, b);` expands
// to a single statement and stays valid inside an unbraced if/else.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do {   \
    auto buffers = (strbuf).prepare(1024);                                \
    size_t read = fake_read(buffers);                                     \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n";     \
    (strbuf).commit(read);                                                \
} while (0)
// Demonstrates istream-based parsing on top of an asio::streambuf in two
// situations: all data present up front, and a frame split across two reads.
int main() {
    // this is the easy scenario: both reads complete before parsing starts
    {
        asio::streambuf strbuf;
        READ_UNTIL(strbuf, fake_read1);
        READ_UNTIL(strbuf, fake_read2);

        // Zero-initialised so that a failed parse still prints a defined value.
        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);
        parseDocument(stream, data2);

        std::cout << "Yo: " << data1 << "\n";
        std::cout << "Yo: " << data2 << "\n";
    }

    // this is the tricky scenario: only the first read has happened, so the
    // buffer ends with the first bytes of the second frame
    {
        asio::streambuf strbuf;
        READ_UNTIL(strbuf, fake_read1);
        //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

        int data1 = 0;
        int data2 = 0;
        std::istream stream(&strbuf);

        parseDocument(stream, data1);

        // Retry until a document parses: reset the stream's error flags and
        // pull in the second read before each new attempt.
        // NOTE(review): the failed attempt has presumably already extracted
        // the partial bytes from the stream, so the retry no longer sees
        // them -- which would explain the "Oops" label. Confirm by running.
        while (!parseDocument(stream, data2)) {
            stream.clear();
            READ_UNTIL(strbuf, fake_read2);
        }

        std::cout << "Oops: " << data1 << "\n";
        std::cout << "Oops: " << data2 << "\n";
    }
}
#include <iostream>
#include <boost/asio.hpp>
int main() {
std::cout << boost::asio::ip::tcp::iostream("127.0.0.1", "6769").rdbuf();
}
#include <iostream>
#include <boost/asio.hpp>
#include <boost/spirit/include/qi.hpp>
using namespace std;
namespace asio = boost::asio;
using asio::buffers_begin;
using asio::buffers_end;
/// Tries to parse one document -- an integer followed by a NUL byte, with
/// whitespace skipped via qi::space -- directly from a buffer sequence.
///
/// Nothing is consumed from the underlying buffer here; the caller decides
/// what to do with the returned count.
///
/// @param buffers  buffer sequence to parse from
/// @param data     receives the parsed integer
/// @return how far the parser advanced from the start of `buffers` on
///         success, 0 when the parse fails.
template <typename ConstBuffers>
size_t parseDocument(ConstBuffers const& buffers, int &data) {
    namespace qi = boost::spirit::qi;

    auto const first = buffers_begin(buffers);
    auto const last = buffers_end(buffers);
    auto cursor = first;

    if (!qi::phrase_parse(cursor, last, qi::int_ >> '\0', qi::space, data))
        return 0; // only optionally consume

    return cursor - first;
}
/// Simulated first network read: copies 8 bytes into `outbuf` -- the frame
/// "12345" with its NUL terminator, followed by "23", the beginning of the
/// next frame. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read1(MutableBuffers const &outbuf) {
    static char const payload[] = {'1', '2', '3', '4', '5', '\0', '2', '3'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
/// Simulated second network read: copies 4 bytes into `outbuf` -- "456" and a
/// NUL terminator. Returns the number of bytes actually copied.
template <typename MutableBuffers>
size_t fake_read2(MutableBuffers const &outbuf) {
    static char const payload[] = {'4', '5', '6', '\0'};
    return asio::buffer_copy(outbuf, asio::buffer(payload));
}
// Simulates one completed read into `strbuf`: prepares 1024 bytes of output
// space, lets `fake_read` fill it, logs the byte count together with the
// spelled-out name of `fake_read`, and commits exactly that many bytes.
// The body is wrapped in do { } while (0) so that `READ_UNTIL(a, b);` expands
// to a single statement and stays valid inside an unbraced if/else.
#define READ_UNTIL(/*boost::asio::streambuf &*/strbuf, fake_read) do {   \
    auto buffers = (strbuf).prepare(1024);                                \
    size_t read = fake_read(buffers);                                     \
    std::cout << "READ_UNTIL " #fake_read ": " << read << " bytes\n";     \
    (strbuf).commit(read);                                                \
} while (0)
/// Fake asynchronous receive. Every call logs the function signature. The
/// first six calls return 0 without touching `strbuf`; the seventh prepares
/// 1024 bytes, fills them via fake_read2(), commits what was copied and
/// returns that count. The counter keeps decrementing past zero, so calls
/// after the seventh return 0 again.
size_t readuntil2(boost::asio::streambuf &strbuf) {
    std::cout << __PRETTY_FUNCTION__ << "\n";

    static int polls_before_data = 6;
    int const before = polls_before_data;
    --polls_before_data;
    if (before != 0)
        return 0;

    auto space = strbuf.prepare(1024);
    size_t const received = fake_read2(space);
    std::cout << "read2: " << received << " bytes\n";
    strbuf.commit(received);
    return received;
}
#include <boost/range/algorithm.hpp>
// Demonstrates parsing frames directly from the streambuf's buffer sequence,
// consuming bytes only after a frame has parsed completely.
int main() {
    // this is the tricky scenario: only the first read has happened, so the
    // buffer ends with the first bytes of the second frame
    asio::streambuf strbuf;
    READ_UNTIL(strbuf, fake_read1);
    //READ_UNTIL(strbuf, fake_read2); // will happen later, now we're stuck with a partial second frame

    int data1 = 0;
    int data2 = 0;

    // Remove exactly as many bytes as the parser matched for the first frame.
    strbuf.consume(parseDocument(strbuf.data(), data1));

    // A failed parse returns 0 and leaves the buffer untouched, so the next
    // attempt starts again from the beginning of the partial frame.
    size_t consumed = 0;
    while (!(consumed = parseDocument(strbuf.data(), data2))) {
        READ_UNTIL(strbuf, fake_read2);
    }
    // Drop the second frame as well, mirroring the handling of the first one.
    strbuf.consume(consumed);

    std::cout << "Yay: " << data1 << "\n";
    std::cout << "Yay: " << data2 << "\n";

    //asio::ip::tcp::iostream networkstream("localhost", "6767");
    std::cout << asio::ip::tcp::iostream("localhost", "6767").rdbuf();
}
输出:
READ_UNTIL fake_read1: 8 bytes
READ_UNTIL fake_read2: 4 bytes
Yay: 12345
Yay: 23456
总结,集成第三方解析器
如果必须使用需要std::istream&
进行解析的第三方库,但不能依靠传输与帧边界对齐,则可以使用混合方法:
auto n = find_frame_boundary(buffers_begin(sb.data()), buffers_end(sb.data()));
然后可以在检测到的(缩小后的)区域上使用 boost::iostreams::array_source
。虽然 read_until() 操作会把读取到的所有数据都提交到 streambuf 的输入序列,但它们返回的 bytes_transferred
值给出的是直到第一个分隔符(含该分隔符)为止的字节数。本质上,它提供了帧的大小,您可以通过以下任一方式限制 istream
仅读取streambuf
输入序列的一部分:
- 使用自定义的
istream
来限制从 streambuf 读取的字节数。实现这一点的一个简单方法是使用 Boost.IOStreams 并实现其 Source 概念的模型
- 创建从Boost.Asio的
streambuf
派生的自定义streambuf
。要限制从可用输入序列读取的字节数,自定义函数需要操纵输入序列的结尾。此外,自定义streambuf
需要处理下溢
Boost.IOStreams 的自定义 Source
Boost.IOStreams 的 boost::iostreams::stream
对象将 I/O 操作委托给设备(Device)。设备是实现了某个 Boost.IOStreams 概念模型的用户代码。在这种情况下,只需要 Source 概念,它提供对字符序列的读取访问。此外,当 boost::iostreams::stream
使用 Source 设备时,它将从 std::basic_istream
继承
在以下代码中,asio_streambuf_input_device
是一个从 Boost.Asio streambuf 读取数据的 Source 概念模型。当已读取给定数量的字节后,asio_streambuf_input_device
会报告序列结束,即使底层 streambuf 的输入序列中仍有数据
///实现Boost.IOStream源概念模型的类型
///用于从Boost.Asio streambuf读取数据
asio类流量输入设备
:public boost::iostreams::source//使用便利类。
{
公众:
明确的
asio_streambuf_输入设备(
boost::asio::streambuf和streambuf,
std::传输的streamsize字节数
)
:streambuf_(streambuf),
剩余字节数(已传输字节数)
{}
std::streamsize读取(字符类型*缓冲区,std::streamsize缓冲区大小)
{
//确定要复制的最大字节数。
自动字节到字节复制=
std::min(剩余字节数),std::min(
静态_转换(streambuf_.size()),缓冲区_大小);
//如果没有更多的数据要读取,请按指示序列结束
//源概念。
if(0==字节到拷贝)
{
return-1;//根据源概念指示序列的结尾。
}
//从streambuf复制到提供的缓冲区中。
std::copy\u n(buffers\u begin(streambuf\uu.data()),字节到复制,buffer);
//更新剩余的字节数。
剩余字节数\-=要复制的字节数;
//从streambuf中消费。
streambuf_uu.consume(字节到字节拷贝);
返回字节到拷贝;
}
私人:
boost::asio::streambuf和streambuf;
std::streamsize字节数\u剩余字节数\uu;
};
// ...
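// Create a custom iostream that sets a limit on the number of bytes
// that will be read from the streambuf.
boost::iostreams::stream<asio_streambuf_input_device> input(streambuf, n);
下面的完整示例演示了此方法: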
auto n = find_frame_boundary(buffers_begin(sb.data()), buffers_end(sb.data()));
#包括
#包括
#包括
#包括
#include <boost/iostreams/concepts.hpp> // boost::iostreams::source
#包括
///实现Boost.IOStream的源概念模型的类型
///为了阅读da