C# protobuf net:序列化System.IO.Stream类型的属性,而不将整个流加载到内存中
protobuf net无法序列化以下类,因为不支持序列化类型为C# protobuf net:序列化System.IO.Stream类型的属性,而不将整个流加载到内存中,c#,serialization,protobuf-net,C#,Serialization,Protobuf Net,protobuf net无法序列化以下类,因为不支持序列化类型为Stream的对象: [ProtoContract] class StreamObject { [ProtoMember(1)] public Stream StreamProperty { get; set; } } 我知道我可以通过使用类型为byte[]的序列化属性并将流读入该属性来解决这个问题,如中所示。但这需要将整个字节[]加载到内存中,如果流很长,可能会很快耗尽系统资源 有没有一种方法可以在protobu
Stream
的对象:
// Contract from the question: per the surrounding text, protobuf-net cannot serialize
// this class because members of type Stream are not supported.
[ProtoContract]
class StreamObject
{
// The Stream-typed member that triggers the failure described above.
[ProtoMember(1)]
public Stream StreamProperty { get; set; }
}
我知道我可以通过使用类型为byte[]
的序列化属性并将流读入该属性来解决这个问题(如相关问答中所示)。但这需要将整个byte[]
加载到内存中,如果流很长,可能会很快耗尽系统资源
有没有一种方法可以在protobuf-net中将流序列化为字节数组而不将整个字节序列加载到内存中?这里的基本困难不在于protobuf-net,而在于Protocol Buffers编码格式本身。重复元素(例如字节数组或流)有两种编码方式:
- 作为打包(packed)重复元素。在这里,字段的所有元素都打包到一个键值对中,线类型(wire type)为2(长度分隔)。每个元素的编码方式与通常相同,只是前面没有标记(tag)。protobuf-net自动以这种格式编码字节数组,但是这样做需要事先知道字节总数。对于字节流,这可能需要将整个流加载到内存中(例如,当流的长度无法预先获知时),这违反了您的要求
- 作为重复元素。这里,编码的消息具有零个或多个具有相同标签号的键值对。对于字节流,使用此格式将导致编码消息大幅膨胀,因为每个字节都需要一个额外的整数键(key)
StreamObject
执行此操作:
/// <summary>
/// Wraps a <see cref="Stream"/> so that its content is exposed to the serializer as a
/// sequence of <see cref="ByteBuffer"/> chunks of at most <see cref="ChunkSize"/> bytes,
/// rather than as one byte array holding the whole stream.
/// </summary>
[ProtoContract]
class StreamObject
{
    /// <summary>
    /// Creates an instance backed by a new, empty <see cref="MemoryStream"/>.
    /// NOTE(review): presumably this is the constructor the serializer uses when
    /// deserializing - confirm against the protobuf-net configuration in use.
    /// </summary>
    public StreamObject() : this(new MemoryStream()) { }

    /// <summary>Creates an instance that reads from / writes to <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream to wrap; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public StreamObject(Stream stream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        this.StreamProperty = stream;
    }

    /// <summary>The wrapped stream. Excluded from serialization; its content travels via <c>Data</c>.</summary>
    [ProtoIgnore]
    public Stream StreamProperty { get; set; }

    /// <summary>Raised when enumeration of the chunk sequence starts (diagnostic hook).</summary>
    internal static event EventHandler OnDataReadBegin;

    /// <summary>Raised when enumeration of the chunk sequence has run to completion (diagnostic hook).</summary>
    internal static event EventHandler OnDataReadEnd;

    /// <summary>Maximum number of bytes requested from the stream per chunk.</summary>
    const int ChunkSize = 4096;

    /// <summary>
    /// Serialized member 1. The getter lazily reads <see cref="StreamProperty"/> from its
    /// current position and yields one <see cref="ByteBuffer"/> per successful read; the
    /// setter appends every received chunk to <see cref="StreamProperty"/>.
    /// </summary>
    [ProtoMember(1, IsPacked = false, OverwriteList = true)]
    IEnumerable<ByteBuffer> Data
    {
        get
        {
            // Snapshot the delegate so an unsubscribe between the check and the call cannot null it.
            EventHandler beginHandler = OnDataReadBegin;
            if (beginHandler != null)
            {
                beginHandler(this, EventArgs.Empty);
            }

            while (true)
            {
                // A fresh array per chunk: the yielded ByteBuffer keeps a reference to it.
                byte[] buffer = new byte[ChunkSize];
                int read = StreamProperty.Read(buffer, 0, buffer.Length);
                if (read <= 0)
                {
                    // Only a zero-byte read signals end of stream.
                    break;
                }

                if (read < buffer.Length)
                {
                    // Stream.Read may return fewer bytes than requested before the end of
                    // the stream is reached, so trim the chunk and KEEP reading instead of
                    // assuming this was the final chunk.
                    Array.Resize(ref buffer, read);
                }

                yield return new ByteBuffer { Data = buffer };
            }

            EventHandler endHandler = OnDataReadEnd;
            if (endHandler != null)
            {
                endHandler(this, EventArgs.Empty);
            }
        }
        set
        {
            if (value == null)
            {
                return;
            }

            foreach (var buffer in value)
            {
                // A default(ByteBuffer) carries a null array; there is nothing to append for it.
                if (buffer.Data == null)
                {
                    continue;
                }

                StreamProperty.Write(buffer.Data, 0, buffer.Data.Length);
            }
        }
    }
}
/// <summary>
/// One chunk of raw bytes; the element type of the chunk sequence exposed by StreamObject.
/// </summary>
[ProtoContract]
struct ByteBuffer
{
    // Explicit storage for the chunk payload.
    private byte[] _data;

    /// <summary>The bytes of this chunk (serialized member 1).</summary>
    [ProtoMember(1, IsPacked = true)]
    public byte[] Data
    {
        get { return _data; }
        set { _data = value; }
    }
}
/// <summary>
/// Round-trip demonstration: serializes a StreamObject wrapping a 1,000,000-byte stream,
/// deserializes it again, and checks both that output was produced while the chunk
/// sequence was being read and that the restored bytes equal the input.
/// </summary>
internal class TestClass
{
    /// <summary>Runs the demonstration; throws InvalidOperationException if either check fails.</summary>
    public void Test()
    {
        var writeStream = new MemoryStream();
        long beginLength = 0;
        long endLength = 0;

        // Record the output length when chunk enumeration starts and when it finishes.
        EventHandler begin = delegate(object sender, EventArgs args)
        {
            beginLength = writeStream.Length;
            string message = string.Format("Begin serialization of Data, writeStream.Length = {0}", writeStream.Length);
            Console.WriteLine(message);
        };
        EventHandler end = delegate(object sender, EventArgs args)
        {
            endLength = writeStream.Length;
            string message = string.Format("End serialization of Data, writeStream.Length = {0}", writeStream.Length);
            Console.WriteLine(message);
        };

        StreamObject.OnDataReadBegin += begin;
        StreamObject.OnDataReadEnd += end;
        try
        {
            const int length = 1000000;

            // Fill the input with a repeating 0..255 byte pattern.
            var inputStream = new MemoryStream();
            int written = 0;
            while (written < length)
            {
                inputStream.WriteByte(unchecked((byte)written));
                written++;
            }
            inputStream.Position = 0;

            var streamObject = new StreamObject(inputStream);
            Serializer.Serialize(writeStream, streamObject);

            byte[] data = writeStream.ToArray();
            StreamObject newStreamObject;
            using (var source = new MemoryStream(data))
            {
                newStreamObject = Serializer.Deserialize<StreamObject>(source);
            }

            // If nothing reached the output between the two events, the input was buffered whole.
            bool wroteWhileReading = beginLength < endLength;
            if (!wroteWhileReading)
            {
                throw new InvalidOperationException("inputStream was completely buffered before writing to writeStream");
            }

            inputStream.Position = 0;
            newStreamObject.StreamProperty.Position = 0;
            bool identical = inputStream.AsEnumerable().SequenceEqual(newStreamObject.StreamProperty.AsEnumerable());
            if (identical)
            {
                Console.WriteLine("Streams identical.");
            }
            else
            {
                throw new InvalidOperationException("!inputStream.AsEnumerable().SequenceEqual(newStreamObject.StreamProperty.AsEnumerable())");
            }
        }
        finally
        {
            // The events are static, so always detach the handlers.
            StreamObject.OnDataReadBegin -= begin;
            StreamObject.OnDataReadEnd -= end;
        }
    }
}
/// <summary>Extension helpers for <see cref="Stream"/>.</summary>
public static class StreamExtensions
{
    /// <summary>
    /// Lazily enumerates <paramref name="stream"/> one byte at a time, starting at its
    /// current position, until <see cref="Stream.ReadByte"/> returns -1.
    /// </summary>
    /// <param name="stream">The stream to read; must not be null.</param>
    /// <returns>A deferred sequence of the bytes read from the stream.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/> is null. Thrown at call time rather than on first enumeration.
    /// </exception>
    public static IEnumerable<byte> AsEnumerable(this Stream stream)
    {
        // Validation lives outside the iterator: code inside a 'yield' method does not
        // run until the sequence is enumerated, which would delay this exception.
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        return ReadBytes(stream);
    }

    // Iterator that performs the actual deferred byte-by-byte read.
    private static IEnumerable<byte> ReadBytes(Stream stream)
    {
        int b;
        while ((b = stream.ReadByte()) != -1)
        {
            yield return checked((byte)b);
        }
    }
}
上述的输出是:
Begin serialization of Data, writeStream.Length = 0
End serialization of Data, writeStream.Length = 1000888
Streams identical.
这表明输入流确实流到了输出,而没有立即完全加载到内存中
原型
是否有一种机制可以在预先知道长度的情况下,从流中以增量方式逐字节写出打包(packed)的重复元素?
看来不是。假设您有一个
CanSeek==true
的流,您可以将其封装在IList
中,该流枚举流中的字节,提供对流中字节的随机访问,并返回IList.Count
中的流长度。有一个fiddle示例展示了这种尝试。然而,不幸的是,protobuf-net在将其写入输出流之前,只是枚举该列表并缓冲其编码内容,这会导致将输入流完全加载到内存中。我认为这是因为protobuf-net对列表
的编码不同于字节[]
,即作为一个长度分隔的varint序列。由于字节
的varint表示有时需要多个字节,因此无法从列表计数预先计算长度。有关字节数组和列表编码方式差异的更多详细信息,请参阅相关资料。应该可以用与字节[]
相同的方式来实现IList的编码,但目前还不可用。这里的基本困难不在于protobuf-net,而在于Protocol Buffers编码格式本身。重复元素(例如字节数组或流)有两种编码方式:
- 作为打包(packed)重复元素。在这里,字段的所有元素都打包到一个键值对中,线类型(wire type)为2(长度分隔)。每个元素的编码方式与通常相同,只是前面没有标记(tag)
protobuf-net自动以这种格式编码字节数组,但是这样做需要事先知道字节总数。对于字节流,这可能需要将整个流加载到内存中(例如,当流的长度无法预先获知时),这违反了您的要求
- 作为重复元素。这里,编码的消息具有零个或多个具有相同标签号的键值对
对于字节流,使用此格式将导致编码消息大幅膨胀,因为每个字节都需要一个额外的整数键(key)
如您所见,两种默认表示形式都不能满足您的需要。相反,将一个大字节流编码为一个由“相当大”的块组成的序列是有意义的,其中每个块都是打包(packed)的,但整个序列不是
以下版本的StreamObject
执行此操作:
/// <summary>
/// Wraps a <see cref="Stream"/> so that its content is exposed to the serializer as a
/// sequence of <see cref="ByteBuffer"/> chunks of at most <see cref="ChunkSize"/> bytes,
/// rather than as one byte array holding the whole stream.
/// </summary>
[ProtoContract]
class StreamObject
{
    /// <summary>
    /// Creates an instance backed by a new, empty <see cref="MemoryStream"/>.
    /// NOTE(review): presumably this is the constructor the serializer uses when
    /// deserializing - confirm against the protobuf-net configuration in use.
    /// </summary>
    public StreamObject() : this(new MemoryStream()) { }

    /// <summary>Creates an instance that reads from / writes to <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream to wrap; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public StreamObject(Stream stream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        this.StreamProperty = stream;
    }

    /// <summary>The wrapped stream. Excluded from serialization; its content travels via <c>Data</c>.</summary>
    [ProtoIgnore]
    public Stream StreamProperty { get; set; }

    /// <summary>Raised when enumeration of the chunk sequence starts (diagnostic hook).</summary>
    internal static event EventHandler OnDataReadBegin;

    /// <summary>Raised when enumeration of the chunk sequence has run to completion (diagnostic hook).</summary>
    internal static event EventHandler OnDataReadEnd;

    /// <summary>Maximum number of bytes requested from the stream per chunk.</summary>
    const int ChunkSize = 4096;

    /// <summary>
    /// Serialized member 1. The getter lazily reads <see cref="StreamProperty"/> from its
    /// current position and yields one <see cref="ByteBuffer"/> per successful read; the
    /// setter appends every received chunk to <see cref="StreamProperty"/>.
    /// </summary>
    [ProtoMember(1, IsPacked = false, OverwriteList = true)]
    IEnumerable<ByteBuffer> Data
    {
        get
        {
            // Snapshot the delegate so an unsubscribe between the check and the call cannot null it.
            EventHandler beginHandler = OnDataReadBegin;
            if (beginHandler != null)
            {
                beginHandler(this, EventArgs.Empty);
            }

            while (true)
            {
                // A fresh array per chunk: the yielded ByteBuffer keeps a reference to it.
                byte[] buffer = new byte[ChunkSize];
                int read = StreamProperty.Read(buffer, 0, buffer.Length);
                if (read <= 0)
                {
                    // Only a zero-byte read signals end of stream.
                    break;
                }

                if (read < buffer.Length)
                {
                    // Stream.Read may return fewer bytes than requested before the end of
                    // the stream is reached, so trim the chunk and KEEP reading instead of
                    // assuming this was the final chunk.
                    Array.Resize(ref buffer, read);
                }

                yield return new ByteBuffer { Data = buffer };
            }

            EventHandler endHandler = OnDataReadEnd;
            if (endHandler != null)
            {
                endHandler(this, EventArgs.Empty);
            }
        }
        set
        {
            if (value == null)
            {
                return;
            }

            foreach (var buffer in value)
            {
                // A default(ByteBuffer) carries a null array; there is nothing to append for it.
                if (buffer.Data == null)
                {
                    continue;
                }

                StreamProperty.Write(buffer.Data, 0, buffer.Data.Length);
            }
        }
    }
}
/// <summary>
/// One chunk of raw bytes; the element type of the chunk sequence exposed by StreamObject.
/// </summary>
[ProtoContract]
struct ByteBuffer
{
    // Explicit storage for the chunk payload.
    private byte[] _data;

    /// <summary>The bytes of this chunk (serialized member 1).</summary>
    [ProtoMember(1, IsPacked = true)]
    public byte[] Data
    {
        get { return _data; }
        set { _data = value; }
    }
}
/// <summary>
/// Round-trip demonstration: serializes a StreamObject wrapping a 1,000,000-byte stream,
/// deserializes it again, and checks both that output was produced while the chunk
/// sequence was being read and that the restored bytes equal the input.
/// </summary>
internal class TestClass
{
    /// <summary>Runs the demonstration; throws InvalidOperationException if either check fails.</summary>
    public void Test()
    {
        var writeStream = new MemoryStream();
        long beginLength = 0;
        long endLength = 0;

        // Record the output length when chunk enumeration starts and when it finishes.
        EventHandler begin = delegate(object sender, EventArgs args)
        {
            beginLength = writeStream.Length;
            string message = string.Format("Begin serialization of Data, writeStream.Length = {0}", writeStream.Length);
            Console.WriteLine(message);
        };
        EventHandler end = delegate(object sender, EventArgs args)
        {
            endLength = writeStream.Length;
            string message = string.Format("End serialization of Data, writeStream.Length = {0}", writeStream.Length);
            Console.WriteLine(message);
        };

        StreamObject.OnDataReadBegin += begin;
        StreamObject.OnDataReadEnd += end;
        try
        {
            const int length = 1000000;

            // Fill the input with a repeating 0..255 byte pattern.
            var inputStream = new MemoryStream();
            int written = 0;
            while (written < length)
            {
                inputStream.WriteByte(unchecked((byte)written));
                written++;
            }
            inputStream.Position = 0;

            var streamObject = new StreamObject(inputStream);
            Serializer.Serialize(writeStream, streamObject);

            byte[] data = writeStream.ToArray();
            StreamObject newStreamObject;
            using (var source = new MemoryStream(data))
            {
                newStreamObject = Serializer.Deserialize<StreamObject>(source);
            }

            // If nothing reached the output between the two events, the input was buffered whole.
            bool wroteWhileReading = beginLength < endLength;
            if (!wroteWhileReading)
            {
                throw new InvalidOperationException("inputStream was completely buffered before writing to writeStream");
            }

            inputStream.Position = 0;
            newStreamObject.StreamProperty.Position = 0;
            bool identical = inputStream.AsEnumerable().SequenceEqual(newStreamObject.StreamProperty.AsEnumerable());
            if (identical)
            {
                Console.WriteLine("Streams identical.");
            }
            else
            {
                throw new InvalidOperationException("!inputStream.AsEnumerable().SequenceEqual(newStreamObject.StreamProperty.AsEnumerable())");
            }
        }
        finally
        {
            // The events are static, so always detach the handlers.
            StreamObject.OnDataReadBegin -= begin;
            StreamObject.OnDataReadEnd -= end;
        }
    }
}
/// <summary>Extension helpers for <see cref="Stream"/>.</summary>
public static class StreamExtensions
{
    /// <summary>
    /// Lazily enumerates <paramref name="stream"/> one byte at a time, starting at its
    /// current position, until <see cref="Stream.ReadByte"/> returns -1.
    /// </summary>
    /// <param name="stream">The stream to read; must not be null.</param>
    /// <returns>A deferred sequence of the bytes read from the stream.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/> is null. Thrown at call time rather than on first enumeration.
    /// </exception>
    public static IEnumerable<byte> AsEnumerable(this Stream stream)
    {
        // Validation lives outside the iterator: code inside a 'yield' method does not
        // run until the sequence is enumerated, which would delay this exception.
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        return ReadBytes(stream);
    }

    // Iterator that performs the actual deferred byte-by-byte read.
    private static IEnumerable<byte> ReadBytes(Stream stream)
    {
        int b;
        while ((b = stream.ReadByte()) != -1)
        {
            yield return checked((byte)b);
        }
    }
}
上述的输出是:
Begin serialization of Data, writeStream.Length = 0
End serialization of Data, writeStream.Length = 1000888
Streams identical.
这表明输入流确实流到了输出,而没有立即完全加载到内存中
原型
是否有一种机制可以在预先知道长度的情况下,从流中以增量方式逐字节写出打包(packed)的重复元素?
看来不是。假设您有一个CanSeek==true
的流,您可以将其封装在IList
中,该流枚举流中的字节,提供对流中字节的随机访问,并返回IList.Count
中的流长度。有一个fiddle示例展示了这种尝试。然而,不幸的是,protobuf-net在将其写入输出流之前,只是枚举该列表并缓冲其编码内容,这会导致将输入流完全加载到内存中。我认为这是因为protobuf-net对列表
的编码不同于字节[]
,即作为一个长度分隔的varint序列。由于字节
的varint表示有时需要多个字节,因此无法从列表计数预先计算长度。有关字节数组和列表编码方式差异的更多详细信息,请参阅相关资料。这应该是可能的