.net 是否有将二进制数据打包成UTF-16字符串的标准技术?
(在 .NET 中)我将任意二进制数据存储在一个 byte[](例如图像)中。现在,我需要将该数据存储在一个字符串中(遗留 API 的“注释”字段)。是否有一种标准技术可以将这些二进制数据打包成字符串?我所说的“打包”是指:对于任何相当大的随机数据集,bytes.Length/2 与 packing.Length 大致相同,因为两个字节大致相当于一个字符。
标签:.net, unicode, encoding, binary, utf-16
这两个“显而易见”的答案并不符合所有标准:
string base64 = System.Convert.ToBase64String(bytes)
不能高效地利用字符串,因为它只使用了大约60000个可用字符中的64个(我的存储是 System.String)。而使用
string utf16 = System.Text.Encoding.Unicode.GetString(bytes)
更好地利用字符串,但对于包含无效Unicode字符(例如不匹配的代理项对)的数据无效。显示了这种精确(差)的技术
让我们看一个简单的例子:
byte[] bytes = new byte[] { 0x41, 0x00, 0x31, 0x00};
string utf16 = System.Text.Encoding.Unicode.GetString(bytes);
byte[] utf16_bytes = System.Text.Encoding.Unicode.GetBytes(utf16);
在本例中,bytes 和 utf16_bytes 是相同的,因为原始字节本身就是合法的 UTF-16 字符串。使用 base64 编码执行相同的过程,将得到一个包含 16 个成员的 base64_bytes 数组
现在,使用无效的UTF-16数据重复此过程:
byte[] bytes = new byte[] { 0x41, 0x00, 0x00, 0xD8};
您会发现 utf16_bytes 与原始数据不匹配
我编写的代码使用U+FFFD作为无效Unicode字符之前的转义符;这是可行的,但我想知道是否有比我自己做的更标准的技术。更不用说,我不喜欢捕获DecoderFallbackException作为检测无效字符的方法
我想您可以称之为“基本BMP”或“基本UTF-16”编码(使用Unicode基本多语言平面中的所有字符)。是的,理想情况下我会直接传递 byte[]
我同意彼得·豪塞尔的建议,认为这是“正确”的答案,因为他是唯一一个接近于提出“标准技术”的人。
编辑得更好。吉姆·贝弗里奇(Jim Beveridge)有一个实现。您可以将二进制数据视为 UTF-8b。UTF-8b 编码假定字节是 UTF-8 多字节序列,但对非 UTF-8 多字节序列有回退编码。我在直接字符数组上胡闹,你的一个失败案例适用于我的实现。代码已经过很好的测试:所以先做测试。您可以通过使用不安全的代码来加快速度。但我确信 UnicodeEncoding 同样慢(如果不是更慢的话)
//
///表示将字节紧密打包到字符串中的编码。
///
公共类字节编码:编码
{
///
///获取字节编码实例。
///
public static readonly Encoding=new ByteEncoding();
私有字节编码()
{
}
公共重写int GetBytes(char[]chars,int charIndex,int charCount,byte[]bytes,int byteIndex)
{
for(int i=0;i
下面是一些测试代码:
/// <summary>
/// Round-trips a 256-byte buffer through ByteEncoding (bytes -> string -> bytes)
/// and reports on the console whether the result matches the input.
/// </summary>
static void Main(string[] args)
{
    // Caveat: the decode side cannot recover the original array length when that
    // length is odd, so an odd-sized input yields an inconclusive result.
    var original = new byte[256];
    for (int n = 0; n != original.Length; ++n)
    {
        original[n] = (byte) Math.Abs(n - 1);
    }

    string packed = ByteEncoding.Encoding.GetString(original);
    byte[] unpacked = ByteEncoding.Encoding.GetBytes(packed);

    // Track failure instead of success; the final message is selected from it.
    bool failed = original.Length != unpacked.Length;
    if (failed)
    {
        Console.WriteLine("Inconclusive: Lengths differ.");
    }

    // Compare only the range both arrays have in common.
    int shared = original.Length < unpacked.Length ? original.Length : unpacked.Length;
    int position = 0;
    while (position < shared)
    {
        if (original[position] != unpacked[position])
        {
            Console.WriteLine("Fail: Invalid at a position {0}.", position);
            failed = true;
        }
        position++;
    }

    Console.WriteLine(failed ? "Failure Present" : "All Passed");
    Console.ReadLine();
}
static void Main(字符串[]args)
{
字节[]原始=新字节[256];
//请注意,我们无法在解码端说明如何进行解码
//如果原始长度为,则数组的长度为
//奇数。这将导致
//不确定的结果。
for(int i=0;i
测试是可行的,但您必须使用API函数对其进行测试。有另一种方法可以绕过此限制:尽管我不确定它的工作情况如何 首先,您需要弄清楚API调用所期望的字符串类型,以及该字符串的结构。如果我举一个简单的例子,让我们考虑.NETString:
- Int32 _length
- byte[] _data
- byte _terminator = 0
/// <summary>
/// Round-trips a 256-byte buffer through ByteEncoding (bytes -> string -> bytes)
/// and reports on the console whether the result matches the input.
/// </summary>
static void Main(string[] args)
{
    byte[] original = new byte[256];
    // Note that we can't tell on the decode side how
    // long the array was if the original length is
    // an odd number. This will result in an
    // inconclusive result.
    for (int i = 0; i < original.Length; i++)
    {
        original[i] = (byte) Math.Abs(i - 1);
    }

    string packed = ByteEncoding.Encoding.GetString(original);
    byte[] unpacked = ByteEncoding.Encoding.GetBytes(packed);

    bool pass = true;
    if (original.Length != unpacked.Length)
    {
        Console.WriteLine("Inconclusive: Lengths differ.");
        pass = false;
    }

    // Only the overlapping range can be compared when the lengths differ.
    int min = Math.Min(original.Length, unpacked.Length);
    for (int i = 0; i < min; i++)
    {
        if (original[i] != unpacked[i])
        {
            Console.WriteLine("Fail: Invalid at a position {0}.", i);
            pass = false;
        }
    }

    Console.WriteLine(pass ? "All Passed" : "Failure Present");
    Console.ReadLine();
}
// P/Invoke declaration: overload of MyLegacyFunction, imported from legacy.dll,
// that takes a raw byte buffer.
[DllImport("legacy.dll")]
private static extern void MyLegacyFunction(byte[] data);

// P/Invoke declaration: overload of MyLegacyFunction, imported from legacy.dll,
// that takes a string.
// NOTE(review): no CharSet is specified on the attribute — confirm which string
// marshaling the native side expects.
[DllImport("legacy.dll")]
private static extern void MyLegacyFunction(string comment);
/// <summary>
/// Wraps <paramref name="data"/> in a buffer laid out as a 4-byte length prefix,
/// the payload bytes, and one trailing zero byte, then passes that buffer to the
/// byte[] overload of MyLegacyFunction.
/// </summary>
/// <param name="data">Payload to hand to the legacy function.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public static void TheLegacyWisperer(byte[] data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    const int LengthPrefixSize = sizeof(int); // _length
    const int TerminatorSize = 1;             // _terminator

    byte[] realData = new byte[LengthPrefixSize + data.Length + TerminatorSize];

    // NOTE(review): BitConverter emits the machine's native byte order — confirm
    // this is the order the native side expects for the length prefix.
    byte[] lengthBytes = BitConverter.GetBytes(data.Length);
    Array.Copy(lengthBytes, realData, LengthPrefixSize);
    Array.Copy(data, 0, realData, LengthPrefixSize, data.Length);

    // The final byte is never written, so it keeps the zero value a new array
    // starts with and serves as the terminator.
    MyLegacyFunction(realData);
}
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

//
// Base16k.cpp : Variant of base64 used to efficiently encode binary into Unicode UTF16 strings. Based on work by
// Markus Scherer at https://sites.google.com/site/markusicu/unicode/base16k
//
// This code is hereby placed in the Public Domain.
// Jim Beveridge, November 29, 2011.
//
// C# port of http://qualapps.blogspot.com/2011/11/base64-for-unicode-utf16.html
// This code is hereby placed in the Public Domain.
// J. Daniel Smith, February 23, 2015
//

namespace JDanielSmith
{
    public static partial class Convert
    {
        // Every payload character is (14-bit value + Base16kOffset), i.e. U+5000..U+8FFF.
        private const int Base16kOffset = 0x5000;
        private const int Base16kMaxValue = 0x3FFF;

        /// <summary>
        /// Encode a binary array into a Base16k string for Unicode.
        /// </summary>
        /// <remarks>
        /// The result is the decimal byte count followed by one character per 14 bits
        /// of input (7 bytes become 4 characters). A final partial group is padded
        /// with zero bits.
        /// </remarks>
        /// <param name="inArray">Bytes to encode.</param>
        /// <returns>The Base16k representation of <paramref name="inArray"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="inArray"/> is null.</exception>
        public static string ToBase16kString(byte[] inArray)
        {
            if (inArray == null)
            {
                throw new ArgumentNullException(nameof(inArray));
            }

            int len = inArray.Length;
            string lengthPrefix = len.ToString(CultureInfo.InvariantCulture);

            // Exact size: the length digits plus one character per 14 bits of payload.
            var sb = new StringBuilder(lengthPrefix.Length + GetBase16kCharCount(len));
            sb.Append(lengthPrefix);

            int code = 0;
            for (int i = 0; i < len; ++i)
            {
                byte byteValue = inArray[i];
                switch (i % 7)
                {
                    case 0:
                        code = byteValue << 6;
                        break;
                    case 1:
                        code |= byteValue >> 2;
                        sb.Append((char)(Base16kOffset + code));
                        code = (byteValue & 0x03) << 12;
                        break;
                    case 2:
                        code |= byteValue << 4;
                        break;
                    case 3:
                        code |= byteValue >> 4;
                        sb.Append((char)(Base16kOffset + code));
                        code = (byteValue & 0x0f) << 10;
                        break;
                    case 4:
                        code |= byteValue << 2;
                        break;
                    case 5:
                        code |= byteValue >> 6;
                        sb.Append((char)(Base16kOffset + code));
                        code = (byteValue & 0x3f) << 8;
                        break;
                    case 6:
                        code |= byteValue;
                        sb.Append((char)(Base16kOffset + code));
                        code = 0;
                        break;
                }
            }

            // Emit a character for the remaining bits of an incomplete 7-byte group.
            if (len % 7 != 0)
            {
                sb.Append((char)(Base16kOffset + code));
            }

            return sb.ToString();
        }

        /// <summary>
        /// Decode a Base16k string for Unicode into a binary array.
        /// </summary>
        /// <param name="s">A string produced by <see cref="ToBase16kString"/>.</param>
        /// <returns>
        /// The decoded bytes, or null when <paramref name="s"/> is malformed: no
        /// leading decimal length, a length that does not fit in an Int32, fewer
        /// payload characters than the length requires, or a payload character
        /// outside U+5000..U+8FFF. Characters after the required payload are ignored.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public static byte[] FromBase16kString(string s)
        {
            if (s == null)
            {
                throw new ArgumentNullException(nameof(s));
            }

            // Read the decimal length prefix (ASCII digits only).
            int pos = 0; // position in s
            while (pos < s.Length && s[pos] >= '0' && s[pos] <= '9')
            {
                ++pos;
            }

            if (pos == 0)
            {
                return null;
            }

            int length;
            if (!int.TryParse(s.Substring(0, pos), NumberStyles.None, CultureInfo.InvariantCulture, out length))
            {
                return null;
            }

            // Check the payload is actually present before allocating, so a bogus
            // length prefix cannot trigger a huge allocation.
            if (s.Length - pos < GetBase16kCharCount(length))
            {
                return null;
            }

            var buf = new byte[length];
            int code = 0;
            byte carry = 0; // bits left over from the previous character

            for (int n = 0; n < length; ++n)
            {
                int i = n % 7; // byte position within the current 7-byte group

                // A new character is consumed at positions 0, 1, 3 and 5.
                if (i == 0 || i == 1 || i == 3 || i == 5)
                {
                    code = s[pos++] - Base16kOffset;
                    if (code < 0 || code > Base16kMaxValue)
                    {
                        return null;
                    }
                }

                switch (i)
                {
                    case 0:
                        buf[n] = (byte)(code >> 6);
                        carry = (byte)((code & 0x3f) << 2);
                        break;
                    case 1:
                        buf[n] = (byte)(carry | (code >> 12));
                        break;
                    case 2:
                        buf[n] = (byte)((code >> 4) & 0xff);
                        carry = (byte)((code & 0x0f) << 4);
                        break;
                    case 3:
                        buf[n] = (byte)(carry | (code >> 10));
                        break;
                    case 4:
                        buf[n] = (byte)((code >> 2) & 0xff);
                        carry = (byte)((code & 0x03) << 6);
                        break;
                    case 5:
                        buf[n] = (byte)(carry | (code >> 8));
                        break;
                    case 6:
                        buf[n] = (byte)(code & 0xff);
                        break;
                }
            }

            return buf;
        }

        // Number of 14-bit characters needed to carry byteCount bytes: ceil(8 * byteCount / 14).
        private static int GetBase16kCharCount(int byteCount)
        {
            return (int)((8L * byteCount + 13) / 14);
        }
    }
}

namespace Base16kCS
{
    class Program
    {
        // Encodes and decodes 500 random buffers and throws on the first mismatch.
        static void Main(string[] args)
        {
            var drand = new Random();

            // Create 500 different binary objects, then encode and decode them.
            // The first 17 objects have lengths 0, 1, 2 ... 16 to test boundary conditions.
            for (int loop = 0; loop < 500; ++loop)
            {
                Console.WriteLine("{0}", loop);

                int size = loop <= 16 ? loop : drand.Next(128000);
                var org = new byte[size];
                drand.NextBytes(org);

                string wstr = JDanielSmith.Convert.ToBase16kString(org);
                byte[] bin = JDanielSmith.Convert.FromBase16kString(wstr);

                // Checked explicitly so the verification also runs in Release builds.
                if (bin == null || !org.SequenceEqual(bin))
                {
                    throw new InvalidOperationException(
                        "Base16k round-trip failed for a buffer of " + size.ToString(CultureInfo.InvariantCulture) + " bytes.");
                }
            }
        }
    }
}