Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/11.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 字节数组的Base-N编码_C#_Algorithm_Encoding_Bit Manipulation_Radix - Fatal编程技术网

C# 字节数组的Base-N编码

C# 字节数组的Base-N编码,c#,algorithm,encoding,bit-manipulation,radix,C#,Algorithm,Encoding,Bit Manipulation,Radix,几天前,我遇到了Base-36编码的字节数组。然而,接下来的答案并没有涉及到解码回字节数组,或者可能重用答案来执行不同基(基数)的编码 链接问题的答案使用BigInteger。因此,就实现而言,基数及其数字可以参数化 但BigInteger的问题是,我们将输入视为一个假定的整数。然而,我们的输入,字节数组,只是一系列不透明的值 如果字节数组以一系列零字节结尾,例如{0xFF,0x7F,0x00,0x00},则在回答中使用算法时,这些字节将丢失(仅编码{0xFF,0x7F}) 如果最后一个非零字

几天前,我遇到了Base-36编码的字节数组。然而,接下来的答案并没有涉及到解码回字节数组,或者可能重用答案来执行不同基(基数)的编码

链接问题的答案使用BigInteger。因此,就实现而言,基数及其数字可以参数化

但BigInteger的问题是,我们将输入视为一个假定的整数。然而,我们的输入,字节数组,只是一系列不透明的值

  • 如果字节数组以一系列零字节结尾,例如{0xFF,0x7F,0x00,0x00},则在回答中使用算法时,这些字节将丢失(仅编码{0xFF,0x7F})
  • 如果最后一个非零字节设置了符号位,则紧随其后的那个零字节会被消耗掉,因为它被当作BigInteger的符号字节。因此{0xFF,0xFF,0x00,0x00}实际上只会按{0xFF,0xFF,0x00}来编码

.NET程序员如何使用BigInteger创建一个效率尚可、与基数无关的编码器,使其支持解码,能够处理字节序(endian),并且能够“绕过”结尾零字节丢失的问题?

编辑[2020/01/26]:供参考(FWIW),下面的代码及其单元测试也和我的开源库一起放在Github上

编辑[2016/04/19]:如果您偏好使用异常,可以把部分解码实现代码改为抛出InvalidDataException,而不是仅仅返回null

编辑[2014/09/14]:我在Encode()中添加了一个“HACK”,用来处理输入的最后一个字节带符号的情况(即把它转换为sbyte时为负数)。我目前能想到的唯一合理的解决方案,就是用Array.Resize()把数组加长一个字节。针对这种情况新增的单元测试已经通过,但我没有重新运行性能测试代码来把这种情况考虑进去。如果可以的话,请始终在传给Encode()的输入末尾附带一个占位的0字节,以避免额外的内存分配

用法 我创建了一个RadixEncoding类(可在“代码”部分找到),该类使用三个参数进行初始化:

  • 基数数字作为字符串(长度当然决定实际基数)
  • 输入字节数组的假定字节顺序(endian)
  • 以及用户是否希望编码/解码逻辑确认结束零字节
  • 要创建一个Base-36编码器,输入按小端(little endian)处理,并且不对结尾零字节做额外保留(第三个参数为false):

    // Digit alphabet: 36 characters, so the encoder built from it is Base-36
    const string k_base36_digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    // Third argument (includeProceedingZeros) is false: ending zero bytes are not padded/restored
    var base36_no_zeros = new RadixEncoding(k_base36_digits, EndianFormat.Little, false);
    
    然后实际执行编码/解码:

    // Sample text to round-trip through the encoder
    const string k_input = "A test 1234";
    // Encode() takes a byte array, so the text is first converted to its UTF-8 bytes
    byte[] input_bytes = System.Text.Encoding.UTF8.GetBytes(k_input);
    // bytes -> radix-encoded string
    string encoded_string = base36_no_zeros.Encode(input_bytes);
    // radix-encoded string -> bytes
    byte[] decoded_bytes = base36_no_zeros.Decode(encoded_string);
    
    性能 使用Diagnostics.Stopwatch计时,在i7 860 @ 2.80GHz上运行。计时用的EXE是独立运行的,而不是在调试器下运行

    编码器使用与上文相同的k_base36_digits字符串和EndianFormat.Little进行初始化,并且启用了对结尾零字节的保留(尽管这些UTF8字节本身并没有多余的结尾零字节)

    对“A test 1234”的UTF8字节进行1000000次编码需要2.6567905秒
    将同一字符串解码同样的次数需要3.3916248秒

    对“A test 1234. Made slightly larger!”的UTF8字节进行100000次编码需要1.1577325秒
    将同一字符串解码同样的次数需要1.244326秒

    代码 如果您没有安装Code Contracts,就必须用if/throw代码重新实现这些契约(Contract)检查

    using System;
    using System.Collections.Generic;
    using System.Numerics;
    using Contract = System.Diagnostics.Contracts.Contract;
    
    /// <summary>Ordering choices accepted by the radix encoder</summary>
    public enum EndianFormat
    {
        /// <summary>Least significant first ordering (lsb)</summary>
        /// <remarks>
        /// Right-to-Left. See also <see cref="BitConverter.IsLittleEndian"/>.
        /// </remarks>
        Little = 0,

        /// <summary>Most significant first ordering (msb)</summary>
        /// <remarks>Left-to-Right</remarks>
        Big = 1,
    }
    
    /// <summary>Encodes bytes to, and decodes bytes from, a string of digits in an arbitrary radix</summary>
    /// <remarks>
    /// <p>The radix is the number of characters in the digit string given to the constructor; the
    /// character at index 0 is the 'zero' digit.</p>
    /// <p>The input bytes are always handed to <see cref="BigInteger"/> unchanged, which reads them
    /// least-significant byte first. The <see cref="Endian"/> setting selects the order of the digits in
    /// the encoded string: <see cref="EndianFormat.Little"/> writes the most significant digit first,
    /// <see cref="EndianFormat.Big"/> leaves the least significant digit first.
    /// NOTE(review): confirm this is the intended meaning of the endian setting.</p>
    /// <p><see cref="IncludeProceedingZeros"/> (a constructor argument) is a work-around for an edge case
    /// of the BigInteger based implementation: zero bytes at the end of the input do not change the
    /// integer value, so they would be ignored in the resulting encoded radix string.
    /// If such a loss absolutely cannot occur pass true, and for a tiny bit of extra processing the
    /// encoder pads with zero digits and the decoder pads with zero bytes.</p>
    /// <p>Note: doing this for decoding <b>may</b> add an extra byte more than what was originally
    /// given to Encode.</p>
    /// </remarks>
    // Based on the answers from http://codereview.stackexchange.com/questions/14084/base-36-encoding-of-a-byte-array/
    public class RadixEncoding
    {
        const int kByteBitCount = 8;

        readonly string kDigits;
        readonly double kBitsPerDigit;
        readonly BigInteger kRadixBig;
        readonly EndianFormat kEndian;
        readonly bool kIncludeProceedingZeros;

        /// <summary>Numerical base of this encoding</summary>
        public int Radix { get { return kDigits.Length; } }
        /// <summary>Endian setting given to the constructor</summary>
        public EndianFormat Endian { get { return kEndian; } }
        /// <summary>True if we want ending zero bytes to be encoded</summary>
        public bool IncludeProceedingZeros { get { return kIncludeProceedingZeros; } }

        /// <summary>Describes this encoding as "Base-{radix} {digits}"</summary>
        public override string ToString()
        {
            return string.Format("Base-{0} {1}", Radix.ToString(), kDigits);
        }

        /// <summary>Create a radix encoder using the given characters as the digits in the radix</summary>
        /// <param name="digits">Digits to use for the radix-encoded string; its length is the radix</param>
        /// <param name="bytesEndian">Endian setting, see the remarks on <see cref="RadixEncoding"/></param>
        /// <param name="includeProceedingZeros">True if we want ending zero bytes to be encoded</param>
        /// <exception cref="ArgumentNullException"><paramref name="digits"/> is null</exception>
        /// <exception cref="ArgumentException"><paramref name="digits"/> has fewer than two characters</exception>
        public RadixEncoding(string digits,
            EndianFormat bytesEndian = EndianFormat.Little, bool includeProceedingZeros = false)
        {
            // Plain if/throw instead of Contract.Requires<T>, which only works when the
            // Code Contracts binary rewriter has processed the assembly.
            if (digits == null)
                throw new ArgumentNullException("digits");
            // Radix 0 would divide by zero and radix 1 would never terminate in Encode.
            if (digits.Length < 2)
                throw new ArgumentException("At least two digit characters are required.", "digits");

            int radix = digits.Length;

            kDigits = digits;
            kBitsPerDigit = System.Math.Log(radix, 2);
            kRadixBig = new BigInteger(radix);
            kEndian = bytesEndian;
            kIncludeProceedingZeros = includeProceedingZeros;
        }

        // Number of characters needed for encoding the specified number of bytes
        int EncodingCharsCount(int bytesLength)
        {
            return (int)Math.Ceiling((bytesLength * kByteBitCount) / kBitsPerDigit);
        }
        // Number of bytes needed for decoding the specified number of characters
        int DecodingBytesCount(int charsCount)
        {
            return (int)Math.Ceiling((charsCount * kBitsPerDigit) / kByteBitCount);
        }

        /// <summary>Encode a byte array into a radix-encoded string</summary>
        /// <param name="bytes">byte array to encode; the caller's array is never modified</param>
        /// <returns>The bytes encoded into a radix-encoded string, never null</returns>
        /// <remarks>If <paramref name="bytes"/> is zero length, returns an empty string</remarks>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null</exception>
        public string Encode(byte[] bytes)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes");

            // Don't really have to do this, the code below would build the same result (empty string),
            // but why not catch the condition before doing work?
            if (bytes.Length == 0) return string.Empty;

            // Digit count computed from the ORIGINAL length; when padding is enabled the
            // result is filled up with zero digits until it has this many characters.
            int result_length = EncodingCharsCount(bytes.Length);
            // List<> has a(n in-place) Reverse method. StringBuilder doesn't. That's why.
            var result = new List<char>(result_length);

            // HACK: BigInteger uses the MSB of the last byte as the sign. If it is set, append an
            // extra 0 byte so the value stays positive. Array.Resize allocates a new array and only
            // rebinds the local parameter, so the caller's buffer is untouched.
            if ((bytes[bytes.Length - 1] & 0x80) != 0)
                Array.Resize(ref bytes, bytes.Length + 1);

            var dividend = new BigInteger(bytes);
            // IsZero's computation is less complex than evaluating "dividend > 0"
            // which invokes BigInteger.CompareTo(BigInteger)
            while (!dividend.IsZero)
            {
                BigInteger remainder;
                dividend = BigInteger.DivRem(dividend, kRadixBig, out remainder);
                // dividend is never negative here, so the remainder is in [0, Radix)
                result.Add(kDigits[(int)remainder]);
            }

            // Pad with the character that represents 'zero'. Compare against the computed length
            // rather than List.Capacity, which is an implementation detail of the list.
            if (kIncludeProceedingZeros)
                while (result.Count < result_length)
                    result.Add(kDigits[0]);

            // Digits were produced least significant first; Little flips them to most significant first
            if (kEndian == EndianFormat.Little)
                result.Reverse();

            return new string(result.ToArray());
        }

        // Appends the zero bytes that correspond to padCount zero digits.
        // A null result (invalid character seen while decoding) is left null for the caller.
        void DecodeImplPadResult(ref byte[] result, int padCount)
        {
            if (result == null || padCount <= 0)
                return;

            int new_length = result.Length + DecodingBytesCount(padCount);
            Array.Resize(ref result, new_length); // new bytes will be zero, just the way we want it
        }
        #region Decode (Little Endian)
        // Reads chars[startIndex..] with the most significant digit first.
        // Returns null if a character is not one of the digits.
        byte[] DecodeImpl(string chars, int startIndex = 0)
        {
            var bi = new BigInteger();
            for (int x = startIndex; x < chars.Length; x++)
            {
                int i = kDigits.IndexOf(chars[x]);
                if (i < 0) return null; // invalid character
                bi *= kRadixBig;
                bi += i;
            }

            return bi.ToByteArray();
        }
        // Same as DecodeImpl, but leading zero digits are turned back into ending zero bytes
        byte[] DecodeImplWithPadding(string chars)
        {
            int pad_count = 0;
            while (pad_count < chars.Length && chars[pad_count] == kDigits[0])
                pad_count++;

            var result = DecodeImpl(chars, pad_count);
            DecodeImplPadResult(ref result, pad_count);

            return result;
        }
        #endregion
        #region Decode (Big Endian)
        // Reads chars with the most significant digit last, skipping the final startIndex characters.
        // Returns null if a character is not one of the digits.
        byte[] DecodeImplReversed(string chars, int startIndex = 0)
        {
            var bi = new BigInteger();
            for (int x = (chars.Length - 1) - startIndex; x >= 0; x--)
            {
                int i = kDigits.IndexOf(chars[x]);
                if (i < 0) return null; // invalid character
                bi *= kRadixBig;
                bi += i;
            }

            return bi.ToByteArray();
        }
        // Same as DecodeImplReversed, but trailing zero digits are turned back into ending zero bytes
        byte[] DecodeImplReversedWithPadding(string chars)
        {
            int pad_count = 0;
            while (pad_count < chars.Length && chars[chars.Length - 1 - pad_count] == kDigits[0])
                pad_count++;

            var result = DecodeImplReversed(chars, pad_count);
            DecodeImplPadResult(ref result, pad_count);

            return result;
        }
        #endregion
        /// <summary>Decode a radix-encoded string into a byte array</summary>
        /// <param name="radixChars">radix string</param>
        /// <returns>The decoded bytes, or null if an invalid character is encountered</returns>
        /// <remarks>
        /// If <paramref name="radixChars"/> is an empty string, returns a zero length array
        ///
        /// Using <see cref="IncludeProceedingZeros"/> has the potential to return a buffer with an
        /// additional zero byte that wasn't in the input. So if a 4 byte buffer was encoded, this could
        /// end up returning a 5 byte buffer, with the extra byte being zero.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="radixChars"/> is null</exception>
        public byte[] Decode(string radixChars)
        {
            if (radixChars == null)
                throw new ArgumentNullException("radixChars");

            // BigInteger.Zero.ToByteArray() is a single 0 byte, so the empty string has to be
            // handled here to honour the documented zero length result.
            if (radixChars.Length == 0)
                return new byte[0];

            if (kEndian == EndianFormat.Big)
                return kIncludeProceedingZeros ? DecodeImplReversedWithPadding(radixChars) : DecodeImplReversed(radixChars);
            else
                return kIncludeProceedingZeros ? DecodeImplWithPadding(radixChars) : DecodeImpl(radixChars);
        }
    }
    
    使用系统;
    使用System.Collections.Generic;
    使用系统数字;
    使用合同=System.Diagnostics.Contracts.Contract;
    公共枚举EndianFormat
    {
    ///最低有效位顺序(lsb)
    ///从右向左
    /// 
    少,,
    ///最高有效位顺序(msb)
    ///从左到右
    大的
    };
    ///编码/解码字符串中的字节
    /// 
    ///编码字符串始终采用大端排序
    /// 
    ///编码和解码采用IncludeProcedingZeros参数,该参数作为一种变通方法
    ///对于带有BigInteger实现的边缘情况。
    ///MSDN说,双整数字节数组是按LSB->MSB顺序排列的
    ///end将在生成的编码基数字符串中忽略这些零。
    ///如果绝对不能发生精度损失,则将true传递给IncludeProcedingZero
    ///对于一点点额外的处理,它将处理零位的填充(编码)
    ///或字节(解码)

    ///注意:在解码时这样做可能会比原来增加一个额外的字节 ///用于编码的

    /// //根据来自http://codereview.stackexchange.com/questions/14084/base-36-encoding-of-a-byte-array/ 公共类RADIXENCODE { 常量int kByteBitCount=8; 只读字符串kDigits; 只读双位数字; 只读大整数kRadixBig; 只读endian格式kEndian; 只读bool kincludeproceedingzero; ///这个编码的数字基数 公共整数基数{get{return kDigits.Length;} ///输入到编码和解码输出的字节的Endian排序 public EndianFormat Endian{get{return kEndian;}} ///如果要对结尾的零字节进行编码,则为True 公共bool IncludeProceedingZeros{get{return kIncludeProceedingZeros;}} 公共重写字符串ToString() { 返回string.Format(“Base-{0}{1}”,Radix.ToString(),kDigits); } ///使用给定字符作为基数中的数字创建基数编码器 ///用于基数编码字符串的数字 ///输入到编码和解码输出的字节的Endian排序 ///如果要对结尾的零字节进行编码,则为True 公共半径编码(字符串数字, EndianFormat bytesEndian=EndianFormat.Little,bool includeProceedingZeros=false) { 合同。需要(位数!=null); int基数=数字。长度; kDigits=数字; kBitsPerDigit=System.Math.Log(基数为2); kRadixBig=新的大整数(基数); 肯迪安=拜特森迪安; kIncludeProceedingZeros=includeProceedingZeros; } //ch数
    using System;
    using Microsoft.VisualStudio.TestTools.UnitTesting;
    
    /// <summary>Checks that <paramref name="output"/> starts with every element of <paramref name="input"/></summary>
    /// <param name="input">The expected elements</param>
    /// <param name="output">The array to inspect; it may be longer than <paramref name="input"/></param>
    /// <returns>
    /// False when <paramref name="output"/> is shorter than <paramref name="input"/> or any of the
    /// first <c>input.Length</c> elements differ; otherwise true. Extra elements are ignored.
    /// </returns>
    static bool ArraysCompareN<T>(T[] input, T[] output)
        where T : IEquatable<T>
    {
        bool output_too_short = output.Length < input.Length;
        if (output_too_short)
        {
            return false;
        }

        int index = 0;
        while (index < input.Length)
        {
            bool same = output[index].Equals(input[index]);
            if (!same)
            {
                return false;
            }
            index++;
        }

        return true;
    }
    /// <summary>Round-trips <paramref name="bytes"/> through Encode and Decode of the given encoding</summary>
    /// <param name="encoding">Encoder/decoder under test</param>
    /// <param name="bytes">Bytes to encode and then decode again</param>
    /// <returns>The result of ArraysCompareN(bytes, decoded bytes)</returns>
    static bool RadixEncodingTest(RadixEncoding encoding, byte[] bytes)
    {
        byte[] round_tripped = encoding.Decode(encoding.Encode(bytes));
        return ArraysCompareN(bytes, round_tripped);
    }
    /// <summary>
    /// Round-trips byte arrays through a Base-36 encoder, once with ending-zero padding enabled and
    /// once with it disabled.
    /// </summary>
    [TestMethod]
    public void TestRadixEncoding()
    {
        const string k_base36_digits = "0123456789abcdefghijklmnopqrstuvwxyz";
        // Padding enabled: ending zero bytes must survive the round trip
        var base36 = new RadixEncoding(k_base36_digits, EndianFormat.Little, true);
        // Padding disabled. This was constructed with 'true' before, which made it identical to
        // base36 and left the no-padding code path untested.
        var base36_no_zeros = new RadixEncoding(k_base36_digits, EndianFormat.Little, false);

        // Last non-zero byte has its MSB set (the BigInteger sign bit case)
        byte[] ends_with_zero_neg = { 0xFF, 0xFF, 0x00, 0x00 };
        // Last non-zero byte has its MSB clear
        byte[] ends_with_zero_pos = { 0xFF, 0x7F, 0x00, 0x00 };
        // No ending zero bytes, so it round-trips without padding support
        byte[] text = System.Text.Encoding.ASCII.GetBytes("A test 1234");

        Assert.IsTrue(RadixEncodingTest(base36, ends_with_zero_neg));
        Assert.IsTrue(RadixEncodingTest(base36, ends_with_zero_pos));
        Assert.IsTrue(RadixEncodingTest(base36_no_zeros, text));
    }
    
    const string input = "hello world";

    /// <summary>Writes the MD5 hash of the ASCII bytes of <c>input</c> to the console as uppercase hex</summary>
    public static void Main(string[] args)
    {
        byte[] hashBytes;
        using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
        {
            hashBytes = md5.ComputeHash(System.Text.Encoding.ASCII.GetBytes(input));
        }

        // BitConverter renders "AB-CD-..."; removing the dashes leaves two uppercase hex
        // digits per byte, the same text as formatting every byte with "X2".
        Console.WriteLine(BitConverter.ToString(hashBytes).Replace("-", string.Empty));
    }
    
    /** Digit alphabet for base 16; the character at index i represents digit value i. */
    private static final char[] BASE16_CHARS = "0123456789abcdef".toCharArray();
    /** The value 16 as a BigInteger; presumably passed together with BASE16_CHARS to toBaseX. */
    private static final BigInteger BIGINT_16 = BigInteger.valueOf(16);
    
    /** Digit alphabet for base 36; the character at index i represents digit value i. */
    private static final char[] BASE36_CHARS = "0123456789abcdefghijklmnopqrstuvwxyz".toCharArray();
    /** The value 36 as a BigInteger; presumably passed together with BASE36_CHARS to toBaseX. */
    private static final BigInteger BIGINT_36 = BigInteger.valueOf(36);
    
    /**
     * Encodes a byte array as a string of digits in the given base.
     *
     * <p>The bytes are read as one unsigned, big-endian integer, so a leading byte with its high
     * bit set is not mistaken for a negative number. Leading zero bytes do not contribute digits,
     * and an all-zero or empty array yields the empty string.
     *
     * @param bytes the bytes to encode; may be null
     * @param base  the radix to divide by; expected to equal {@code chars.length}
     * @param chars the digit alphabet, where {@code chars[i]} is the character for digit value i
     * @return the digits with the most significant first, or null when {@code bytes} is null
     */
    public static String toBaseX(byte[] bytes, BigInteger base, char[] chars)
    {
        if (bytes == null) {
            return null;
        }

        final int bitsPerByte = 8;
        double bitsPerDigit = Math.log(chars.length) / Math.log(2);

        // Number of chars to encode specified bytes; only used to presize the builder
        int size = (int) Math.ceil((bytes.length * bitsPerByte) / bitsPerDigit);

        StringBuilder sb = new StringBuilder(size);

        // Signum 1 makes the bytes an unsigned magnitude. The one-argument constructor would
        // read them as two's complement and would also reject an empty array.
        BigInteger value = new BigInteger(1, bytes);
        while (value.signum() != 0) {
            BigInteger[] quotientAndRemainder = value.divideAndRemainder(base);
            // Digits come out least significant first; append now and reverse once at the end
            // instead of shifting the whole buffer with insert(0, ...) on every digit.
            sb.append(chars[quotientAndRemainder[1].intValue()]);
            value = quotientAndRemainder[0];
        }

        return sb.reverse().toString();
    }