Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/332.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 将大整数压缩为尽可能小的字符串_C#_Asp.net_Vb.net_Compression - Fatal编程技术网

C# 将大整数压缩为尽可能小的字符串

C# 将大整数压缩为尽可能小的字符串,c#,asp.net,vb.net,compression,C#,Asp.net,Vb.net,Compression,我有一堆10位数的整数,我正在传递一个URL。比如: "4294965286", "2292964213". 它们将始终为正,并且始终为10位数字 我想将这些整数压缩成最小的形式,仍然可以在URL中使用(也就是字母和数字都很好),然后再解压缩它们。我已经研究过使用gzipstream,但它会创建更大的字符串,而不是更短的字符串 我目前正在使用asp.net,因此vb.net或c#解决方案是最好的 谢谢是的。GZIP是一种压缩算法,它既需要可压缩的数据,又有开销(帧和字典等)。应该使用编码算法 使

我有一堆10位数的整数,我正在传递一个URL。比如: "4294965286", "2292964213". 它们将始终为正,并且始终为10位数字

我想将这些整数压缩成最小的形式,仍然可以在URL中使用(也就是字母和数字都很好),然后再解压缩它们。我已经研究过使用gzipstream,但它会创建更大的字符串,而不是更短的字符串

我目前正在使用asp.net,因此vb.net或c#解决方案是最好的

谢谢

是的。GZIP是一种压缩算法,它既需要可压缩的数据,又有开销(帧和字典等)。应该使用编码算法

使用“简单”的方法。

也就是说,将数字(在字符串中以10为基数表示)转换为表示该数字的实际字节序列(5个字节将包含10位十进制数字),然后将结果转换为以64为基数。每个base-64字符存储6位信息(小数~3.3位/字符),因此大小约为一半以上(在这种情况下,需要6*base-64输出字符)

此外,由于输入/输出长度可从数据本身获得,“123”可能最初(在被base-64编码之前)被转换为1字节,“30000”被转换为2字节,等等。如果不是所有数字的长度都大致相同,这将是有利的

快乐编码


*使用base-64需要6个输出字符

编辑:一开始我错了,我说“2.3位/字符”表示十进制,并建议不到一半的字符是必需的。我已经更新了上面的答案,并在这里显示了(应该是正确的)数学,其中
lg(n)
是以2为底的日志

表示输入数字所需的输入位数为
bits/char*chars
->
lg(10)*10
(或仅
lg(9999999999)
)->
~33.2位
。使用jball的操作首先移位数字,所需的位数为
lg(8999999999)
->
~33.06位
。但是,在这种特殊情况下,这种转换无法提高效率(需要将输入位的数量减少到30位或以下才能产生差异)

因此,我们试图找到一个x(base-64编码中的字符数),以便:

lg(64)*x=33.2
->
6*x=33.2
->
x~5.53
。当然,5个半字符是没有意义的,因此我们选择6作为在base-64编码中对最大为9999999999的值进行编码所需的最大字符数。这是原来10个字符的一半多一点

然而,应该注意的是,要在base-64输出中仅获取6个字符,需要使用非标准的base-64编码器或进行少量操作(大多数base-64编码器只处理整个字节)。这是因为在最初的5个“必需字节”中,40位中只有34位被使用(前6位总是0)。对所有40位进行编码需要7个base-64字符

这是对Guffa在回答中发布的代码的修改(如果你喜欢的话,去给他投票),只需要6个base-64字符。请参阅Guffa回答中的其他注释,因为下面的方法不使用URL友好映射

// Serialise the 64-bit value into its eight raw bytes.
byte[] data = BitConverter.GetBytes(value);
// Base-64 consumes bytes left to right, so put the most significant byte first.
if (BitConverter.IsLittleEndian) {
   Array.Reverse(data);
}
// Eight bytes encode to 11 symbols plus one '=' pad. For the 10-digit inputs
// discussed here the top 30 bits are zero, so the first five symbols are always
// "A"; only the six symbols that follow (36 bits) carry information.
string allSymbols = Convert.ToBase64String(data);
string base64 = allSymbols.Substring(5, 6);

// Put back the five leading "A" symbols and the '=' pad that were stripped
// during encoding, then decode to the original eight big-endian bytes.
byte[] restored = Convert.FromBase64String("AAAAA" + base64 + "=");
byte[] data2 = new byte[8];
Array.Copy(restored, data2, restored.Length);
// BitConverter reads in the machine's byte order; undo the big-endian layout
// on little-endian hosts.
if (BitConverter.IsLittleEndian) {
   Array.Reverse(data2);
}
long decoded = BitConverter.ToInt64(data2, 0);

让它更“漂亮”

由于base-64已被确定为使用6个字符,因此任何仍将输入位编码为6个字符的编码变体将创建同样小的输出。使用base-32编码不会完全起作用,因为在base-32编码中,6个字符只能存储30位的信息(
lg(32)*6
)

然而,使用定制的base-48(或52/62)编码可以实现相同的输出大小。(基数48-62的优点是,它们只需要字母数字字符的子集,不需要符号;对于变体,可以选择避免“不明确”的符号,如1和“I”)。在base-48系统中,6个字符可以编码约33.5位(
lg(48)*6
)的信息,其刚好高于所需的约33.2位(或约33.06位)(
lg(10)*10
)

以下是概念证明:

// Encodes a non-negative number as text in the positional numeral system defined
// by `map`: the number of characters in `map` is the base, and a character's
// position in `map` is its digit value.
//
// Digits are emitted least-significant first (Decode reads them back in the
// opposite direction). The result is not padded: small values give short strings.
//
// Throws ArgumentNullException when `map` is null, ArgumentOutOfRangeException
// when `inp` is negative, and ArgumentException when `map` has fewer than two
// characters (a one-symbol alphabet can never reduce a non-zero value to zero,
// so the digit loop would not terminate).
string Encode(long inp, IEnumerable<char> map) {
    if (map == null) {
        throw new ArgumentNullException(nameof(map));
    }
    if (inp < 0) {
        throw new ArgumentOutOfRangeException(nameof(inp), inp, "not implemented for negative numbers");
    }

    // Materialise once: `map` may be a lazy sequence, and an array gives
    // direct value -> character lookup by index.
    var digits = map.ToArray();
    if (digits.Length < 2) {
        throw new ArgumentException("map must contain at least two characters", nameof(map));
    }
    if (inp == 0) {
        return digits[0].ToString();
    }

    // A non-negative long has at most 63 digits in base 2, fewer in any larger base.
    var buffer = new char[64];
    var used = 0;
    while (inp > 0) {
        // encoded least-to-most significant
        buffer[used++] = digits[(int)(inp % digits.Length)];
        inp /= digits.Length;
    }
    return new string(buffer, 0, used);
}

// Decodes text produced by Encode back into a number. `map` must be the same
// alphabet used for encoding: its length is the base and each character's
// position is its digit value.
//
// The text is read from its last character to its first, because Encode writes
// the least significant digit first. An empty string decodes to 0.
//
// Throws ArgumentNullException for a null argument, ArgumentException when `map`
// contains the same character twice, KeyNotFoundException when `encoded` contains
// a character that is not in `map`, and OverflowException when the decoded value
// does not fit in a long (instead of silently wrapping around).
long Decode(string encoded, IEnumerable<char> map) {
    if (encoded == null) {
        throw new ArgumentNullException(nameof(encoded));
    }
    if (map == null) {
        throw new ArgumentNullException(nameof(map));
    }

    // Materialise once so a lazy sequence is not enumerated twice.
    var digits = map.ToArray();
    long b = digits.Length;

    // character -> value
    var toVal = new Dictionary<char, int>(digits.Length);
    for (var d = 0; d < digits.Length; d++) {
        toVal.Add(digits[d], d);
    }

    long res = 0;
    // go in reverse to mirror encoding
    for (var i = encoded.Length - 1; i >= 0; i--) {
        int val;
        if (!toVal.TryGetValue(encoded[i], out val)) {
            throw new KeyNotFoundException("'" + encoded[i] + "' is not a character of the encoding map.");
        }
        res = checked((res * b) + val);
    }
    return res;
}

// Round-trip demonstration: encodes each sample with a 48-symbol alphabet,
// prints the encoded form, decodes it again and fails loudly on any mismatch.
void Main()
{
    // Base-48 alphabet: upper- and lower-case letters without the easily
    // confused l/L, i/I and o/O (lower-case w is absent as well), followed by
    // the digits 2, 3 and 4.
    char[] map = "ABCDEFGHJKMNPQRSTUVWXYZabcdefghjkmnpqrstuvxyz234".ToCharArray();

    long[] test = { 0, 1, 9999999999, 4294965286, 2292964213, 1000000000 };
    for (var index = 0; index < test.Length; index++) {
        long t = test[index];
        string encoded = Encode(t, map);
        long decoded = Decode(encoded, map);
        Console.WriteLine("value: {0} encoded: {1}", t, encoded);
        if (decoded == t) {
            continue;
        }
        throw new Exception("failed for " + t);
    }
}
//这不会“填充”值
字符串编码(长inp,IEnumerable映射){
Assert(inp>=0,“未针对负数实现”);
var b=map.Count();
//值->字符
var toChar=map.Select((v,i)=>new{Value=v,Index=i})。ToDictionary(i=>i.Index,i=>i.Value);
var res=“”;
如果(inp==0){
返回“+toChar[0];
}
而(inp>0){
//编码最低至最高有效
var val=(int)(inp%b);
inp=inp/b;
res+=toChar[val];
}
返回res;
}
长解码(字符串编码,IEnumerable映射){
var b=map.Count();
//字符->值
var toVal=map.Select((v,i)=>new{Value=v,Index=i}).ToDictionary(i=>i.Value,i=>i.Index);
长res=0;
//与镜像编码相反
对于(var i=encoded.Length-1;i>=0;i--){
var ch=编码的[i];
var-val=toVal[ch];
res=(res*b)+val;
}
返回res;
}
void Main()
{
//对于48位基址,省略l/l、1、i/i、o/o、0
变量映射=新字符[]{
‘A’、‘B’、‘C’、‘D’、‘E’、‘F’、‘G’、‘H’、‘J’、‘K’,
‘M’、‘N’、‘P’、‘Q’、‘R’、‘S’、‘T’、‘U’、‘V’、‘W’,
“X”、“Y”、“Z”、“a”、“b”、“c”、“d”、“e”、“f”、“g”,
‘h’、‘j’、‘k’、‘m’、‘n’、‘p’、‘q’、‘r’、‘s’、‘t’,
‘u’、‘v’、‘x’、‘y’、‘z’、‘2’、‘3’、‘4’,
};
var测试=新长[]{0,1999999999994294965286229296421100000000};
foreach(测试中的var t){
var encoded=编码(t,map);
var decoded=解码(编码,映射);
WriteLine(string.Format(“值:{0}编码:{1}”,t,编码));
如果(t!=已解码){
value: 0 encoded: A
value: 1 encoded: B
value: 9999999999 encoded: SrYsNt
value: 4294965286 encoded: ZNGEvT
value: 2292964213 encoded: rHd24J
value: 1000000000 encoded: TrNVzD
long value = 4294965286;

// Get the value as an eight byte array. BitConverter uses the byte order of the
// host, so normalise to little-endian: the five low-order bytes then come first
// and the last three are zero for a 10-digit value.
byte[] data = BitConverter.GetBytes(value);
if (!BitConverter.IsLittleEndian) {
    Array.Reverse(data);
}
// Encode the first five bytes. Five bytes give seven symbols plus one '=' pad;
// the pad is dropped. Note that the standard alphabet can produce '/' and '+'.
string base64 = Convert.ToBase64String(data, 0, 5).Substring(0, 7);
Console.WriteLine(base64);
Jvj//wA
// create an eight byte array
byte[] data = new byte[8];
// Decode the text into five bytes (the '=' pad removed by the encoder is put
// back first) and place them at the start of the array, low-order byte first.
Convert.FromBase64String(base64 + "=").CopyTo(data, 0);
// BitConverter reads in the byte order of the host; the array is little-endian,
// so flip it on big-endian machines.
if (!BitConverter.IsLittleEndian) {
    Array.Reverse(data);
}
// get the value from the array
long value = BitConverter.ToInt64(data, 0);

Console.WriteLine(value);
4294965286
// Compares three textual renderings of the same number. The trailing comments
// give the length and value reported for each one in the original post.
// NOTE(review): the Hashids arguments look like (salt, minimum length, alphabet)
// - confirm against the Hashids library documentation.
var hashids = new Hashids('my salt', 1, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890');
var input = 4294965286;
// Plain hexadecimal digits of the number.
var hex = input.toString(16); // 8 characters: fffff826
var hashid = hashids.encode(input); // 7 characters: 0LzaR1Y
// btoa is given the number itself, so it encodes the decimal digit string rather
// than the numeric bytes, which is why this form is the longest. The '=' padding
// is stripped afterwards.
var base64 = window.btoa(input).replace(/=+/, ''); // 14 characters: NDI5NDk2NTI4Ng
        /// <summary>
        /// Encodes <paramref name="input"/> using the <see cref="Mapping85Bit"/> alphabet.
        /// </summary>
        /// <param name="input">The number to encode.</param>
        /// <returns>The encoded text, without padding.</returns>
        public static string EncodeNumber(ulong input) => EncodeNumber(input, Mapping85Bit);

        /// <summary>
        /// Encodes a number in the positional system described by <paramref name="map"/>:
        /// the array length is the base and each index is the value of the character
        /// stored there. The output is not padded.
        /// </summary>
        /// <param name="inp">The number to encode.</param>
        /// <param name="map">The digit alphabet, indexed by digit value.</param>
        /// <returns>The digits of <paramref name="inp"/>, most significant first.</returns>
        private static string EncodeNumber(ulong inp, char[] map)
        {
            // Kept as ulong so the arithmetic below needs no casts.
            ulong radix = (ulong)map.Length;
            if (inp == 0)
            {
                return map[0].ToString();
            }

            var digits = new StringBuilder();
            // Digits come out least significant first; inserting each one at the
            // front leaves the text in most-to-least significant order.
            for (ulong rest = inp; rest > 0; rest /= radix)
            {
                digits.Insert(0, map[(int)(rest % radix)]);
            }
            return digits.ToString();
        }

        /// <summary>
        /// Decodes text written with the <see cref="Mapping85Bit"/> alphabet back into a number.
        /// </summary>
        /// <param name="encoded">The encoded text.</param>
        /// <returns>The decoded number.</returns>
        public static ulong DecodeNumber(string encoded) => DecodeNumber(encoded, Mapping85Bit, Mapping85BitDict);

        /// <summary>
        /// Decodes text, most significant digit first, in the positional system whose
        /// base is the length of <paramref name="map"/> and whose digit values come
        /// from <paramref name="charMapDict"/>. An empty string decodes to 0.
        /// </summary>
        /// <param name="encoded">The encoded text.</param>
        /// <param name="map">The digit alphabet; only its length (the base) is used here.</param>
        /// <param name="charMapDict">Lookup from character to digit value.</param>
        /// <returns>The decoded number.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encoded"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="encoded"/> contains a character that is not in <paramref name="charMapDict"/>.</exception>
        /// <exception cref="OverflowException">The decoded value does not fit in a <see cref="ulong"/>.</exception>
        private static ulong DecodeNumber(string encoded, char[] map, Dictionary<char, ulong> charMapDict)
        {
            if (encoded == null)
            {
                throw new ArgumentNullException(nameof(encoded));
            }

            // Kept as ulong so the arithmetic below needs no casts.
            ulong b = (ulong)map.Length;
            ulong res = 0;
            foreach (char ch in encoded)
            {
                if (!charMapDict.TryGetValue(ch, out ulong val))
                {
                    throw new ArgumentException($"Invalid encoded number: '{encoded}'. '{ch}' is not a valid character for this encoding.");
                }
                // checked: an over-long input must fail instead of wrapping
                // around to an unrelated number.
                res = checked((res * b) + val);
            }
            return res;
        }



        // Windows file system reserved characters:     < > : " / \ | ? * 

        /// <summary>
        /// The 85-symbol digit alphabet (base 85): a character's index in the array is its digit value.
        /// The first 64 entries are A-Z, a-z, 0-9, '-' and '_'; the remaining 21 are a space and ASCII punctuation.
        /// The table contains none of / \ ' " &lt; &gt; : | * =, so no JSON string escaping is needed.
        /// https://www.ascii-code.com/
        /// It is a different alphabet from https://en.wikipedia.org/wiki/Ascii85
        /// NOTE(review): the table does contain '?', which Windows does not allow in file names, and '&amp;',
        /// which must be escaped in XML - confirm whether file-name and XML safety are really required.
        /// NOTE(review): it also contains a space and characters such as '#', '%', '?' and '&amp;' that have
        /// special meaning in URLs - verify before putting encoded values into a URL.
        /// </summary>
        public static readonly char[] Mapping85Bit = new char[] {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
            'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
            'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
            'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
            'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
            'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', '-', '_', ' ', '!', '#', '$', '%', '&',
            '(', ')', '+', ',', '.', ';', '?', '@', '[', ']',
            '^', '`', '{', '}', '~'
        };
        /// <summary>Reverse lookup for <see cref="Mapping85Bit"/>: character to digit value (its index in the array).</summary>
        private static readonly Dictionary<char, ulong> Mapping85BitDict = Enumerable.Range(0, Mapping85Bit.Length).ToDictionary(i => Mapping85Bit[i], i => (ulong)i);

    /// <summary>
    /// Checks known values of the base-85 encoding in both directions.
    /// </summary>
    [Test]
    public void EncodeTest()
    {
        // Assert.AreEqual takes (expected, actual); keeping that order makes a
        // failure message report the right value as "expected".

        // Encoding: single digits, the first two-digit value and its successor.
        Assert.AreEqual("A", EncodeNumber(0));
        Assert.AreEqual("D", EncodeNumber(3));
        Assert.AreEqual("~", EncodeNumber(84));
        Assert.AreEqual("BA", EncodeNumber(85));
        Assert.AreEqual("BB", EncodeNumber(86));

        // Decoding: expected values are ulong literals to match the return type.
        Assert.AreEqual(85UL, DecodeNumber("BA"));
        Assert.AreEqual(81UL, DecodeNumber("`"));
    }