String 为字符串生成校验和_String_Algorithm_Checksum

String 为字符串生成校验和

string algorithm

String 为字符串生成校验和,string,algorithm,checksum,String,Algorithm,Checksum,我想为字符串/数据生成校验和 1. The same data should produce the same Checksum 2. Two different data strings can't product same checksum. Random collision of 0.1% can be negligible 3. No encryption/decryption of data 4. Checksum length need not be too huge and co

我想为字符串/数据生成校验和

1. The same data should always produce the same checksum.
2. Two different data strings must not produce the same checksum; a random collision rate of 0.1% is negligible.
3. No encryption/decryption of the data is needed.
4. The checksum need not be very long, and may contain letters and other characters.
5. It must be very fast and efficient: generating checksum(s) for 100 MB of text data should take less than 5 minutes, and generating 1000 checksums for segments of less than 1 KB each should take less than 10 seconds.

非常感谢任何算法或实现参考和建议。

您可以编写自定义哈希函数：（c++）

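long long int hash(String s){
    long long k = 7;
    for(int i = 0; i < s.length(); i++){
        k *= 23;
        k += s[i];
        k *= 13;
        k %= 1000000009;
    }
    return k;
}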


这将为您提供一个良好的（大多数示例无冲突）哈希值。
您可以编写一个自定义哈希函数：（c++）
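long long int hash(String s){
    long long k = 7;
    for(int i = 0; i < s.length(); i++){
        k *= 23;
        k += s[i];
        k *= 13;
        k %= 1000000009;
    }
    return k;
}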

这将为您提供一个良好的（大多数样本无冲突）哈希值。
一个非常常见的快速校验和是CRC-32，一种32位多项式循环冗余校验。下面是CRC-32的三种C实现，它们在速度和复杂度上有所不同：（这是从）
#include <stdio.h>
#include <stdlib.h>
//------------------------------反面--------------------------------
//反转（反射）32位字中的位。
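unsigned reverse(unsigned x) {
   x = ((x & 0x55555555) <<  1) | ((x >>  1) & 0x55555555);
   x = ((x & 0x33333333) <<  2) | ((x >>  2) & 0x33333333);
   x = ((x & 0x0F0F0F0F) <<  4) | ((x >>  4) & 0x0F0F0F0F);
   x = (x << 24) | ((x & 0xFF00) << 8) |
       ((x >> 8) & 0xFF00) | (x >> 24);
   return x;
}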
//------------------------------------crc32a--------------------------------
/*这是没有优化的基本CRC算法。它遵循
逻辑电路尽可能紧密*/
无符号整数crc32a（无符号字符*消息）{
int i，j；
无符号整数字节，crc；
i=0；
crc=0xFFFFFF；
while（消息[i]！=0）{
字节=消息[i]；//获取下一个字节。
字节=反向（字节）；//32位反向。
对于（j=0；j>1）^（0xEDB88320&掩码）；
}
表[字节]=crc；
}
}
/*通过表格设置，现在计算CRC*/
i=0；
crc=0xFFFFFF；
而（（字节=消息[i]）！=0）{
crc=（crc>>8）^表[（crc^字节）&0xFF]；
i=i+1；
}
返回~crc；
}

简单地说，您将获得比您可能吸收的更多的信息。
一种非常常见的快速校验和是CRC-32，一种32位多项式循环冗余校验。下面是CRC-32的三种C实现，它们在速度和复杂度上有所不同：（这是从）
#include <stdio.h>
#include <stdlib.h>
//------------------------------反面--------------------------------
//反转（反射）32位字中的位。
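unsigned reverse(unsigned x) {
   x = ((x & 0x55555555) <<  1) | ((x >>  1) & 0x55555555);
   x = ((x & 0x33333333) <<  2) | ((x >>  2) & 0x33333333);
   x = ((x & 0x0F0F0F0F) <<  4) | ((x >>  4) & 0x0F0F0F0F);
   x = (x << 24) | ((x & 0xFF00) << 8) |
       ((x >> 8) & 0xFF00) | (x >> 24);
   return x;
}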
//------------------------------------crc32a--------------------------------
/*这是没有优化的基本CRC算法。它遵循
逻辑电路尽可能紧密*/
无符号整数crc32a（无符号字符*消息）{
int i，j；
无符号整数字节，crc；
i=0；
crc=0xFFFFFF；
while（消息[i]！=0）{
字节=消息[i]；//获取下一个字节。
字节=反向（字节）；//32位反向。
对于（j=0；j>1）^（0xEDB88320&掩码）；
}
表[字节]=crc；
}
}
/*通过表格设置，现在计算CRC*/
i=0；
crc=0xFFFFFF；
而（（字节=消息[i]）！=0）{
crc=（crc>>8）^表[（crc^字节）&0xFF]；
i=i+1；
}
返回~crc；
}

如果你只是简单地说，你会得到比你所能吸收的更多的信息。如果你不能加密/解密，就使用散列。@vish4071，最好建议一些散列技术而不是“使用散列”。Sha-1、Sha-2、MD5…等等。或定义自定义哈希函数。如果0.1%的冲突容忍度意味着只需要1000个可能的校验和，那么校验和可以占用10位（2^10=1024）。一个非常简单、非常快速的校验和，只需将所有字节模16相加即可。看看杂音散列，它的冲突率非常低，性能也可以接受（在我的情况下）：如果不能加密/解密，请使用散列。@vish4071，最好建议使用一些散列技术，而不是“使用散列”。Sha-1、Sha-2、MD5…等等。或定义自定义哈希函数。如果0.1%的冲突容忍度意味着只需要1000个可能的校验和，那么校验和可以占用10位（2^10=1024）。一个非常简单、非常快速的校验和，只需将所有字节加上模16即可。看看杂音散列，它的冲突率非常低，性能也可以接受（在我的例子中）：与23和13相乘是随机的，或者后面有逻辑吗？与23和13相乘是随机的，或者后面有逻辑吗？
// Multiplicative string hash.
//
// Starts from the seed 7 and, for every element of `s` in order, multiplies
// the running value by 23, adds the element, multiplies by 13 and reduces
// modulo 1000000009. The reduced value is returned.
//
// NOTE(review): `String` is not declared in this snippet; it is presumably a
// std::string-like type providing length() and operator[] -- confirm.
long long int hash(String s){
    const long long modulus = 1000000009;
    long long acc = 7;
    int pos = 0;
    while (pos < s.length()) {
        // Intermediate stays far below the long long limit because acc is
        // reduced below `modulus` after every step.
        acc = ((acc * 23 + s[pos]) * 13) % modulus;
        ++pos;
    }
    return acc;
}

#include <stdio.h>
#include <stdlib.h>

// ---------------------------- reverse --------------------------------

// Mirrors the bit order of a 32-bit word: bit 0 trades places with bit 31,
// bit 1 with bit 30, and so on. Returns the mirrored word.
unsigned reverse(unsigned x) {
   // Swap adjacent bits, then adjacent bit pairs, then adjacent nibbles.
   x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1);
   x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2);
   x = ((x >> 4) & 0x0F0F0F0F) | ((x & 0x0F0F0F0F) << 4);

   // Each byte is now reversed internally; reverse the order of the bytes.
   unsigned lowest_to_top    = x << 24;
   unsigned second_up        = (x & 0xFF00) << 8;
   unsigned third_down       = (x >> 8) & 0xFF00;
   unsigned highest_to_floor = x >> 24;
   return lowest_to_top | second_up | third_down | highest_to_floor;
}

// ----------------------------- crc32a --------------------------------

/* Basic CRC algorithm with no optimizations; it follows the logic circuit
as closely as possible.

   Each message byte is bit-reversed with reverse() so that it sits in the
top eight bits of a word, then fed one bit at a time into a left-shifting
register using the polynomial 0x04C11DB7. The register starts at
0xFFFFFFFF and the result is complemented and bit-reversed before being
returned.

   message: bytes to checksum; processing stops at the first zero byte, so
            data containing an embedded zero byte is only partly covered.
   returns: the computed CRC value.

   The message is walked with a pointer rather than an int index so that
inputs longer than INT_MAX bytes do not overflow a signed counter.
   Assumes unsigned int is 32 bits wide. */

unsigned int crc32a(unsigned char *message) {
   const unsigned char *p;
   unsigned int crc = 0xFFFFFFFF;

   for (p = message; *p != 0; p++) {
      unsigned int byte = reverse(*p);   // 32-bit reversal of the next byte.
      int j;

      for (j = 0; j < 8; j++) {          // Do eight times.
         // Test the top bit directly; converting an out-of-range unsigned
         // value to int and comparing with 0 is implementation-defined.
         if ((crc ^ byte) & 0x80000000u) {
            crc = (crc << 1) ^ 0x04C11DB7;
         } else {
            crc = crc << 1;
         }
         byte = byte << 1;               // Ready next msg bit.
      }
   }
   return reverse(~crc);
}

// ----------------------------- crc32b --------------------------------

/* Basic CRC-32 calculation with some optimization but no table lookup.
The byte reversal is avoided by shifting the crc register right instead of
left and by using a reversed 32-bit word (0xEDB88320) to represent the
polynomial.

   message: bytes to checksum; processing stops at the first zero byte, so
            data containing an embedded zero byte is only partly covered.
   returns: the complemented register, i.e. the CRC-32 of the message.

   The message is walked with a pointer rather than an int index so that
inputs longer than INT_MAX bytes do not overflow a signed counter.
   The original author quoted 8 + 72n instructions (n = message length) for
the earlier index-based form compiled for "Cyclops" with GCC; that figure
has not been re-measured for this form. */

unsigned int crc32b(unsigned char *message) {
   const unsigned char *p;
   unsigned int crc = 0xFFFFFFFF;

   for (p = message; *p != 0; p++) {
      int bit;

      crc ^= *p;                             // Fold in the next byte.
      for (bit = 0; bit < 8; bit++) {        // Do eight times.
         // mask is all ones when the low bit is set, otherwise zero, so the
         // polynomial is XORed in only when a 1 is shifted out.
         unsigned int mask = -(crc & 1);
         crc = (crc >> 1) ^ (0xEDB88320 & mask);
      }
   }
   return ~crc;
}

// ----------------------------- crc32c --------------------------------

/* Table-driven CRC-32, derived from crc32b. On the first call the 256-entry
lookup table is computed; later calls reuse it.

   message: bytes to checksum; processing stops at the first zero byte, so
            data containing an embedded zero byte is only partly covered.
   returns: the complemented register, i.e. the CRC-32 of the message.

   The message is walked with a pointer rather than an int index so that
inputs longer than INT_MAX bytes do not overflow a signed counter.
   The table is a function-local static filled in without any locking.
NOTE(review): if this can be called from several threads, the first calls
may race on the table -- confirm against callers.
   The original author quoted 7 + 13n instructions (n = message length, not
counting table setup) for the earlier index-based form compiled for
"Cyclops" with GCC; that figure has not been re-measured for this form.
   Derived from Figure 14-7 in the text. */

unsigned int crc32c(unsigned char *message) {
   static unsigned int table[256];
   const unsigned char *p;
   unsigned int crc;

   /* Set up the table, if necessary. A built table has a nonzero entry at
      index 1, so a zero there means the table has not been filled yet. */

   if (table[1] == 0) {
      unsigned int n;

      for (n = 0; n < 256; n++) {
         unsigned int entry = n;
         int bit;

         for (bit = 0; bit < 8; bit++) {     // Do eight times.
            unsigned int mask = -(entry & 1);
            entry = (entry >> 1) ^ (0xEDB88320 & mask);
         }
         table[n] = entry;
      }
   }

   /* Through with table setup, now calculate the CRC one byte per step. */

   crc = 0xFFFFFFFF;
   for (p = message; *p != 0; p++) {
      crc = (crc >> 8) ^ table[(crc ^ *p) & 0xFF];
   }
   return ~crc;
}