Java 如何获得CRC64分布式计算(利用其线性特性)?

Java 如何获得CRC64分布式计算(利用其线性特性)?,java,c,hash,distributed-computing,crc,Java,C,Hash,Distributed Computing,Crc,我需要散列存储在分布式FS上的相当大的文件。我能够以比整个文件更好的性能处理文件的部分,所以我希望能够对部分进行哈希计算,然后求和 我考虑将CRC64作为散列算法,但我不知道如何使用其理论上的“线性函数”属性,这样我就可以对文件的部分进行CRC求和。有什么建议吗?我错过了什么 我为什么要看CRC64的附加说明: 我可以控制文件块,但由于应用程序的性质,它们需要有不同的大小(最多1字节,不可能有任何固定块) 我知道CRC32实现(zlib),其中包括对部分进行CRC求和的方法,但我想要更广泛的方

我需要散列存储在分布式FS上的相当大的文件。我能够以比整个文件更好的性能处理文件的部分,所以我希望能够对部分进行哈希计算,然后求和

我考虑将
CRC64
作为散列算法,但我不知道如何使用其理论上的“线性函数”属性,这样我就可以对文件的部分进行CRC求和。有什么建议吗?我错过了什么

我为什么要看
CRC64
的附加说明:

  • 我可以控制文件块的划分,但由于应用程序的性质,各块的大小必须是可变的(粒度可小至1字节,无法使用任何固定大小的块)
  • 我知道
    CRC32
    实现(
    zlib
    ),其中包括合并各部分CRC的方法,但我想要位数更宽的校验值。8字节对我来说很合适
  • 我知道CRC很快。我想利用这一点,因为文件可能非常巨大(可达数GB)

我认为这个功能具有足够的通用价值,值得编写出来并公开提供:

/* crc64.c -- compute CRC-64
 * Copyright (C) 2013 Mark Adler
 * Version 1.4  16 Dec 2013  Mark Adler
 */

/*
  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the author be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.

  Mark Adler
  madler@alumni.caltech.edu
 */

/* Compute CRC-64 in the manner of xz, using the ECMA-182 polynomial,
   bit-reversed, with one's complement pre and post processing.  Provide a
   means to combine separately computed CRC-64's. */

/* Version history:
   1.0  13 Dec 2013  First version
   1.1  13 Dec 2013  Fix comments in test code
   1.2  14 Dec 2013  Determine endianess at run time
   1.3  15 Dec 2013  Add eight-byte processing for big endian as well
                     Make use of the pthread library optional
   1.4  16 Dec 2013  Make once variable volatile for limited thread protection
 */

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

/* The include of pthread.h below can be commented out in order to not use the
   pthread library for table initialization.  In that case, the initialization
   will not be thread-safe.  That's fine, so long as it can be assured that
   there is only one thread using crc64(). */
#include <pthread.h>            /* link with -lpthread */

/* 64-bit CRC polynomial with these coefficients, but reversed:
    64, 62, 57, 55, 54, 53, 52, 47, 46, 45, 40, 39, 38, 37, 35, 33, 32,
    31, 29, 27, 24, 23, 22, 21, 19, 17, 13, 12, 10, 9, 7, 4, 1, 0
   The constant below is the bit-reversed form used by the right-shifting
   CRC update in this file. */
#define POLY UINT64_C(0xc96c5795d7870f42)

/* Tables for CRC calculation -- filled in by initialization functions that are
   called once.  These could be replaced by constant tables generated in the
   same way.  There are two tables, one for each endianness: the big-endian
   table holds the same values as the little-endian one with the bytes of
   every entry reversed.  Row k, entry n is the CRC-64 of the single byte n
   followed by k zero bytes.  Since these are static, i.e. local, one should
   be compiled out of existence if the compiler can evaluate the endianness
   check in crc64() at compile time. */
static uint64_t crc64_little_table[8][256];
static uint64_t crc64_big_table[8][256];

/* Populate an 8 x 256 CRC-64 lookup table.

   Row 0 holds the CRC-64 remainder of every possible single byte, computed
   bit by bit with the reversed polynomial POLY.  Row k (1..7) holds the
   remainder of that same byte followed by k zero bytes, derived from the
   previous row's value with one table-driven byte step. */
static void crc64_init(uint64_t table[][256])
{
    for (unsigned byte = 0; byte < 256; byte++) {
        uint64_t rem = byte;

        /* shift out eight bits, folding in POLY whenever a 1 drops off */
        for (unsigned bit = 0; bit < 8; bit++) {
            if (rem & 1)
                rem = (rem >> 1) ^ POLY;
            else
                rem >>= 1;
        }
        table[0][byte] = rem;
    }

    for (unsigned byte = 0; byte < 256; byte++) {
        uint64_t rem = table[0][byte];

        /* each step appends one more zero byte to the message */
        for (unsigned zeros = 1; zeros < 8; zeros++) {
            rem = (rem >> 8) ^ table[0][rem & 0xff];
            table[zeros][byte] = rem;
        }
    }
}

/* One-time initializer for the little-endian lookup table: fills
   crc64_little_table via crc64_init() with no further transformation.  It has
   the void (*)(void) shape so that it can be handed to the ONCE() macro, which
   is how crc64_little() invokes it. */
static void crc64_little_init(void)
{
    crc64_init(crc64_little_table);
}

/* Return a with its eight bytes in the opposite order (byte 0 <-> byte 7,
   byte 1 <-> byte 6, ...).  Done as three swap stages: 32-bit halves, then
   16-bit quarters within each half, then the bytes within each quarter. */
static inline uint64_t rev8(uint64_t a)
{
    const uint64_t mask16 = UINT64_C(0x0000ffff0000ffff);
    const uint64_t mask8 = UINT64_C(0x00ff00ff00ff00ff);

    a = (a << 32) | (a >> 32);
    a = ((a & mask16) << 16) | ((a >> 16) & mask16);
    a = ((a & mask8) << 8) | ((a >> 8) & mask8);
    return a;
}

/* One-time initializer for the big-endian lookup table: builds the regular
   table with crc64_init() and then byte-reverses every entry in place so the
   values can be combined directly with words loaded on a big-endian
   machine. */
static void crc64_big_init(void)
{
    crc64_init(crc64_big_table);

    for (unsigned row = 0; row < 8; row++) {
        uint64_t *entries = crc64_big_table[row];

        for (unsigned col = 0; col < 256; col++)
            entries[col] = rev8(entries[col]);
    }
}

/* Run the init() function exactly once.  The init function must be of the
   type void init(void).

   If pthread.h has been included (detected by PTHREAD_ONCE_INIT being
   defined), pthread_once() is used with a static pthread_once_t that is local
   to the block the macro expands into.

   Otherwise a static volatile int serves as the state: 1 means "not yet
   initialized" and 0 means "done".  The caller whose post-increment observes
   1 runs init() and then stores 0; a caller that saw a non-zero state but a
   different value spins until the state becomes 0.  The test and increment
   are not atomic, so this fallback is not thread-safe. */
#ifdef PTHREAD_ONCE_INIT
#  define ONCE(init) \
    do { \
        static pthread_once_t once = PTHREAD_ONCE_INIT; \
        pthread_once(&once, init); \
    } while (0)
#else
#  define ONCE(init) \
    do { \
        static volatile int once = 1; \
        if (once) { \
            if (once++ == 1) { \
                init(); \
                once = 0; \
            } \
            else \
                while (once) \
                    ; \
        } \
    } while (0)
#endif

/* Calculate a CRC-64 eight bytes at a time on a little-endian architecture.

   crc is the running CRC (0 to start a new one), buf[0..len-1] is the data to
   add.  The CRC is complemented on entry and again on exit.  Leading bytes
   are consumed one at a time until the read pointer is 8-byte aligned, then
   whole 64-bit words are folded in with one lookup per byte, and any trailing
   bytes are handled one at a time again.  The lookup table is built on first
   use through ONCE().

   The 64-bit word is fetched with memcpy() rather than by dereferencing a
   cast pointer: reading a byte buffer through a uint64_t lvalue violates the
   effective-type (strict aliasing) rules, while a fixed-size memcpy() is
   well defined and is normally compiled to a single load. */
static inline uint64_t crc64_little(uint64_t crc, const void *buf, size_t len)
{
    const unsigned char *next = buf;
    uint64_t word;

    ONCE(crc64_little_init);
    crc = ~crc;

    /* byte-wise until aligned on an eight-byte boundary */
    while (len && ((uintptr_t)next & 7) != 0) {
        crc = crc64_little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
        len--;
    }

    /* eight bytes per iteration; the lowest byte of the word is the earliest
       in memory and so is followed by the most zero bytes (table row 7) */
    while (len >= 8) {
        memcpy(&word, next, sizeof word);
        crc ^= word;
        crc = crc64_little_table[7][crc & 0xff] ^
              crc64_little_table[6][(crc >> 8) & 0xff] ^
              crc64_little_table[5][(crc >> 16) & 0xff] ^
              crc64_little_table[4][(crc >> 24) & 0xff] ^
              crc64_little_table[3][(crc >> 32) & 0xff] ^
              crc64_little_table[2][(crc >> 40) & 0xff] ^
              crc64_little_table[1][(crc >> 48) & 0xff] ^
              crc64_little_table[0][crc >> 56];
        next += 8;
        len -= 8;
    }

    /* remaining tail of fewer than eight bytes */
    while (len) {
        crc = crc64_little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
        len--;
    }
    return ~crc;
}

/* Calculate a CRC-64 eight bytes at a time on a big-endian architecture.

   crc is the running CRC (0 to start a new one), buf[0..len-1] is the data to
   add.  Internally the CRC is kept byte-reversed (and complemented) so that
   it lines up with words loaded from memory on a big-endian machine; it is
   reversed back and complemented again before being returned.  Leading bytes
   are consumed one at a time until the read pointer is 8-byte aligned, then
   whole 64-bit words are folded in, then any trailing bytes.  The
   byte-reversed lookup table is built on first use through ONCE().

   The 64-bit word is fetched with memcpy() rather than by dereferencing a
   cast pointer: reading a byte buffer through a uint64_t lvalue violates the
   effective-type (strict aliasing) rules, while a fixed-size memcpy() is
   well defined and is normally compiled to a single load. */
static inline uint64_t crc64_big(uint64_t crc, const void *buf, size_t len)
{
    const unsigned char *next = buf;
    uint64_t word;

    ONCE(crc64_big_init);
    crc = ~rev8(crc);

    /* byte-wise until aligned on an eight-byte boundary */
    while (len && ((uintptr_t)next & 7) != 0) {
        crc = crc64_big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);
        len--;
    }

    /* eight bytes per iteration; here the lowest byte of the word is the
       latest in memory, so it uses table row 0 */
    while (len >= 8) {
        memcpy(&word, next, sizeof word);
        crc ^= word;
        crc = crc64_big_table[0][crc & 0xff] ^
              crc64_big_table[1][(crc >> 8) & 0xff] ^
              crc64_big_table[2][(crc >> 16) & 0xff] ^
              crc64_big_table[3][(crc >> 24) & 0xff] ^
              crc64_big_table[4][(crc >> 32) & 0xff] ^
              crc64_big_table[5][(crc >> 40) & 0xff] ^
              crc64_big_table[6][(crc >> 48) & 0xff] ^
              crc64_big_table[7][crc >> 56];
        next += 8;
        len -= 8;
    }

    /* remaining tail of fewer than eight bytes */
    while (len) {
        crc = crc64_big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);
        len--;
    }
    return ~rev8(crc);
}

/* Return the CRC-64 of buf[0..len-1], continuing from the initial value crc.

   The byte order of the machine is probed at run time by looking at the
   first byte of a 64-bit integer holding 1: non-zero means little-endian, so
   crc64_little() is used, otherwise crc64_big().  A compiler that can fold
   the probe at compile time is free to drop the unused routine and its
   table; if byte order can change while running, both paths remain usable,
   each initializing its own table on first call. */
uint64_t crc64(uint64_t crc, void *buf, size_t len)
{
    const uint64_t probe = 1;

    if (*(const char *)&probe != 0)
        return crc64_little(crc, buf, len);
    return crc64_big(crc, buf, len);
}

#define GF2_DIM 64      /* dimension of GF(2) vectors (length of CRC) */

/* Multiply the GF(2) matrix mat by the bit vector vec and return the product.
   mat[i] is XORed into the result for every set bit i of vec; rows beyond
   the highest set bit of vec are never read. */
static uint64_t gf2_matrix_times(uint64_t *mat, uint64_t vec)
{
    uint64_t product = 0;
    unsigned bit;

    for (bit = 0; vec != 0; bit++, vec >>= 1) {
        if (vec & 1)
            product ^= mat[bit];
    }
    return product;
}

/* Store in square the GF(2) matrix product mat * mat: each of the GF2_DIM
   rows of the result is mat applied to the corresponding row of mat.
   square and mat are expected to be distinct arrays. */
static void gf2_matrix_square(uint64_t *square, uint64_t *mat)
{
    unsigned row = 0;

    while (row < GF2_DIM) {
        square[row] = gf2_matrix_times(mat, mat[row]);
        row++;
    }
}

/* Return the CRC-64 of the concatenation of two blocks, given crc1 (the
   CRC-64 of the first block), crc2 (the CRC-64 of the second block) and len2
   (the length of the second block in bytes).

   crc1 is advanced as if len2 zero bytes had been appended to the first
   block, then XORed with crc2.  The "append zeros" operator is a GF(2)
   matrix; it is squared repeatedly and applied once for every set bit of
   len2. */
uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2)
{
    uint64_t op_a[GF2_DIM];
    uint64_t op_b[GF2_DIM];
    uint64_t *dst;
    uint64_t *src;
    uint64_t *swap;
    unsigned i;

    /* nothing was appended, so the first CRC already is the answer */
    if (len2 == 0)
        return crc1;

    /* op_b: operator for a single zero bit (row 0 is the polynomial, row i
       is the unit vector with bit i-1 set) */
    op_b[0] = POLY;
    for (i = 1; i < GF2_DIM; i++)
        op_b[i] = (uint64_t)1 << (i - 1);

    /* square twice: two zero bits in op_a, then four zero bits in op_b */
    gf2_matrix_square(op_a, op_b);
    gf2_matrix_square(op_b, op_a);

    /* The first squaring in the loop yields the operator for one zero byte
       (eight zero bits).  Each pass squares into the other buffer, applies
       the operator if the current low bit of len2 is set, and moves on to
       the next bit; the two buffers trade roles after every pass. */
    dst = op_a;
    src = op_b;
    for (;;) {
        gf2_matrix_square(dst, src);
        if (len2 & 1)
            crc1 = gf2_matrix_times(dst, crc1);
        len2 >>= 1;
        if (len2 == 0)
            break;

        swap = dst;
        dst = src;
        src = swap;
    }

    /* fold in the CRC of the second block */
    return crc1 ^ crc2;
}

/* Test crc64() on vector[0..len-1], which should have CRC-64 crc.  Also test
   crc64_combine() by splitting vector[] in two (the first part gets the extra
   byte when len is odd), computing each part's CRC separately and combining
   them.  A "mismatch" line is printed for each check that fails; nothing is
   printed on success.

   The buffer is addressed through an unsigned char pointer because pointer
   arithmetic on void * is not defined by ISO C (it is a compiler
   extension). */
static void crc64_test(void *vector, size_t len, uint64_t crc)
{
    unsigned char *bytes = vector;
    size_t head = (len + 1) >> 1;   /* length of the first part */
    size_t tail = len >> 1;         /* length of the second part */
    uint64_t crc1, crc2;

    /* test crc64() */
    crc1 = crc64(0, bytes, len);
    if (crc1 != crc)
        printf("mismatch: %" PRIx64 ", should be %" PRIx64 "\n", crc1, crc);

    /* test crc64_combine() */
    crc1 = crc64(0, bytes, head);
    crc2 = crc64(0, bytes + head, tail);
    crc1 = crc64_combine(crc1, crc2, tail);
    if (crc1 != crc)
        printf("mismatch: %" PRIx64 ", should be %" PRIx64 "\n", crc1, crc);
}

/* Test vectors.  For each n, TESTn is the input text, TESTLENn its length in
   bytes (without the terminating NUL) and TESTCRCn the CRC-64 that crc64()
   is expected to produce for it. */
#define TEST1 "123456789"
#define TESTLEN1 9
#define TESTCRC1 UINT64_C(0x995dc9bbdf1939fa)
#define TEST2 "This is a test of the emergency broadcast system."
#define TESTLEN2 49
#define TESTCRC2 UINT64_C(0x27db187fc15bbc72)

/* Self-test driver: runs crc64_test() on both vectors.  crc64_test() reports
   failures on stdout; the exit status is always 0. */
int main(void)
{
    crc64_test(TEST1, TESTLEN1, TESTCRC1);
    crc64_test(TEST2, TESTLEN2, TESTCRC2);
    return 0;
}
/*crc64.c——计算CRC-64
*版权所有(C)2013马克·阿德勒
*版本1.4 2013年12月16日Mark Adler
*/
/*
本软件按“原样”提供,无任何明示或暗示
担保在任何情况下,提交人都不对任何损害负责
由于使用本软件而产生的。
允许任何人出于任何目的使用本软件,
包括商业应用程序,并对其进行修改和重新发布
自由,受以下限制:
1.不得歪曲本软件的来源;你不能
声称您编写了原始软件。如果你使用这个软件
在产品中,产品文档中的确认是
感谢,但不是必需的。
2.更改后的源版本必须清楚地标记为这样,并且不得更改
被误传为原始软件。
3.不得从任何源分发中删除或更改此通知。
马克艾德勒
madler@alumni.caltech.edu
*/
/*使用ECMA-182多项式,以xz的方式计算CRC-64,
位反转,带有一个补码的前后处理。提供
是指组合单独计算的CRC-64*/
/*版本历史记录:
1.0 2013年12月13日第一版
1.1 2013年12月13日测试代码中的修复注释
1.2 2013年12月14日在运行时确定端度
1.3 2013年12月15日为big-endian添加八字节处理
使用pthread库是可选的
1.4 2013年12月16日为有限的线程保护设置一次可变易失性
*/
#包括
#包括
#包括
/*以下pthread.h的include可以注释掉,以便不使用
用于表初始化的pthread库。在这种情况下,初始化
不会是线程安全的。那很好,只要能保证
只有一个线程使用crc64()*/
#包含带有-lpthread的/*链接*/
/*具有这些系数但反向的64位CRC多项式:
64, 62, 57, 55, 54, 53, 52, 47, 46, 45, 40, 39, 38, 37, 35, 33, 32,
31, 29, 27, 24, 23, 22, 21, 19, 17, 13, 12, 10, 9, 7, 4, 1, 0 */
#定义多边形UINT64_C(0xc96c5795d7870f42)
/*CRC计算表——由以下初始化函数填写:
打过一次电话。这些可以由在
同样的方式。有两个表,每个表对应一个endianess。既然这些是
静态,即本地,如果编译器
可以在编译时计算crc64()中的endianess检查*/
静态uint64_t crc64_little_表[8][256];
静态uint64_t crc64_big_表[8][256];
/*填写CRC-64常数表*/
静态无效crc64_init(uint64_t表[][256])
{
无符号n,k;
uint64_t crc;
/*为所有单字节序列生成CRC-64*/
对于(n=0;n<256;n++){
crc=n;
对于(k=0;k<8;k++)
crc=crc&1?POLY^(crc>>1):crc>>1;
表[0][n]=crc;
}
/*为后跟1到7个零的数字生成CRC-64*/
对于(n=0;n<256;n++){
crc=表[0][n];
对于(k=1;k<8;k++){
crc=表[0][crc&0xff]^(crc>>8);
表[k][n]=crc;
}
}
}
/*此函数被调用一次,以初始化CRC-64表以用于
小端建筑*/
静态无效crc64\u little\u初始(无效)
{
crc64_init(crc64_小表);
}
/*反转64位字中的字节*/
静态内联uint64\u t版本8(uint64\u t a)
{
uint64_t m;
m=UINT64_C(0xff00ff00ff00ff);
a=((a>>8)&m)|(a&m)>16)&m)|(a&m)>32 | a>8);
蓝--;
}
而(len>=8){
crc^=*(uint64_t*)下一步;
crc=crc64_小_表[7][crc&0xff]^
crc64_little_表[6][(crc>>8)&0xff]^
crc64_little_表[5][(crc>>16)&0xff]^
crc64_little_表[4][(crc>>24)&0xff]^
crc64_little_表[3][(crc>>32)&0xff]^
crc64_little_表[2][(crc>>40)&0xff]^
crc64_little_表[1][(crc>>48)&0xff]^
crc64_little_表[0][crc>>56];
下一个+=8;
len-=8;
}
while(len){
crc=crc64_little_table[0][(crc^*next++)和0xff]^(crc>>8);
蓝--;
}
返回~crc;
}
/*在big-endian体系结构上一次计算8个字节的CRC-64*/
静态内联uint64_t crc64_大(uint64_t crc,空*空,大小长度)
{
无符号字符*next=buf;
一次(crc64_big_init);
crc=~rev8(crc);
而(len和((uintptr_____________;t)next和7)!=0){
crc=crc64_大_表[0][(crc>>56)^*下一步++]^(crc=8){
crc^=*(uint64_t*)下一步;
crc=crc64_大_表[0][crc&0xff]^
crc64_大_表[1][(crc>>8)&0xff]^
crc64_大_表[2][(crc>>16)&0xff]^
package com.test;

import java.util.Arrays;

/**
 * CRC-64 implementation with ability to combine checksums calculated over different blocks of data.
 **/
public class CRC64 {

    /* ECMA-182 polynomial in bit-reversed form. */
    private final static long POLY = 0xc96c5795d7870f42L;

    /* CRC64 calculation table: table[n] is the remainder of the single byte n. */
    private final static long[] table;

    /* Current CRC value. */
    private long value;

    static {
        table = new long[256];

        for (int n = 0; n < 256; n++) {
            long crc = n;
            // shift out eight bits, folding in POLY whenever a 1 drops off
            for (int k = 0; k < 8; k++) {
                if ((crc & 1) == 1) {
                    crc = (crc >>> 1) ^ POLY;
                } else {
                    crc = (crc >>> 1);
                }
            }
            table[n] = crc;
        }
    }

    /**
     * Create a CRC64 with the initial value 0 (no data processed yet).
     **/
    public CRC64() {
        this.value = 0;
    }

    /**
     * Create a CRC64 holding an already computed CRC value.
     *
     * @param value the CRC value to wrap
     **/
    public CRC64(long value) {
        this.value = value;
    }

    /**
     * Create a CRC64 and immediately feed it the first len bytes of b.
     *
     * @param b   data buffer
     * @param len number of bytes of b to process, starting at index 0
     **/
    public CRC64(byte [] b, int len) {
        this.value = 0;
        update(b, len);
    }

    /**
     * Construct new CRC64 instance from its 8 byte, most-significant-byte-first
     * representation, i.e. the inverse of {@link #getBytes()}.
     *
     * All eight bytes are consumed so that the full 64-bit value is restored.
     *
     * @param b array whose first 8 bytes hold the CRC value
     * @return a CRC64 holding the decoded value
     * @throws ArrayIndexOutOfBoundsException if b has fewer than 8 bytes
     **/
    public static CRC64 fromBytes(byte [] b) {
        long l = 0;
        for (int i = 0; i < 8; i++) {
            l <<= 8;
            // mask after widening so a negative byte does not sign-extend
            l ^= (long) b[i] & 0xFF;
        }
        return new CRC64(l);
    }

    /**
     * Get 8 byte representation of current CRC64 value, most significant
     * byte first.
     *
     * @return a new 8 element array
     **/
    public byte[] getBytes() {
        byte [] b = new byte[8];
        for (int i = 0; i < 8; i++) {
            b[7 - i] = (byte) (this.value >>> (i * 8));
        }
        return b;
    }

    /**
     * Get long representation of current CRC64 value.
     **/
    public long getValue() {
        return this.value;
    }

    /**
     * Update CRC64 with new byte block.
     *
     * The value is complemented before and after processing, and the bytes
     * b[0..len-1] are folded in one at a time through the lookup table.
     *
     * @param b   data buffer
     * @param len number of bytes of b to process, starting at index 0
     **/
    public void update(byte [] b, int len) {

        int idx = 0;
        this.value = ~this.value;
        while (len > 0) {
            this.value = table[((int) (this.value ^ b[idx])) & 0xff] ^ (this.value >>> 8);
            idx++;
            len--;
        }
        this.value = ~this.value;
    }

    private static final int GF2_DIM = 64; /* dimension of GF(2) vectors (length of CRC) */

    /*
     * Multiply the GF(2) matrix mat by the bit vector vec: mat[i] is XORed
     * into the result for every set bit i of vec.
     */
    private static long gf2MatrixTimes(long [] mat, long vec)
    {
        long sum = 0;
        int idx = 0;
        while (vec != 0) {
            if ((vec & 1) == 1)
                sum ^= mat[idx];
            vec >>>= 1;
            idx++;
        }
        return sum;
    }

    /*
     * Store in square the GF(2) matrix product mat * mat.
     */
    private static void gf2MatrixSquare(long [] square, long [] mat)
    {
        for (int n = 0; n < GF2_DIM; n++)
            square[n] = gf2MatrixTimes(mat, mat[n]);
    }

    /*
     * Return the CRC-64 of two sequential blocks, where summ1 is the CRC-64 of the
     * first block, summ2 is the CRC-64 of the second block, and len2 is the length
     * of the second block.
     *
     * summ1 is advanced as if len2 zero bytes had been appended to the first
     * block and is then XORed with summ2.  Neither argument is modified; a new
     * CRC64 is returned.
     */
    static public CRC64 combine(CRC64 summ1, CRC64 summ2, long len2)
    {
        // degenerate case.
        if (len2 == 0)
            return new CRC64(summ1.getValue());

        int n;
        long row;
        long [] even = new long[GF2_DIM]; // even-power-of-two zeros operator
        long [] odd  = new long[GF2_DIM];  // odd-power-of-two zeros operator

        // put operator for one zero bit in odd
        odd[0] = POLY;      // CRC-64 polynomial

        row = 1;
        for (n = 1; n < GF2_DIM; n++) {
            odd[n] = row;
            row <<= 1;
        }

        // put operator for two zero bits in even
        gf2MatrixSquare(even, odd);

        // put operator for four zero bits in odd
        gf2MatrixSquare(odd, even);

        // apply len2 zeros to crc1 (first square will put the operator for one
        // zero byte, eight zero bits, in even)
        long crc1 = summ1.getValue();
        long crc2 = summ2.getValue();
        do {
            // apply zeros operator for this bit of len2
            gf2MatrixSquare(even, odd);
            if ((len2 & 1) == 1)
                crc1 = gf2MatrixTimes(even, crc1);
            len2 >>>= 1;

            // if no more bits set, then done
            if (len2 == 0)
                break;

            // another iteration of the loop with odd and even swapped
            gf2MatrixSquare(odd, even);
            if ((len2 & 1) == 1)
                crc1 = gf2MatrixTimes(odd, crc1);
            len2 >>>= 1;

            // if no more bits set, then done
        } while (len2 != 0);

        // return combined crc.
        crc1 ^= crc2;
        return new CRC64(crc1);
    }

    /*
     * Check that the first len bytes of b have the CRC-64 crcValue, both when
     * computed in one pass and when computed as two halves joined with
     * combine().  Throws an Exception describing the mismatch on failure.
     */
    private static void test(byte [] b, int len, long crcValue) throws Exception {

        /* Test CRC64 default calculation. */
        CRC64 crc = new CRC64(b, len);
        if (crc.getValue() != crcValue) {
            throw new Exception("mismatch: " + String.format("%016x", crc.getValue())
                + " should be " + String.format("%016x", crcValue));
        }

        /* test combine() */
        CRC64 crc1 = new CRC64(b, (len + 1) >>> 1);
        CRC64 crc2 = new CRC64(Arrays.copyOfRange(b, (len + 1) >>> 1, b.length), len >>> 1);
        crc = CRC64.combine(crc1, crc2, len >>> 1);

        if (crc.getValue() != crcValue) {
            throw new Exception("mismatch: " + String.format("%016x", crc.getValue())
                + " should be " + String.format("%016x", crcValue));
        }
    }

    /*
     * Self-test entry point: runs test() on three known vectors.
     */
    public static void main(String [] args) throws Exception {

        final byte[] TEST1 = "123456789".getBytes();
        final int    TESTLEN1 = 9;
        final long   TESTCRC1 = 0x995dc9bbdf1939faL; // ECMA.
        test(TEST1, TESTLEN1, TESTCRC1);

        final byte[] TEST2 = "This is a test of the emergency broadcast system.".getBytes();
        final int    TESTLEN2 = 49;
        final long   TESTCRC2 = 0x27db187fc15bbc72L; // ECMA.
        test(TEST2, TESTLEN2, TESTCRC2);

        final byte[] TEST3 = "IHATEMATH".getBytes();
        final int    TESTLEN3 = 9;
        final long   TESTCRC3 = 0x3920e0f66b6ee0c8L; // ECMA.
        test(TEST3, TESTLEN3, TESTCRC3);
    }
}