试图获取一个非常大的文件的散列（SHA-512），在javascript中超过2.5g_Javascript_Hash_Cryptography_Filereader_Arraybuffer

试图获取一个非常大的文件的散列（SHA-512），在javascript中超过2.5g

javascript hash cryptography

试图获取一个非常大的文件的散列（SHA-512），在javascript中超过2.5g,javascript,hash,cryptography,filereader,arraybuffer,Javascript,Hash,Cryptography,Filereader,Arraybuffer,我正在尝试获取一个大文件的SHA512。2.5g，可能更大的文件。因此，我认为创建一个arraybuffer的方法是通过crypto.division.digest API进行摘要的问题是我总是有一个阵列缓冲区分配失败是我的块大小，数组缓冲区有限制。我不知道了？或者有更好的方法来获取哈希摘要，而不是使用完整的arraybuffer // received a file object function CalculateHash(file) { var obj = {

我正在尝试计算一个大文件的 SHA-512（文件约 2.5 GB，也可能更大）。我的思路是先把文件读入一个 ArrayBuffer，再通过 crypto.subtle.digest API 生成摘要。

问题是我总是得到这个错误：

Array buffer allocation failed（数组缓冲区分配失败）

是我的块大小有问题，还是 ArrayBuffer 本身有大小限制？我已经没有头绪了。或者有没有更好的方法来获取哈希摘要，而不必使用完整的 ArrayBuffer？

// Receives a File object, reads it in 10 MiB slices, concatenates every slice
// into one in-memory buffer, and finally hashes that buffer with digestMessage().
// NOTE(review): `reader` and `hash` are used below but are not declared in this
// snippet; presumably a FileReader and a result object defined elsewhere — confirm.
    function CalculateHash(file)
    {
 var obj = { File : file}; 
    
     // Slice size in bytes (10 * 1024 * 1024).
     var chunkSize = 10485760;
      const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
      // Indices [n-1, ..., 1, 0]; reversed so that pop() hands them out in ascending order.
      const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
      
    
    
      
      // Accumulates everything read so far; stays null until the first slice arrives.
      var buffer = null;
      
      // Runs after each slice has been read: append it to `buffer`, then request the next one.
      reader.onload =  async function (evt) {
        if (buffer == null) {
          buffer = evt.currentTarget.result;
        } else {
          // Allocates a new array sized for all previous data plus the new slice and
          // copies both into it, so the whole file ends up in one contiguous buffer.
          // Presumably this is where "Array buffer allocation failed" is raised for
          // multi-gigabyte files — confirm against the actual stack trace.
          var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
          tmp.set(new Uint8Array(buffer), 0);
          tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
          buffer = tmp;
        }    
        readNext();
      }
      
    
        // Starts reading the next slice, or hashes the accumulated buffer once the queue is empty.
        var readNext = async function () {
          if (chunksQueue.length > 0) {
            const chunkId = chunksQueue.pop();
            // Byte offset of this slice within the file.
            const sentSize = chunkId * chunkSize;
            const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
            reader.readAsArrayBuffer(chunk);
    
          } else {
            // All slices consumed: digest the full buffer and store the hex string upper-cased.
            var x = await digestMessage(buffer);
            hash.SHA512 = x.toUpperCase();   
            // Drop the reference so the accumulated data can be collected.
            buffer = null;
           
          }
      }
    
      readNext();
    }
    
    /**
     * Hashes a binary buffer with SHA-512 via the Web Crypto API.
     *
     * @param {ArrayBuffer|ArrayBufferView} file - the bytes to hash (a buffer, despite the name)
     * @returns {Promise<string>} the digest as a lowercase hexadecimal string
     */
    async function digestMessage(file) {
      const digestBytes = new Uint8Array(await crypto.subtle.digest('SHA-512', file));
      let hex = '';
      for (const byte of digestBytes) {
        // Two hex digits per byte, zero-padded so small values keep their width.
        hex += byte.toString(16).padStart(2, '0');
      }
      return hex;
    }

根据 @ArtjomB. 的回答，问题的关键在于需要使用渐进式散列，以及 ArrayBuffer 和浏览器本身的限制。

这是最终可以工作的代码。它混合了两种方法：浏览器本机摘要比 CryptoJS 库快，所以只有当文件大于 1 GB 时才使用 CryptoJS 库（渐进式散列），否则使用浏览器本机摘要。欢迎任何建议。

    // The library imported below presumably expects page globals; these aliases
    // provide them inside the worker (TODO confirm against Crypto.min.js).
    var window = self;
    var document = {};
    self.importScripts("/Crypto.min.js");

    /**
     * Reads a Blob (a File or a slice of one) fully into memory.
     *
     * @param {Blob} blob - the data to read
     * @returns {Promise<ArrayBuffer>} resolves with the bytes; rejects with the
     *   reader's error so a failed read is not silently ignored
     */
    function readBlobAsArrayBuffer(blob) {
      return new Promise(function (resolve, reject) {
        const reader = new FileReader();
        reader.onload = function () { resolve(reader.result); };
        reader.onerror = function () { reject(reader.error); };
        reader.readAsArrayBuffer(blob);
      });
    }

    /**
     * Worker entry point. Expects a message whose data is `{ File: <File> }` and
     * replies with `{ Hash: <upper-case hex SHA-512> }`.
     *
     * Files above `largeFileTrigger` are hashed progressively with CryptoJS: each
     * slice is fed to the hasher as soon as it is read, so only one slice is held
     * in memory at a time. Smaller files (including empty ones) are read in a
     * single pass and hashed with digestMessage().
     *
     * A failed read rejects this handler's promise instead of leaving the caller
     * waiting with no signal; no message is posted in that case.
     */
    onmessage = async function (args) {
      const file = args.data.File;
      const chunkSize = 10485760;          // bytes per slice (10 MiB)
      const largeFileTrigger = 1048576000; // size threshold in bytes (1000 MiB)
      let hexHash;

      if (file.size > largeFileTrigger) {
        const sha = CryptoJS.algo.SHA512.create();
        for (let offset = 0; offset < file.size; offset += chunkSize) {
          // Blob.slice clamps the end offset, so the last slice may be shorter.
          const chunk = await readBlobAsArrayBuffer(file.slice(offset, offset + chunkSize));
          sha.update(arrayBufferToWordArray(chunk));
        }
        hexHash = sha.finalize().toString();
      } else {
        // One read yields one buffer: no per-chunk re-copying, and a zero-byte
        // file produces an empty buffer rather than null.
        hexHash = await digestMessage(await readBlobAsArrayBuffer(file));
      }

      postMessage({ Hash: hexHash.toUpperCase() });
    };
    
    /**
     * Computes the SHA-512 digest of the given bytes using the Web Crypto API.
     *
     * @param {ArrayBuffer|ArrayBufferView} file - the data to hash (a buffer, despite the name)
     * @returns {Promise<string>} lowercase hexadecimal representation of the digest
     */
    async function digestMessage(file) {
      const rawDigest = await crypto.subtle.digest('SHA-512', file);
      // Map each digest byte straight to a zero-padded two-digit hex pair.
      const hexPairs = Array.from(new Uint8Array(rawDigest), (octet) => octet.toString(16).padStart(2, '0'));
      return hexPairs.join('');
    }
    
    /**
     * Converts an ArrayBuffer into a CryptoJS WordArray.
     *
     * Bytes are packed four at a time into signed 32-bit words, most significant
     * byte first. When the byte count is not a multiple of four, the unused low
     * bytes of the last word are zero; the true byte count is passed separately
     * as the second argument to WordArray.create.
     *
     * @param {ArrayBuffer} ab - the raw bytes to convert
     * @returns {Object} the value returned by CryptoJS.lib.WordArray.create
     */
    function arrayBufferToWordArray(ab) {
      const bytes = new Uint8Array(ab);
      const words = new Array(Math.ceil(bytes.length / 4)).fill(0);
      for (let index = 0; index < bytes.length; index += 1) {
        // Position within the word decides the shift: 24, 16, 8, then 0 bits.
        words[index >>> 2] |= bytes[index] << (24 - (index & 3) * 8);
      }
      return CryptoJS.lib.WordArray.create(words, bytes.length);
    }

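    var window = self;
    var document = {};
    self.importScripts("/Crypto.min.js");

    onmessage = async function (args) {
      var obj = args.data;
      var reader = new FileReader();
      var hash = {};
      var chunkSize = 10485760;
      var largeFileTrigger = 1048576000;
      const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
      const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
      var isLargeFile = obj.File.size > largeFileTrigger;
      var buffer = null;
      var progressiveArray = [];
      reader.onload = async function (evt) {
        if (isLargeFile) {
          progressiveArray.push(evt.currentTarget.result);
        } else {
          if (buffer == null) {
            buffer = evt.currentTarget.result;
          } else {
            var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
            tmp.set(new Uint8Array(buffer), 0);
            tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
            buffer = tmp;
          }
        }
        readNext();
      }

      var readNext = async function () {
        if (chunksQueue.length > 0) {
          const chunkId = chunksQueue.pop();
          const sentSize = chunkId * chunkSize;
          const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
          reader.readAsArrayBuffer(chunk);
        } else {
          var hexHash = null;
          if (isLargeFile) {
            var sha = CryptoJS.algo.SHA512.create();
            for (var i = 0; i < progressiveArray.length; i++) {
              sha.update(arrayBufferToWordArray(progressiveArray[i]));
            }
            hexHash = sha.finalize().toString();
          } else {
            hexHash = await digestMessage(buffer);
          }

          SHA512 = hexHash.toUpperCase();
          buffer = null;
          progressiveArray = null;
          postMessage({ Hash: SHA512 });
        }
      }

      readNext();
    }

    async function digestMessage(file) {
      const hashBuffer = await crypto.subtle.digest('SHA-512', file);           // 对消息进行散列
      const hashArray = Array.from(new Uint8Array(hashBuffer));                     // 将缓冲区转换为字节数组
      const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // 将字节转换为十六进制字符串
      return hashHex;
    }

    function arrayBufferToWordArray(ab) {
      var i8a = new Uint8Array(ab);
      var a = [];
      for (var i = 0; i < i8a.length; i += 4) {
        a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
      }
      return CryptoJS.lib.WordArray.create(a, i8a.length);
    }

通常，每当我需要对一个大文件进行哈希运算时，我只对其中的 1MB 进行采样。既然这 1MB 已经非常独特，就没有充分的理由对整个文件进行哈希运算。

大文件的前 1MB 可能会发生更改，但假设是文件最后的字节发生了更改，您的哈希运算就会给出错误的结果，不是吗？

似乎 crypto.subtle.digest 不提供渐进式散列。如果您想与命令行散列工具兼容，可能需要另一个散列实现。请参阅我基于 crypto-js 的慢速散列的答案。

@ArtjomB. 谢谢，我认为您解决了我的问题。我混合了这两种策略，因为本机摘要比 crypto.js 快。大于 1 GB 的文件将由渐进式散列处理，较小的文件将由本机摘要处理。

@C.DrICBOIVIN：没有答案的问题对以后的程序员帮助不大。请考虑写一个回答，说明您是如何解决问题的。谢谢。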