Javascript 大文件的快速文件哈希
我正在客户端对文件进行散列,以便在开始完整上传之前检查这些文件是否已存在于服务器上。然而,它似乎有点慢:散列一个 8MB 的文件大约需要 15 秒。我不确定这是因为库速度慢、JavaScript 速度慢,还是算法本身就慢。它使用的是
sha256
,这对于我所需要的可能有点过分了。速度是关键——加密安全和冲突并不特别重要
有没有更快的方法
// Hash every dropped file with sjcl's SHA-256, feeding data into the hash
// incrementally as FileReader progress events arrive.
$(document).on('drop', function(dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function(file) {
        var hasher = new sjcl.hash.sha256();
        var bytesHashed = 0;                 // how much of the buffer we already fed in
        var t0 = +new Date();
        var reader = new FileReader();
        // On each progress event, hash only the newly arrived slice.
        // NOTE(review): assumes reader.result is populated during onprogress —
        // browser-dependent; verify in the target browsers.
        reader.onprogress = function(ev) {
            var fresh = new Uint8Array(reader.result).subarray(bytesHashed, ev.loaded);
            hasher.update(fresh);
            bytesHashed = ev.loaded;
            if (ev.lengthComputable) {
                console.log((ev.loaded / ev.total * 100).toFixed(1) + '%');
            }
        };
        // On load, hash whatever tail was not covered by a progress event,
        // then emit the hex digest.
        reader.onload = function() {
            console.log('hashed', file.name, 'in', (+new Date()) - t0, 'ms');
            var tail = new Uint8Array(reader.result, bytesHashed);
            if (tail.length > 0) hasher.update(tail);
            console.log(sjcl.codec.hex.fromBits(hasher.finalize()));
        };
        reader.readAsArrayBuffer(file);
    });
});
编辑:刚发现了 SparkMD5。初始测试让它在一秒钟内处理完同一个 8MB 文件,但仍然比我希望的慢。下面这个版本速度要快得多:
// Cross-browser Blob.slice (older Firefox/Chrome used vendor prefixes).
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var chunkSize = 1024 * 1024 * 2; // read 2 MB at a time
// Hash dropped files incrementally with SparkMD5, one slice per read,
// chaining the next read from the previous onload.
$(document).on('drop', function (dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function (file) {
        var started = +new Date(), elapsed;
        var totalChunks = Math.ceil(file.size / chunkSize);
        var chunkIndex = 0;
        var spark = new SparkMD5.ArrayBuffer();
        var fileReader = new FileReader();
        // Kick off an async read of the current 2 MB slice.
        function loadChunk() {
            var from = chunkIndex * chunkSize;
            var to = Math.min(from + chunkSize, file.size);
            fileReader.readAsArrayBuffer(blobSlice.call(file, from, to));
        }
        fileReader.onload = function (e) {
            console.log("read chunk nr", chunkIndex + 1, "of", totalChunks);
            spark.append(e.target.result); // append array buffer
            ++chunkIndex;
            if (chunkIndex < totalChunks) {
                loadChunk();
            } else {
                elapsed = +new Date() - started;
                console.info("computed hash", spark.end(), 'for file', file.name, 'in', elapsed, 'ms'); // compute hash
            }
        };
        fileReader.onerror = function () {
            console.warn("oops, something went wrong.");
        };
        loadChunk();
    });
});
// Restored listing: this duplicate of the SparkMD5 version had been corrupted
// by machine translation (keywords like `function` rendered as `函数`,
// `||` split into `| | |`, identifiers mangled, e.g. `var-currentcunk`).
// Below is the same code in valid JavaScript.
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var chunkSize = 1024 * 1024 * 2; // 2 MB slices
// Incrementally MD5 each dropped file: read one slice, append it to the
// hash, then schedule the next read from onload until the file is consumed.
$(document).on('drop', function (dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function (file) {
        var startTime = +new Date(), elapsed;
        var chunks = Math.ceil(file.size / chunkSize);
        var currentChunk = 0;
        var spark = new SparkMD5.ArrayBuffer();
        var fileReader = new FileReader();
        var readNextChunk = function() {
            var start = currentChunk * chunkSize;
            var end = Math.min(start + chunkSize, file.size);
            fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
        };
        fileReader.onload = function (e) {
            console.log("read chunk nr", currentChunk + 1, "of", chunks);
            spark.append(e.target.result); // append array buffer
            ++currentChunk;
            if (currentChunk < chunks) {
                readNextChunk();
            } else {
                elapsed = +new Date() - startTime;
                console.info("computed hash", spark.end(), 'for file', file.name, 'in', elapsed, 'ms'); // compute hash
            }
        };
        fileReader.onerror = function () {
            console.warn("oops, something went wrong.");
        };
        readNextChunk();
    });
});
xxHash 给出 32 位哈希。它似乎比 SparkMD5 快 30% 左右。然而,它似乎不适用于 HTML5 的 ArrayBuffer
,因此该文件必须以文本形式读取
// Cross-browser Blob.slice fallback for older vendor-prefixed browsers.
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var chunkSize = 1024 * 1024 * 2; // 2 MB per read
// xxHash variant: the library here only accepts strings, so each slice is
// read with readAsText (caveat discussed in the surrounding text) and fed
// into the running xxHash state.
$(document).on('drop', function (dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function (file) {
        var started = +new Date(), elapsed;
        var totalChunks = Math.ceil(file.size / chunkSize);
        var chunkIndex = 0;
        var xxh = XXH();
        var fileReader = new FileReader();
        // Read the current slice as text (not as an ArrayBuffer).
        function loadChunk() {
            var from = chunkIndex * chunkSize;
            var to = Math.min(from + chunkSize, file.size);
            fileReader.readAsText(blobSlice.call(file, from, to));
        }
        fileReader.onload = function (e) {
            console.log("read chunk nr", chunkIndex + 1, "of", totalChunks);
            xxh.update(e.target.result);
            ++chunkIndex;
            if (chunkIndex < totalChunks) {
                loadChunk();
            } else {
                elapsed = +new Date() - started;
                console.info("computed hash", xxh.digest().toString(16), 'for file', file.name, 'in', elapsed, 'ms');
            }
        };
        fileReader.onerror = function () {
            console.warn("oops, something went wrong.");
        };
        loadChunk();
    });
});
不幸的是,它们在速度上几乎完全相同,而且它仍在复制。但是,它在原始8MB文件上运行大约270ms,这比15s要好得多。xxHash宣传了非常令人印象深刻的速度。@JasonLeBrun:我正在尝试。它不会将
ArrayBuffer
作为输入,这可能会有问题。
/**
 * Hack to make Uint8Array work like a Node.js Buffer.
 *
 * Copies bytes [sourceStart, sourceEnd) of this array into targetBuffer,
 * starting at index targetStart — mirroring Buffer#copy semantics.
 *
 * NOTE(review): extending a native prototype is an anti-pattern; kept only
 * because xxhashjs expects a Buffer-like `.copy` method on its input.
 *
 * @param {Buffer|Uint8Array} targetBuffer Buffer to copy into
 * @param {Number} targetStart Optional, Default: 0
 * @param {Number} sourceStart Optional, Default: 0
 * @param {Number} sourceEnd Optional, Default: source length
 * @returns {undefined}
 * @see http://nodejs.org/api/buffer.html#buffer_buf_copy_targetbuffer_targetstart_sourcestart_sourceend
 * @see https://developer.mozilla.org/en-US/docs/Web/API/Uint32Array
 */
Uint8Array.prototype.copy = function(targetBuffer, targetStart, sourceStart, sourceEnd) {
    targetStart = targetStart || 0;
    sourceStart = sourceStart || 0;
    // Use an explicit undefined check so a caller-supplied sourceEnd of 0
    // is honoured instead of silently falling back to this.length.
    if (sourceEnd === undefined) sourceEnd = this.length;
    // Bug fix: Buffer#copy places source[sourceStart] at target[targetStart].
    // The previous loop wrote to target[targetStart + i], shifting the data
    // by sourceStart whenever sourceStart > 0.
    for (var i = sourceStart; i < sourceEnd; ++i) {
        targetBuffer[targetStart + (i - sourceStart)] = this[i];
    }
};
// xxHash over the raw bytes: read the file as one ArrayBuffer and feed
// Uint8Array views of the newly arrived regions into the hash as progress
// events fire (relies on the Buffer-compat `.copy` shim above).
$(document).on('drop', function(dropEvent) {
    dropEvent.preventDefault();
    _.each(dropEvent.originalEvent.dataTransfer.files, function(file) {
        var xxh = XXH();
        var bytesDone = 0;           // offset of the first byte not yet hashed
        var t0 = +new Date();
        var reader = new FileReader();
        // NOTE(review): assumes reader.result holds data during onprogress —
        // browser-dependent; verify in the target browsers.
        reader.onprogress = function(ev) {
            var count = ev.loaded - bytesDone;
            var view = new Uint8Array(reader.result, bytesDone, count);
            bytesDone += count;
            xxh.update(view);
            if (ev.lengthComputable) {
                console.log((ev.loaded / ev.total * 100).toFixed(1) + '%');
            }
        };
        // Hash the remaining tail, then report the 32-bit digest in hex.
        reader.onload = function() {
            xxh.update(new Uint8Array(reader.result, bytesDone));
            var elapsed = +new Date() - t0;
            console.info("computed hash", xxh.digest().toString(16), 'for file', file.name, 'in', elapsed, 'ms');
        };
        reader.readAsArrayBuffer(file);
    });
});