Javascript 在tesseract.js中加载脱机lang数据
标签:javascript, tesseract.js
我正在尝试将自己训练的数据加载到 tesseract.js 中。由于这些文件都放在本地,我希望所有内容都能离线加载。我使用的代码如下所示:
<script src="tesseract.js"></script>
<script>
//Set the worker, core and lang to local files
/*
 * Replaces the global Tesseract with an instance whose worker and core
 * scripts are loaded from the same directory as the current page.
 */
(function () {
  // Derive the absolute URL of the directory containing this page:
  // split the pathname, drop the trailing "**.html" segment, and
  // re-attach the remainder to the origin.
  var segments = window.location.pathname.split('/');
  segments.pop();
  var path = window.location.origin + segments.join("/");

  console.log(path);

  window.Tesseract = Tesseract.create({
    workerPath: path + '/worker.js',
    // NOTE(review): langPath is deliberately left disabled here, so the
    // language data location is whatever Tesseract.create uses by default.
    //langPath: path + '/traineddata/',
    corePath: path + '/index.js',
  });
})();
</script>
<script>
/**
 * Runs recognition on the given file and reports progress and the final
 * result through progressUpdate.
 *
 * The "#log" element is emptied first, and the language is read from the
 * current value of the "#langsel" element.
 *
 * @param {*} file - Image input handed straight to Tesseract.recognize.
 */
function recognizeFile(file) {
  // Clear whatever a previous run left in the log.
  var logElement = document.querySelector("#log");
  logElement.innerHTML = '';

  var options = {
    lang: document.querySelector('#langsel').value
  };

  // Forward every intermediate progress packet to the UI.
  function onProgress(packet) {
    console.info(packet);
    progressUpdate(packet);
  }

  // Wrap the final result in a synthetic 'done' packet.
  function onDone(data) {
    console.log(data);
    progressUpdate({ status: 'done', data: data });
  }

  Tesseract.recognize(file, options)
    .progress(onProgress)
    .then(onDone);
}
</script>
我在 /traineddata 文件夹中同时放了 eng.traineddata 和 eng.traineddata.gz,因为解压(ungzip)步骤似乎被跳过了。有什么是我忽略的吗?感谢您的帮助。

我知道这个问题已经很久了,但最近我需要在自己的一个项目中使用 Tesseract.js,并且需要从本地加载数据文件,所以下面是我的做法:我没有创建新的 worker,而是修改了默认的 worker 选项(workerOptions)。也就是说,我没有使用 Tesseract.createWorker,而是直接设置路径,然后使用 recognize。
// Override the default worker options so Tesseract loads its files from this
// site. window.location.origin supplies the protocol://domain.com part.

// Directory holding the language data files,
// e.g. protocol://domain.com/scripts/tesseract/dist/
Tesseract.workerOptions.langPath = window.location.origin + "/scripts/tesseract/dist/";

// Location of worker.js. The original author's note says overriding the
// worker and core paths was optional for them.
Tesseract.workerOptions.workerPath = window.location.origin + "/scripts/tesseract/dist/worker.js";

// Location of index.js (the core script).
Tesseract.workerOptions.corePath = window.location.origin + "/scripts/tesseract/dist/index.js";
这样一来,worker 路径和 core 路径仍然保持指向默认的 CDN,不需要改动。
PS:当使用本地的 worker.js 和 core.js 路径时,我在 worker.js 的 postMessage() 处遇到了未捕获的错误。这就是为什么我只对语言数据(langData)使用本地路径。我仍然不知道如何修复它,也不知道它为什么会发生。不过,您可以关注这个问题(原文链接缺失)。

我通过从(原文链接缺失)……和来自(原文链接缺失)的语言 gz 文件……(此处原文不完整)

评论:
- 嗨,你找到解决这个问题的方法了吗?我也遇到了类似的问题。
- 这在任何意义上都算不上解决方案,但我把数据转换成了 blob,并把它放进一个 js 文件里来加载。
- 那么 tesseract 的 index 文件能够读取该 blob 文件并给出正确的输出吗?
- 不管怎样,我已经能够在本地加载语言文件,现在对我来说工作正常。
AdaptedTemplates != NULL:Error:Assert failed:in file ../classify/adaptmatch.cpp, line 190
SCRIPT0: abort() at Error
at Na (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:36:24)
at ka (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:511:83)
at Module.de._abort (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:377:166)
at $L (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:55709)
at jpa (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:392:22274)
at lT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80568)
at mT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80698)
at BS (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:69009)
at bP (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:110094)
at jT (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:391:80280)
at RJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:19088)
at QJ (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:387:17789)
at zI (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:403:90852)
at tw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:49079)
at rw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:48155)
at lw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:39071)
at _v (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:22565)
at aw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:24925)
at cw (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:401:27237)
at oj (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:24689)
at Og (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:386:10421)
at $.prototype.Recognize (file:///C:/Users/user/Downloads/tesseract.js-master/dist/index.js:558:379)
at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8814:9)
at Anonymous function (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8786:9)
at xhr.onerror (file:///C:/Users/user/Downloads/tesseract.js-master/dist/worker.js:8429:9)
If this abort() is unexpected, build with -s ASSERTIONS=1 which can give more information.
index.js (8,1)
// Override the default worker options so Tesseract loads its files from this
// site. window.location.origin supplies the protocol://domain.com part.

// Directory holding the language data files,
// e.g. protocol://domain.com/scripts/tesseract/dist/
Tesseract.workerOptions.langPath = window.location.origin + "/scripts/tesseract/dist/";

// Location of worker.js. The original author's note says overriding the
// worker and core paths was optional for them.
Tesseract.workerOptions.workerPath = window.location.origin + "/scripts/tesseract/dist/worker.js";

// Location of index.js (the core script).
Tesseract.workerOptions.corePath = window.location.origin + "/scripts/tesseract/dist/index.js";
// Replace the global Tesseract with an instance whose worker script,
// language data and core script are all served from /tesseract/ on the
// current origin.
window.Tesseract = Tesseract.create({
  // worker.js taken from tesseract.js-1.0.10
  workerPath: window.location.origin + "/tesseract/worker.js",
  // Directory containing the language data files
  langPath: window.location.origin + "/tesseract/",
  // index.js taken from tesseract.js-core-0.1.0
  corePath: window.location.origin + "/tesseract/index.js",
});