Javascript Node.js EBADF使用可写流写入文件时出错
我尝试使用Node.js处理一个500MB的Apache日志文件,将其语法从
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
到
,然后写入另一个文本文件
为了更好的内存控制和性能,我使用了fs.createReadStream
和fs.createWriteStream
,但只将第一行写入output.txt
,因为脚本以错误结尾:
{[Error:EBADF,write]errno:9,code:'EBADF'}
这里我发布了一些可能有助于调试的信息
input.txt的头部:
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
output.txt的内容
:
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
完整脚本:
var fs = require('fs');
// Module-level state shared by the stream callbacks defined below.
var data =''; // Text of the current, not-yet-terminated line, carried between chunks
var n=0; // Number of lines handed to put(); used for line control (stop after 100+ lines)
// Source stream: input.txt is read as ASCII text starting at byte offset 0.
var r = fs.createReadStream('./input.txt',{
encoding: 'ascii',
start:0,
// end: 100000,
});
// Destination stream: lines are written to output.txt as ASCII text.
var w = fs.createWriteStream('./output.txt',{
encoding:'ascii'
});
// Writes one complete line to the output stream `w`, appending '\n',
// and increments the line counter `n`.
// The return value of w.write() is not inspected.
function put(line){ // append the line to w
++n;
w.write(line+'\n');
}
// Tears down both streams by calling destroy() on the reader and the writer.
// Registered as the read stream's "end" handler and also called from onData
// once more than 100 lines have been written.
// NOTE(review): when reached via the "end" event the reader is presumably
// already closing, so r.destroy() may run a second time; and w.destroy()
// appears to discard writes that are still queued, which would explain the
// truncated output and the EBADF error. Confirm against the Node.js stream
// documentation (w.end() is the graceful alternative).
function end(){
r.destroy();
w.destroy();
}
// "data" handler for the read stream: reassembles lines that straddle chunk
// boundaries and forwards every complete line to put().
// chunk: the string delivered by the read stream for this event.
function onData(chunk){
var hasNewline = chunk.indexOf('\n')!==-1;
if(hasNewline){
var arr = chunk.split('\n');
var first = arr.shift(); // piece that completes the partial line buffered in `data`
var last = arr.pop(); // text after the final '\n'; not a complete line yet
data+=first;
put(data); // buffered prefix + first piece form one complete line
arr.forEach(function(line){
put(line); // pieces in the middle are already complete lines
});
data=last; // carry the unfinished tail over to the next chunk
}else{
// No line break in this chunk: keep accumulating the current line.
data+=chunk;
}
// Line control: once more than 100 lines have been written, tear both streams down.
if(n>100){
end();
}
}
// Error handler shared by both streams: prints the error object to the console.
function onErr(e){
console.log(e);
}
// Wire the handlers up: process each chunk as it arrives, tear everything
// down when the input ends, and log errors raised by either stream.
r.addListener( "data", onData);
r.addListener( "end", end);
r.addListener('error',onErr);
w.addListener('error',onErr);
我看到了两个问题
第一个问题是,`end`函数调用了ReadStream的`destroy`,但在一般情况下,该函数是由`end`事件触发的,这意味着流此时已经关闭,并且会自动调用`destroy`。也就是说,`r.destroy`会被调用两次,从而触发错误。这就是您看到打印出错误的原因。
第二个问题是,您在WriteStream上调用了`destroy`。我建议您阅读相关文档:具体来说,其中提到“任何排队的写入数据都不会被发送”,这就是您丢失部分输出的原因。
基本上,只有在希望ReadStream提前关闭时(就像您的`n>100`情况那样),才应该调用ReadStream的`destroy`。而对于WriteStream,则应使用`end`,以便流有时间写入所有已缓冲的数据。
下面是一个简化的版本,我认为其行为应该是相同的。我也没有特意绑定`error`事件,因为无论如何,错误都会自动打印到控制台。
var fs = require('fs');

// Upper bound on the number of lines copied to the output file.
var MAX_LINES = 100;

var data = ''; // Unterminated tail of the previous chunk, completed by the next one
var n = 0; // Lines written so far
var stopped = false; // True once the line limit has closed the read stream early

var r = fs.createReadStream('./input.txt', {
  encoding: 'ascii',
  start: 0
});
var w = fs.createWriteStream('./output.txt', {
  encoding: 'ascii'
});

// Split every chunk into complete lines and copy them to the output file,
// stopping as soon as MAX_LINES lines have been written.
r.on('data', function (chunk) {
  if (stopped) return; // Ignore anything delivered after the early stop

  data += chunk;
  var lines = data.split('\n');
  data = lines.pop(); // Last piece has no newline yet; keep it for the next chunk

  var canContinue = true;
  for (var i = 0; i < lines.length; i++) {
    if (n >= MAX_LINES) {
      // Stop any more 'data' events and close the file.
      // This also triggers 'close' below, which ends the write stream.
      stopped = true;
      r.destroy();
      return;
    }
    n++;
    // write() returns false once the writer's internal buffer is full.
    canContinue = w.write(lines[i] + '\n');
  }

  if (!canContinue) {
    // Backpressure: stop reading until the writer has flushed its buffer,
    // so a large input file is never queued up in memory.
    r.pause();
    w.once('drain', function () {
      if (!stopped) r.resume();
    });
  }
});

// End of input: flush the final line if the file did not end with a newline,
// still honouring the line limit. The write stream itself is ended on 'close'.
r.on('end', function () {
  if (data !== '' && n < MAX_LINES) {
    n++;
    w.write(data);
  }
});

// Fires both after a normal end of file and after the early destroy() above.
// end() (unlike destroy()) lets the writer flush everything still queued.
r.on('close', function () {
  w.end();
});
啊,行得通。谢谢,这看起来像是一个典型的新手错误。