Reading individual JSON objects from a log file containing multiple lines of JSON

I am trying to read a log file in which each entry is a line of JSON (JSON-structured text).

What I ultimately want to do is iterate over each line and, if

"Event":"SparkListenerTaskEnd"

is found, parse that JSON line for the values of the keys "Finish Time" and "Executor CPU Time".

I am new to node.js, so I may be getting this completely wrong, but so far this is the piece of code I have for iterating over the file:

exports.getId(function(err, id){
    console.log(id);
    var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
        var content = data.split('\n');
        async.map(content, function (item, callback) {
            callback(null, JSON.parse(item));
        }, function (err, content) {
            console.log(content);
        });
    });
    //console.log(data);
});
But this does not seem to do anything. I know the log file can be read, though, because if I uncomment //console.log(data) I can see it.

Here is an example of one of the JSON lines I am talking about:

{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1514983570810,"Executor ID":"0","Host":"192.168.111.123","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1514983574496,"Failed":false,"Killed":false,"Accumulables":[{"ID":22,"Name":"internal.metrics.input.recordsRead","Update":99171,"Value":99171,"Internal":true,"Count Failed Values":true},{"ID":20,"Name":"internal.metrics.shuffle.write.writeTime","Update":5893440,"Value":5893440,"Internal":true,"Count Failed Values":true},{"ID":19,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":3872,"Value":3872,"Internal":true,"Count Failed Values":true},{"ID":18,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":1468516,"Value":1468516,"Internal":true,"Count Failed Values":true},{"ID":10,"Name":"internal.metrics.peakExecutionMemory","Update":16842752,"Value":16842752,"Internal":true,"Count Failed Values":true},{"ID":9,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":8,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":7,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":6,"Name":"internal.metrics.jvmGCTime","Update":103,"Value":103,"Internal":true,"Count Failed Values":true},{"ID":5,"Name":"internal.metrics.resultSize","Update":2597,"Value":2597,"Internal":true,"Count Failed Values":true},{"ID":4,"Name":"internal.metrics.executorCpuTime","Update":1207164005,"Value":1207164005,"Internal":true,"Count Failed Values":true},{"ID":3,"Name":"internal.metrics.executorRunTime","Update":2738,"Value":2738,"Internal":true,"Count Failed Values":true},{"ID":2,"Name":"internal.metrics.executorDeserializeCpuTime","Update":542927064,"Value":542927064,"Internal":true,"Count Failed Values":true},{"ID":1,"Name":"internal.metrics.executorDeserializeTime","Update":835,"Value":835,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":835,"Executor Deserialize CPU Time":542927064,"Executor Run Time":2738,"Executor CPU Time":1207164005,"Result Size":2597,"JVM GC Time":103,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":1468516,"Shuffle Write Time":5893440,"Shuffle Records Written":3872},"Input Metrics":{"Bytes Read":0,"Records Read":99171},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[{"Block ID":"broadcast_1_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":5941,"Disk Size":0}},{"Block ID":"broadcast_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":9568,"Disk Size":0}},{"Block ID":"broadcast_0_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":25132,"Disk Size":0}},{"Block ID":"broadcast_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":390808,"Disk Size":0}}]}}
Update: here is my full code. I am sure it is not pretty, but it works. I will now look at how to improve it.

var http = require("http");
var fs = require('fs');
var async = require('async');
var readline = require('readline');


//get file name
var options =  {
    "method" : "GET",
    "hostname" : "xxx.xxx.xxx.xxx",
    "port" : "18080",
    "path" : "/api/v1/applications/"
};
exports.getId = function(callback) {
    var req = http.request(options, function (res) {

        var chunks = [];

        res.on("data", function (chunk) {
            chunks.push(chunk);
        });

        res.on("end", function () {
            var body = JSON.parse(Buffer.concat(chunks));

            var arrFound = Object.keys(body).filter(function(key) {
                if (body[key].name.indexOf("TestName") > -1) {
                    return body[key].name;
                }
            }).reduce(function(obj, key){
                obj = body[key].id;
                return obj;
            }, {});
            //console.log("ID: ", arrFound);
            callback(null, arrFound);
        });
    });
    req.end();
}

// parse the log file one line at a time and only use lines where Event = SparkListenerTaskEnd
exports.getId(function(err, id){
    console.log(id);
    var lineReader = readline.createInterface({
        input: fs.createReadStream('../PC Files/' + id, 'utf8')
      });

    lineReader.on('line', function (line) {
        var obj = JSON.parse(line);
        if(obj.Event == "SparkListenerTaskEnd") {
            console.log('Line from file:', obj['Task Info']['Finish Time']);
        }
      });
});
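
A possible extension of the line handler above, which collects both "Finish Time" and "Executor CPU Time" and uses readline's 'close' event to know when the whole file has been read, might look like this sketch (it reuses the same lineReader as above):

var results = [];

lineReader.on('line', function (line) {
    if (!line.trim()) return;                 // skip blank lines
    var obj = JSON.parse(line);
    if (obj.Event === "SparkListenerTaskEnd") {
        results.push({
            finishTime: obj['Task Info']['Finish Time'],
            executorCpuTime: obj['Task Metrics']['Executor CPU Time']
        });
    }
});

// 'close' fires once the underlying stream has ended
lineReader.on('close', function () {
    console.log(results.length + ' SparkListenerTaskEnd events found');
    console.log(results);
});
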
Adam, I tried the code you suggested, but I get the following error:

null
fs.js:646
  return binding.open(pathModule._makeLong(path), stringToFlags(flags), mode);
             ^

Error: ENOENT: no such file or directory, open '../PC Files/null'
    at Object.fs.openSync (fs.js:646:18)
    at Object.fs.readFileSync (fs.js:551:33)
    at /test-runner/modules/getEventLog.js:61:19
    at IncomingMessage.<anonymous> (/test-runner/modules/getEventLog.js:35:13)
    at emitNone (events.js:111:20)
    at IncomingMessage.emit (events.js:208:7)
    at endReadableNT (_stream_readable.js:1056:12)
    at _combinedTickCallback (internal/process/next_tick.js:138:11)
    at process._tickCallback (internal/process/next_tick.js:180:9)

At first glance, it looks like you are not using the callback correctly.

I assume you are calling the getId function like this:

getId(function(error, data) {
  // Do something with data
});
In that case, the callback is returned like this:

// Remove the error, this will not be entered as a parameter
// Add callback as parameter
exports.getId(function(id, callback){
    console.log(id);
    var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
        var content = data.split('\n');
        // Removed callback from here
        // We will not have access to the
        // to it here
        async.map(content, function (item) {
            callback(null, JSON.parse(item));
        // Add callback with error in place of null
        }, function (err, content) {
            callback(err)
            console.log(content);
        });
    });
    //console.log(data);
});
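
It is also worth noting that fs.readFileSync does not take a callback at all; it simply returns the file contents (or throws on error), which is why nothing inside that callback ever runs. A callback-free variant of the same idea might look like this sketch (id comes from getId, as above):

var fs = require('fs');

// readFileSync returns the contents directly; any extra callback argument is ignored
var data = fs.readFileSync('../PC Files/' + id, 'utf8');

// split into lines, drop empty ones, and parse each line as JSON
var content = data.split('\n')
    .filter(function (line) { return line.trim().length > 0; })
    .map(function (line) { return JSON.parse(line); });

console.log(content);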

Once the JSON (the text) has been converted into JavaScript objects (i.e. after JSON.parse, which is where the "parsing" happens), there is no JSON any more. So assuming each line contains a separate JSON object to be parsed individually, the question really is: "how do I do X with the lines where f(g(Y)) is true/false?". The contents of f and g are largely irrelevant to the structure (in this case, "the result of parsing the line as JSON"); take it one step at a time.

Looking at the JSON spec, I am quite sure your log file is not JSON. However, it is common for each line (!) of a log file to contain valid JSON, so your problem boils down to reading the lines and JSON-decoding each one before using the content further.

Yes, each line is basically just a line of text, but it is structured as JSON. The file itself is not a JSON file.

How are you calling the getId function? Can you post that code?
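
As a footnote to the point above about reading the lines and JSON-decoding each one before using the content, a minimal sketch (it assumes lines already holds the log file split on '\n', with empty lines removed):

// decode each line, then keep only the events of interest
var events = lines
    .map(function (line) { return JSON.parse(line); })
    .filter(function (obj) { return obj.Event === "SparkListenerTaskEnd"; });

// events now contains plain JavaScript objects; there is no "JSON" left at this point
console.log(events.length);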