如何解析用python实时编写的JSON文件?

如何解析用python实时编写的JSON文件?,python,linux,parsing,Python,Linux,Parsing,我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送 哪种方法是不断解析文件并将新行上传到数据库的最佳方法 补充:我不想使用任何与麋鹿堆相关的东西 谢谢 要读取文件,就像执行tail命令一样,我需要编写一个小脚本: logtodb.py import json import os import time def tail(s

我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送

哪种方法是不断解析文件并将新行上传到数据库的最佳方法

补充：我不想使用任何与 ELK Stack（Elasticsearch/Logstash/Kibana）相关的东西


谢谢

要读取文件,就像执行
tail
命令一样,我需要编写一个小脚本:

logtodb.py

import json
import os
import time


def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix ``tail -f`` command.

    The position is moved to the end of the file first, so only data written
    after this generator starts is produced.  When no new data is available,
    a short pause is taken (to avoid a busy loop) and an empty string is
    yielded, matching the original contract.  The generator ends cleanly
    once the file object is closed.

    Adapted from https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail
    """
    stream_file.seek(0, os.SEEK_END)  # Skip existing content; follow new lines only

    while True:
        if stream_file.closed:
            # PEP 479: raising StopIteration inside a generator becomes a
            # RuntimeError on Python 3.7+; `return` is the correct way to end.
            return

        line = stream_file.readline()
        if not line:
            # At EOF: wait briefly for the writer instead of spinning the CPU.
            time.sleep(0.1)

        yield line


def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and hand each entry to *db*.

    Runs indefinitely: every well-formed JSON line appended to the file is
    parsed; malformed (e.g. partially written) lines are silently skipped.
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                entry = json.loads(raw_line)
            except ValueError:
                # Corrupted or incomplete JSON — skip and keep following.
                continue

            # Do what you want with data:
            # db.execute("INSERT INTO ...", entry["level"], ...)
            print(entry["message"])
import random
import json
import time
import threading
import logtodata


def generate_test_json_log(log_path):
    """Endlessly append random JSON log lines to *log_path* (test helper).

    Each line is one ``{"level": ..., "message": ...}`` object; the stream is
    flushed after every write so a concurrent reader sees it immediately.
    """
    with open(log_path, "w") as out:
        while True:
            level = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": level,
                "message": "The program exit with the code '{0}'".format(str(int(random.random() * 200)))
            }

            out.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            out.flush()  # Make the line visible to the tailing reader right away
            time.sleep(0.5)  # Throttle: two log lines per second


if __name__ == "__main__":
    log_path = "my-log.json"
    # Background writer simulates a service producing JSON log lines.
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()

    # Follow the log in the main thread (runs forever).
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:

test_logtodb.py

import json
import os
import time


def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix ``tail -f`` command.

    The position is moved to the end of the file first, so only data written
    after this generator starts is produced.  When no new data is available,
    a short pause is taken (to avoid a busy loop) and an empty string is
    yielded, matching the original contract.  The generator ends cleanly
    once the file object is closed.

    Adapted from https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail
    """
    stream_file.seek(0, os.SEEK_END)  # Skip existing content; follow new lines only

    while True:
        if stream_file.closed:
            # PEP 479: raising StopIteration inside a generator becomes a
            # RuntimeError on Python 3.7+; `return` is the correct way to end.
            return

        line = stream_file.readline()
        if not line:
            # At EOF: wait briefly for the writer instead of spinning the CPU.
            time.sleep(0.1)

        yield line


def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and hand each entry to *db*.

    Runs indefinitely: every well-formed JSON line appended to the file is
    parsed; malformed (e.g. partially written) lines are silently skipped.
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                entry = json.loads(raw_line)
            except ValueError:
                # Corrupted or incomplete JSON — skip and keep following.
                continue

            # Do what you want with data:
            # db.execute("INSERT INTO ...", entry["level"], ...)
            print(entry["message"])
import random
import json
import time
import threading
import logtodata


def generate_test_json_log(log_path):
    """Endlessly append random JSON log lines to *log_path* (test helper).

    Each line is one ``{"level": ..., "message": ...}`` object; the stream is
    flushed after every write so a concurrent reader sees it immediately.
    """
    with open(log_path, "w") as out:
        while True:
            level = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": level,
                "message": "The program exit with the code '{0}'".format(str(int(random.random() * 200)))
            }

            out.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            out.flush()  # Make the line visible to the tailing reader right away
            time.sleep(0.5)  # Throttle: two log lines per second


if __name__ == "__main__":
    log_path = "my-log.json"
    # Background writer simulates a service producing JSON log lines.
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()

    # Follow the log in the main thread (runs forever).
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:

{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}

如果我的脚本格式不正确,我可以帮助您更新脚本。要读取文件,就像执行
tail
命令一样,我需要执行一个小脚本:

logtodb.py

import json
import os
import time


def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix ``tail -f`` command.

    The position is moved to the end of the file first, so only data written
    after this generator starts is produced.  When no new data is available,
    a short pause is taken (to avoid a busy loop) and an empty string is
    yielded, matching the original contract.  The generator ends cleanly
    once the file object is closed.

    Adapted from https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail
    """
    stream_file.seek(0, os.SEEK_END)  # Skip existing content; follow new lines only

    while True:
        if stream_file.closed:
            # PEP 479: raising StopIteration inside a generator becomes a
            # RuntimeError on Python 3.7+; `return` is the correct way to end.
            return

        line = stream_file.readline()
        if not line:
            # At EOF: wait briefly for the writer instead of spinning the CPU.
            time.sleep(0.1)

        yield line


def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and hand each entry to *db*.

    Runs indefinitely: every well-formed JSON line appended to the file is
    parsed; malformed (e.g. partially written) lines are silently skipped.
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                entry = json.loads(raw_line)
            except ValueError:
                # Corrupted or incomplete JSON — skip and keep following.
                continue

            # Do what you want with data:
            # db.execute("INSERT INTO ...", entry["level"], ...)
            print(entry["message"])
import random
import json
import time
import threading
import logtodata


def generate_test_json_log(log_path):
    """Endlessly append random JSON log lines to *log_path* (test helper).

    Each line is one ``{"level": ..., "message": ...}`` object; the stream is
    flushed after every write so a concurrent reader sees it immediately.
    """
    with open(log_path, "w") as out:
        while True:
            level = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": level,
                "message": "The program exit with the code '{0}'".format(str(int(random.random() * 200)))
            }

            out.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            out.flush()  # Make the line visible to the tailing reader right away
            time.sleep(0.5)  # Throttle: two log lines per second


if __name__ == "__main__":
    log_path = "my-log.json"
    # Background writer simulates a service producing JSON log lines.
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()

    # Follow the log in the main thread (runs forever).
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:

test_logtodb.py

import json
import os
import time


def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix ``tail -f`` command.

    The position is moved to the end of the file first, so only data written
    after this generator starts is produced.  When no new data is available,
    a short pause is taken (to avoid a busy loop) and an empty string is
    yielded, matching the original contract.  The generator ends cleanly
    once the file object is closed.

    Adapted from https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail
    """
    stream_file.seek(0, os.SEEK_END)  # Skip existing content; follow new lines only

    while True:
        if stream_file.closed:
            # PEP 479: raising StopIteration inside a generator becomes a
            # RuntimeError on Python 3.7+; `return` is the correct way to end.
            return

        line = stream_file.readline()
        if not line:
            # At EOF: wait briefly for the writer instead of spinning the CPU.
            time.sleep(0.1)

        yield line


def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and hand each entry to *db*.

    Runs indefinitely: every well-formed JSON line appended to the file is
    parsed; malformed (e.g. partially written) lines are silently skipped.
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                entry = json.loads(raw_line)
            except ValueError:
                # Corrupted or incomplete JSON — skip and keep following.
                continue

            # Do what you want with data:
            # db.execute("INSERT INTO ...", entry["level"], ...)
            print(entry["message"])
import random
import json
import time
import threading
import logtodata


def generate_test_json_log(log_path):
    """Endlessly append random JSON log lines to *log_path* (test helper).

    Each line is one ``{"level": ..., "message": ...}`` object; the stream is
    flushed after every write so a concurrent reader sees it immediately.
    """
    with open(log_path, "w") as out:
        while True:
            level = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": level,
                "message": "The program exit with the code '{0}'".format(str(int(random.random() * 200)))
            }

            out.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            out.flush()  # Make the line visible to the tailing reader right away
            time.sleep(0.5)  # Throttle: two log lines per second


if __name__ == "__main__":
    log_path = "my-log.json"
    # Background writer simulates a service producing JSON log lines.
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()

    # Follow the log in the main thread (runs forever).
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:

{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}

如果我的脚本格式不正确,我可以帮助您更新它

日志文件的示例是什么?它是一个有效的JSON值流(例如,
{…}{…}…
),还是一个单一的、不完整的值(
[{…},{…},{…},
）？我猜服务每行写出一个 JSON 对象，其他情况都说不通。你只需要编写一个程序，逐行读取文件，解析每行的 JSON 对象，然后用它做任何事情。PS：让我补充一点，我个人觉得用 JSON 处理日志消息并不合适，因为它效率非常低，但我知道现在确实有些人在这样做。顺便说一句，你可能想看看（原文此处的链接缺失）——这个项目的目的正是提供你想要的功能。日志文件的示例是什么样子的？它是一个有效的 JSON 值流（例如，
{…}{…}…
),还是一个单一的、不完整的值(
[{…},{…},
）？我猜服务每行写出一个 JSON 对象，其他情况都说不通。你只需要编写一个程序，逐行读取文件，解析每行的 JSON 对象，然后用它做任何事情。PS：让我补充一点，我个人觉得用 JSON 处理日志消息并不合适，因为它效率非常低，但我知道现在确实有些人在这样做。顺便说一句，你可能想看看（原文此处的链接缺失）——该项目旨在提供你想要的功能。我没有仔细审查建议方案中的代码，但基本思路是正确的。至于是否应该增强该脚本，使其记录最后解析到的文件偏移量，并在重新启动时定位（seek）到该偏移量，这只有提问者自己才能回答。