如何解析用python实时编写的JSON文件?
我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送 哪种方法是不断解析文件并将新行上传到数据库的最佳方法 补充:我不想使用任何与麋鹿堆相关的东西如何解析用python实时编写的JSON文件?,python,linux,parsing,Python,Linux,Parsing,我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送 哪种方法是不断解析文件并将新行上传到数据库的最佳方法 补充:我不想使用任何与麋鹿堆相关的东西 谢谢 要读取文件,就像执行tail命令一样,我需要编写一个小脚本: logtodb.py import json import os import time def tail(s
谢谢 要读取文件,就像执行
tail
命令一样,我需要编写一个小脚本:
logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:
test_logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:
{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}
如果我的脚本格式不正确,我可以帮助您更新脚本。要读取文件,就像执行
tail
命令一样,我需要执行一个小脚本:
logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:
test_logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:
{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}
如果我的脚本格式不正确,我可以帮助您更新它日志文件的示例是什么?它是一个有效的JSON值流(例如,
{…}{…}…
),还是一个单一的、不完整的值([{…},{…},{…},
)?我猜服务每行发送一个json对象。其他一切都没有意义。你只需要编写一个程序,逐行读取文件,解析每行的json对象,然后用它做任何事情。PS:让我补充一点,我个人觉得使用json处理日志消息是错误的,因为它效率非常低,但是我知道现在有些人正在这样做。顺便说一句,你可能想看看。这个项目的目的是提供你想要的。日志文件的示例是什么样子的?它是一个有效的JSON值流(例如,{…}{…}…
),还是一个单一的、不完整的值([{…},{…},
)?我猜服务每行发送一个json对象。其他一切都没有意义。你只需要编写一个程序,逐行读取文件,解析每行的json对象,然后用它做任何事情。PS:让我补充一点,我个人觉得使用json处理日志消息是错误的,因为它效率非常低,但我知道现在有些人正在这样做。顺便说一句,你可能想看看。该项目旨在提供你想要的。我没有仔细审查建议的解决方案中的代码,但基本想法是正确的。是否应该增强该功能,以记录最后解析的偏移量,并在重新启动时寻找该偏移量,这只有问问题的人才能回答。我没有仔细检查提议的解决方案中的代码,但基本想法是正确的。是否应该对其进行增强,以记录最后解析的偏移量,并在重新启动时查找到该偏移量,只有问问题的人才能回答。