如何解析用python实时编写的JSON文件?
我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送 哪种方法是不断解析文件并将新行上传到数据库的最佳方法 补充:我不想使用任何与麋鹿堆相关的东西如何解析用python实时编写的JSON文件?,python,linux,parsing,Python,Linux,Parsing,我正在使用一个linux服务,该服务在/var/log中生成JSON格式的日志。日志文件几乎在不断增加。实际上,我正在使用的服务没有任何数据库连接器或包装器,无法使日志直接进入数据库,因此我必须使用自己的服务进行解析和发送 哪种方法是不断解析文件并将新行上传到数据库的最佳方法 补充:我不想使用任何与麋鹿堆相关的东西 谢谢 要读取文件,就像执行tail命令一样,我需要编写一个小脚本: logtodb.py import json import os import time def tail(s
谢谢 要读取文件,就像执行
tail
命令一样,我需要编写一个小脚本:
logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:
test_logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:
{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}
如果我的脚本格式不正确,我可以帮助您更新脚本。要读取文件,就像执行
tail
命令一样,我需要执行一个小脚本:
logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
和一个测试文件:
test_logtodb.py
import json
import os
import time
def tail(stream_file):
    """Yield lines appended to *stream_file*, like the Unix command `tail -f`.

    Adapted from
    https://stackoverflow.com/questions/44895527/reading-infinite-stream-tail

    :param stream_file: an open, readable text-file object that another
        process or thread keeps appending to.
    :return: generator yielding each complete new line (newline included).
    """
    stream_file.seek(0, os.SEEK_END)  # skip existing content; follow only new lines
    while True:
        if stream_file.closed:
            # PEP 479 (Python 3.7+): `raise StopIteration` inside a generator
            # becomes RuntimeError; `return` is the correct way to finish.
            return
        line = stream_file.readline()
        if not line:
            # At EOF with nothing new yet: sleep briefly instead of
            # busy-spinning and yielding empty strings to the consumer.
            time.sleep(0.1)
            continue
        yield line
def log_to_db(log_path, db):
    """Follow the JSON-lines log at *log_path* and push each record to *db*.

    Runs forever: every new line is parsed as one JSON document; lines that
    are not valid JSON (e.g. partially written ones) are silently skipped.

    :param log_path: path of the log file being appended to.
    :param db: database handle used for the inserts (placeholder here).
    """
    with open(log_path, "r") as log_file:
        for raw_line in tail(log_file):
            try:
                record = json.loads(raw_line)
            except ValueError:
                continue  # corrupted / partially written line: skip it
            else:
                # Replace the print with the real insert, e.g.:
                # db.execute("INSERT INTO ...", record["level"], ...)
                print(record["message"])
import random
import json
import time
import threading
import logtodata
def generate_test_json_log(log_path):
    """Endlessly write random JSON log records to *log_path* (test helper).

    Emits one JSON object per line — a random "level" and a random
    exit-code "message" — every 500 ms, flushing after each record so a
    concurrent reader sees the line immediately.
    """
    with open(log_path, "w") as log_file:
        while True:
            severity = "ERROR" if random.random() > 0.5 else "WARNING"
            record = {
                "level": severity,
                "message": "The program exit with the code '{0}'".format(
                    str(int(random.random() * 200))),
            }
            log_file.write("{0}\n".format(json.dumps(record, ensure_ascii=False)))
            log_file.flush()  # make the record visible to the reader now
            time.sleep(0.5)  # one record every 500 ms
if __name__ == "__main__":
    # Demo wiring: start a background thread producing a fake JSON log,
    # then follow that same file and "upload" every new record.
    log_path = "my-log.json"
    writer_thread = threading.Thread(
        target=generate_test_json_log, args=(log_path,))
    writer_thread.start()
    logtodata.log_to_db(log_path, db=None)
我假设日志文件如下所示:
{"level": "ERROR", "message": "The program exit with the code '181'"}
{"level": "WARNING", "message": "The program exit with the code '51'"}
{"level": "ERROR", "message": "The program exit with the code '69'"}
如果我的脚本格式不正确,我可以帮助您更新它日志文件的示例是什么?它是一个有效的JSON值流(例如,
{…}{…}…
),还是一个单一的、不完整的值([{…},{…},{…},
)?我猜服务每行发送一个json对象。其他一切都没有意义。你只需要编写一个程序,逐行读取文件,解析每行的json对象,然后用它做任何事情。PS:让我补充一点,我个人觉得使用json处理日志消息是错误的,因为它效率非常低,但是我知道现在有些人正在这样做。顺便说一句,你可能想看看。这个项目的目的是提供你想要的。日志文件的示例是什么样子的?它是一个有效的JSON值流(例如,{…}{…}…
),还是一个单一的、不完整的值([{…},{…},
)?我猜服务每行发送一个json对象。其他一切都没有意义。你只需要编写一个程序,逐行读取文件,解析每行的json对象,然后用它做任何事情。PS:让我补充一点,我个人觉得使用json处理日志消息是错误的,因为它效率非常低,但我知道现在有些人正在这样做。顺便说一句,你可能想看看。该项目旨在提供你想要的。我没有仔细审查建议的解决方案中的代码,但基本想法是正确的。是否应该增强该功能,以记录最后解析的偏移量,并在重新启动时寻找该偏移量,这只有问问题的人才能回答。我没有仔细检查提议的解决方案中的代码,但基本想法是正确的。是否应该对其进行增强,以记录最后解析的偏移量,并在重新启动时查找到该偏移量,只有问问题的人才能回答。