Python json文件生成器在每1000个元素之间添加`][`
标签:python、json

下面是我创建的一个函数,它随机生成 n 个 JSON 对象,然后把这些对象按每 1000 个一组分批写入同一个文件。我遇到的问题是:每写入一批(1000 个),文件中就会多出一个 `][`,导致 JSON 无效。我对 Python 还比较陌生,曾尝试遍历每个元素并执行 `.replace('][', ',')`,但没有效果。任何帮助都将不胜感激。
import random
import json as json
import sys
import gzip
import common.common_functions as common
def _option_value(p_args, flag):
    """Return the token following *flag* in *p_args*, or None if *flag* is absent.

    Exits with status -1 when *flag* is the last token, i.e. its value is
    missing, instead of failing with a bare IndexError.
    """
    if flag not in p_args:
        return None
    value_pos = p_args.index(flag) + 1
    if value_pos >= len(p_args):
        print(flag + " requires a value")
        sys.exit(-1)
    return p_args[value_pos]


def get_args(p_args):
    """Extract the generator's options from a command-line token list.

    Recognised options:
      --num-records N   number of records (default 1000, announced on stdout)
      --num-groups N    number of groups (default 10, announced on stdout)
      --compress        presence of the flag enables compression
      --file-name NAME  output file name (required)

    Args:
        p_args: sequence of command-line tokens, e.g. ``sys.argv``.

    Returns:
        Tuple ``(num_records, num_groups, file_name, compression)``.

    Raises:
        SystemExit: with code -1 when ``--file-name`` is absent or when an
            option that takes a value is the last token.
        ValueError: when a numeric option's value is not an integer.
    """
    raw_records = _option_value(p_args, "--num-records")
    if raw_records is None:
        print("number of records defaulting to 1000")
        num_records = 1000
    else:
        num_records = int(raw_records)

    raw_groups = _option_value(p_args, "--num-groups")
    if raw_groups is None:
        print("number of groups defaulting to 10")
        num_groups = 10
    else:
        num_groups = int(raw_groups)

    compression = "--compress" in p_args

    file_name = _option_value(p_args, "--file-name")
    if file_name is None:
        print("file_name param must be submitted")
        # sys.exit rather than the interactive-only exit() injected by site.
        sys.exit(-1)

    return num_records, num_groups, file_name, compression
# Generates num_records random records through the `common` helpers and dumps
# them to file_name with json.dump, through gzip when compression is requested.
# NOTE(review): the indentation of this listing was lost; the nesting described
# in the comments below is inferred from the statement order - confirm against
# the original script.
def main():
num_records, num_groups, file_name, compression = get_args(sys.argv)
# Text mode is requested explicitly for gzip ('wt'); plain files use 'w'.
if compression:
write_mode = 'wt'
else:
write_mode = 'w'
state, min_zip, max_zip = common.pick_state()
groups = common.generate_groups(num_groups)
# One parallel list per record field; entries at the same index form a record.
fnames, lnames, mnames, dobs, zips, genders, group_ids, coverages = [], [], [], [], [], [], [], []
# record_num counts from 1 up to num_records.
for record_num in (num + 1 for num in range(num_records)):
fnames.append(common.pick_fname())
mnames.append(common.gen_mi())
lnames.append(common.pick_lname())
dobs.append(common.gen_date_in_range("1925/01/01","2000/12/31","%Y/%m/%d","%Y/%m/%d"))
zips.append(common.gen_zip(min_zip, max_zip))
genders.append(common.gen_gender())
group_ids.append(common.pick_group(groups))
coverages.append(random.randint(1000,500000))
# The full list of dicts is rebuilt from the parallel lists here.
# The date-of-birth key is spelled "dob:" (with a trailing colon) and the loop
# variable named `zip` shadows the builtin inside the comprehension.
sample_records = [{"fname": fname, "mname": mname, "lname": lname, "dob:": dob, "zip": zip, "gender": gender, "group_id": group_id, "coverage": coverage} \
for fname, mname, lname, dob, zip, gender, group_id, coverage in zip(fnames,mnames,lnames,dobs,zips,genders,group_ids,coverages)]
# Every 1000th record the accumulated batch is written out.
if (record_num % 1000 == 0):
if compression:
with gzip.open(file_name, write_mode, encoding='ascii') as zipfile:
# json.dump serialises the whole list, so every call emits its own
# enclosing "[" and "]"; consecutive dumps into one file produce "][".
json.dump(sample_records, zipfile)
if write_mode == 'wt':
# This assigns to `write_mod`, not `write_mode`, so the compressed path
# keeps using 'wt' on later writes.
write_mod = 'at'
else:
with open(file_name, write_mode) as json_file:
json.dump(sample_records, json_file)
# After the first batch, switch to append so later batches are kept.
if write_mode == 'w':
write_mode = 'a'
# The parallel lists are emptied; sample_records itself is not cleared here.
fnames, lnames, mnames, dobs, zips, genders, group_ids, coverages = [], [], [], [], [], [], [], []
print (record_num)
# Final dump: writes whatever sample_records currently holds as one more
# complete JSON list.
if compression:
with gzip.open(file_name, write_mode, encoding='ascii') as zipfile:
json.dump(sample_records, zipfile)
else:
with open(file_name, write_mode) as json_file:
json.dump(sample_records, json_file)
main()
以下是输出结果:
{
"fname": "JAMES",
"mname": "W",
"lname": "KIRVIN",
"dob:": "1937/04/25",
"zip": "43650",
"gender": "F",
"group_id": "p28m80z8",
"coverage": 264200
}, {
"fname": "JUSTIN",
"mname": "P",
"lname": "REUTEBUCH",
"dob:": "1941/02/09",
"zip": "44254",
"gender": "M",
"group_id": "6x6olrgc",
"coverage": 251417
}][{ <<<<<<<<<<<<<<<<<< Here is the issue
"fname": "EMILY",
"mname": "U",
"lname": "STEEDLE",
"dob:": "1948/12/30",
"zip": "44524",
"gender": "M",
"group_id": "6x7d4c43",
"coverage": 25278
}, {
{
"fname": "JAMES",
"mname": "W",
"lname": "KIRVIN",
"dob:": "1937/04/25",
"zip": "43650",
"gender": "F",
"group_id": "p28m80z8",
"coverage": 264200
}, {
"fname": "JUSTIN",
"mname": "P",
"lname": "REUTEBUCH",
"dob:": "1941/02/09",
"zip": "44254",
"gender": "M",
"group_id": "6x6olrgc",
"coverage": 251417
}][{

解决这个问题的一种方法:
- 将列表转储为一个临时的 JSON 字符串
- 对于除第一批以外的所有批次:删除开头的 `[`
- 对于所有批次:删除末尾的 `]`
- 除第一批外,在向文件写入处理后的 JSON 字符串之前,先写入一个 `,`
- 全部批次写完后,在文件末尾补写一个 `]`

这样就能生成有效的 JSON。根据您的代码修改如下:
import random
import json as json
import sys
import gzip
import common.common_functions as common
def _option_value(p_args, flag):
    """Return the token following *flag* in *p_args*, or None if *flag* is absent.

    Exits with status -1 when *flag* is the last token, i.e. its value is
    missing, instead of failing with a bare IndexError.
    """
    if flag not in p_args:
        return None
    value_pos = p_args.index(flag) + 1
    if value_pos >= len(p_args):
        print(flag + " requires a value")
        sys.exit(-1)
    return p_args[value_pos]


def get_args(p_args):
    """Extract the generator's options from a command-line token list.

    Recognised options:
      --num-records N   number of records (default 1000, announced on stdout)
      --num-groups N    number of groups (default 10, announced on stdout)
      --compress        presence of the flag enables compression
      --file-name NAME  output file name (required)

    Args:
        p_args: sequence of command-line tokens, e.g. ``sys.argv``.

    Returns:
        Tuple ``(num_records, num_groups, file_name, compression)``.

    Raises:
        SystemExit: with code -1 when ``--file-name`` is absent or when an
            option that takes a value is the last token.
        ValueError: when a numeric option's value is not an integer.
    """
    raw_records = _option_value(p_args, "--num-records")
    if raw_records is None:
        print("number of records defaulting to 1000")
        num_records = 1000
    else:
        num_records = int(raw_records)

    raw_groups = _option_value(p_args, "--num-groups")
    if raw_groups is None:
        print("number of groups defaulting to 10")
        num_groups = 10
    else:
        num_groups = int(raw_groups)

    compression = "--compress" in p_args

    file_name = _option_value(p_args, "--file-name")
    if file_name is None:
        print("file_name param must be submitted")
        # sys.exit rather than the interactive-only exit() injected by site.
        sys.exit(-1)

    return num_records, num_groups, file_name, compression
def _write_json_items(stream, records, is_first_batch):
    """Write *records* to *stream* as comma-separated JSON array items.

    The enclosing brackets are not written; a "," is emitted first unless
    this is the first batch, so consecutive batches join into one array.
    """
    if not is_first_batch:
        stream.write(',')
    # json.dumps of a list yields "[item, item, ...]"; dropping the first and
    # last character leaves only the items.
    stream.write(json.dumps(records)[1:-1])


def main():
    """Generate random records and write them to a file as ONE JSON array.

    Options come from ``get_args(sys.argv)``. Records are built with the
    ``common`` helpers and flushed to the output in batches of 1000 so that
    only one batch is held in memory at a time. The output file is opened
    once (gzip text mode when compression is requested), "[" is written
    first, the batches are written as comma-separated items, and "]" closes
    the array - so the result is valid JSON for any record count,
    including zero and exact multiples of the batch size.
    """
    num_records, num_groups, file_name, compression = get_args(sys.argv)
    # Only the zip range of the picked state is used below.
    _state, min_zip, max_zip = common.pick_state()
    groups = common.generate_groups(num_groups)

    batch_size = 1000
    if compression:
        # gzip.open defaults to binary mode; 'wt' is needed to write str.
        out_file = gzip.open(file_name, 'wt', encoding='ascii')
    else:
        out_file = open(file_name, 'w')

    with out_file:
        out_file.write('[')
        batch = []
        is_first_batch = True
        for record_num in range(1, num_records + 1):
            # Helpers are called in the same order as the original listing.
            # The "dob:" key (with the colon) is kept exactly as emitted before.
            batch.append({
                "fname": common.pick_fname(),
                "mname": common.gen_mi(),
                "lname": common.pick_lname(),
                "dob:": common.gen_date_in_range("1925/01/01", "2000/12/31", "%Y/%m/%d", "%Y/%m/%d"),
                "zip": common.gen_zip(min_zip, max_zip),
                "gender": common.gen_gender(),
                "group_id": common.pick_group(groups),
                "coverage": random.randint(1000, 500000),
            })
            if record_num % batch_size == 0:
                _write_json_items(out_file, batch, is_first_batch)
                is_first_batch = False
                # Start a fresh batch so nothing is written twice.
                batch = []
                print(record_num)
        # Leftover records when num_records is not a multiple of batch_size.
        if batch:
            _write_json_items(out_file, batch, is_first_batch)
        out_file.write(']')


if __name__ == "__main__":
    main()
(未经测试——我没有 `common.common_functions` 模块。)

请注意,您的代码有一些代码异味。例如,未压缩/压缩两种写入方式的代码被重复了多次;而且每一轮循环都重新创建 `sample_records`,其实只需追加即可。另外请留意 `write_mod` 与 `write_mode`(您的版本里有一处拼写错误,我的版本中应该已修正)。

你想要的结果是什么?我认为应该把 `][` 替换为 `,`。
如果你希望输出是有效的 JSON,请把所有内容都放进一个列表,然后一次性保存。

它并没有额外添加任何内容:列表的 JSON 以 `[` 开头、以 `]` 结尾,`][` 就是一个列表的结尾紧接着下一个列表的开头。您应该创建一个二维列表,然后在最后调用一次 `json.dump()` 写出整个列表。