Python json文件生成器在每1000个元素之间添加`][`

Python json文件生成器在每1000个元素之间添加`][`,python,json,Python,Json,下面是我创建的一个函数,它随机生成n个json对象。然后它将这些对象分成1000个组,并将它们写入一个文件。我遇到的问题是,每次写入1000个组时,它都会添加][使json无效。我对python相当陌生,尝试过在每个元素上循环并执行。替换('][',',')),但这不起作用。任何帮助都将不胜感激 import random import json as json import sys import gzip import common.common_functions as common de

下面是我创建的一个函数,它随机生成 n 个 json 对象,然后将这些对象按每 1000 个分成一组,并写入一个文件。我遇到的问题是,每写入一组 1000 个对象,输出中就会多出一个 `][`,导致 json 无效。我对 python 相当陌生,曾尝试遍历每个元素并执行 `.replace('][', ',')`,但这不起作用。任何帮助都将不胜感激。

import random
import json as json
import sys
import gzip
import common.common_functions as common

def get_args(p_args):
    """Parse the generator's options from an argv-style list of strings.

    Recognised options:
        --num-records N   record count; defaults to 1000 when absent
        --num-groups N    group count; defaults to 10 when absent
        --compress        presence switches compression on
        --file-name PATH  output file name; required

    Args:
        p_args: list of command-line tokens (e.g. ``sys.argv``).

    Returns:
        Tuple ``(num_records, num_groups, file_name, compression)``.

    Raises:
        SystemExit: with code -1 when ``--file-name`` is absent, when a
            value-taking option is the last token (no value follows it), or
            when a count option is not an integer.
    """

    def option_value(flag):
        # Return the token that follows `flag`, or None when `flag` is absent.
        if flag not in p_args:
            return None
        value_pos = p_args.index(flag) + 1
        if value_pos >= len(p_args):
            # Previously this surfaced as a bare IndexError.
            print(flag + " requires a value")
            sys.exit(-1)
        return p_args[value_pos]

    def int_option(flag, default, default_message):
        # Return the integer given for `flag`, or `default` when it is absent.
        raw = option_value(flag)
        if raw is None:
            print(default_message)
            return default
        try:
            return int(raw)
        except ValueError:
            print(flag + " must be an integer, got " + repr(raw))
            sys.exit(-1)

    num_records = int_option("--num-records", 1000,
                             "number of records defaulting to 1000")
    num_groups = int_option("--num-groups", 10,
                            "number of groups defaulting to 10")

    compression = "--compress" in p_args

    file_name = option_value("--file-name")
    if file_name is None:
        print("file_name param must be submitted")
        sys.exit(-1)

    return num_records, num_groups, file_name, compression


def main():
    """Generate random records and write them to a file in batches of 1000.

    Options come from ``get_args(sys.argv)``; the field values come from the
    ``common`` helper module and ``random.randint``. Every 1000th record the
    accumulated batch is dumped with ``json.dump`` and the record number is
    printed; whatever ``sample_records`` holds after the loop is dumped again.

    NOTE(review): this is the version that produces the invalid ``][`` output;
    the defects are marked inline below.
    """

    num_records, num_groups, file_name, compression = get_args(sys.argv)

    # First write truncates the file; later writes are meant to append.
    if compression:
        write_mode = 'wt'
    else:
        write_mode = 'w'

    # `state` is not used again in this function.
    state, min_zip, max_zip = common.pick_state()

    groups = common.generate_groups(num_groups)

    # One parallel list per field; all are reset after each batch is written.
    fnames, lnames, mnames, dobs, zips, genders,  group_ids, coverages = [], [], [], [], [], [], [], []

    # record_num runs from 1 to num_records inclusive.
    for record_num in (num + 1 for num in range(num_records)):

        fnames.append(common.pick_fname())
        mnames.append(common.gen_mi())
        lnames.append(common.pick_lname())
        dobs.append(common.gen_date_in_range("1925/01/01","2000/12/31","%Y/%m/%d","%Y/%m/%d"))
        zips.append(common.gen_zip(min_zip, max_zip))
        genders.append(common.gen_gender())
        group_ids.append(common.pick_group(groups))
        coverages.append(random.randint(1000,500000))

        # NOTE(review): the whole batch list is rebuilt on every iteration,
        # although it is only consumed when a batch is written.
        # NOTE(review): the key "dob:" contains a trailing colon - confirm
        # whether that is intended.
        sample_records = [{"fname": fname, "mname": mname, "lname": lname, "dob:": dob, "zip": zip, "gender": gender, "group_id": group_id, "coverage": coverage} \
                          for fname, mname, lname, dob, zip, gender, group_id, coverage in zip(fnames,mnames,lnames,dobs,zips,genders,group_ids,coverages)]

        if (record_num % 1000 == 0):
            # NOTE(review): each json.dump call writes a complete "[...]"
            # list, so appended batches end up separated by "][" and the
            # file as a whole is not valid JSON.
            if compression:
                with gzip.open(file_name, write_mode, encoding='ascii') as zipfile:
                    json.dump(sample_records, zipfile)
                if write_mode == 'wt':
                    # NOTE(review): typo - this binds a new name `write_mod`,
                    # so write_mode stays 'wt' and every compressed batch
                    # overwrites the previous one.
                    write_mod = 'at'
            else:
                with open(file_name, write_mode) as json_file:
                    json.dump(sample_records, json_file)
                if write_mode == 'w':
                    write_mode = 'a'

            fnames, lnames, mnames, dobs, zips, genders,  group_ids, coverages = [], [], [], [], [], [], [], []

            print (record_num)

    # Final dump of whatever sample_records holds after the loop.
    # NOTE(review): sample_records is not cleared with the field lists, so
    # when num_records is a multiple of 1000 the last batch is written a
    # second time; when num_records is 0 the name is never bound and this
    # raises NameError.
    if compression:
        with gzip.open(file_name, write_mode, encoding='ascii') as zipfile:
            json.dump(sample_records, zipfile)
    else:
         with open(file_name, write_mode) as json_file:
            json.dump(sample_records, json_file)


main()
以下是输出结果:

{
    "fname": "JAMES",
    "mname": "W",
    "lname": "KIRVIN",
    "dob:": "1937/04/25",
    "zip": "43650",
    "gender": "F",
    "group_id": "p28m80z8",
    "coverage": 264200
}, {
    "fname": "JUSTIN",
    "mname": "P",
    "lname": "REUTEBUCH",
    "dob:": "1941/02/09",
    "zip": "44254",
    "gender": "M",
    "group_id": "6x6olrgc",
    "coverage": 251417
}][{ <<<<<<<<<<<<<<<<<< Here is the issue
    "fname": "EMILY",
    "mname": "U",
    "lname": "STEEDLE",
    "dob:": "1948/12/30",
    "zip": "44524",
    "gender": "M",
    "group_id": "6x7d4c43",
    "coverage": 25278
}, {
{
    "fname": "JAMES",
    "mname": "W",
    "lname": "KIRVIN",
    "dob:": "1937/04/25",
    "zip": "43650",
    "gender": "F",
    "group_id": "p28m80z8",
    "coverage": 264200
}, {
    "fname": "JUSTIN",
    "mname": "P",
    "lname": "REUTEBUCH",
    "dob:": "1941/02/09",
    "zip": "44254",
    "gender": "M",
    "group_id": "6x6olrgc",
    "coverage": 251417
}][{

解决这个问题的一种方法:

  • 将列表转储到临时json字符串中
  • 对于除第一批以外的所有批次:删除前导的 `[`
  • 对于所有批次:删除尾部的 `]`
  • 对于除第一批以外的所有批次:在写入处理后的json字符串之前,先向文件中写入一个 `,`
  • 所有批次写完后,在文件末尾写入结束的 `]`
这将生成有效的json。根据您的代码:

import random
import json as json
import sys
import gzip
import common.common_functions as common

def get_args(p_args):
    """Parse the generator's options from an argv-style list of strings.

    Recognised options:
        --num-records N   record count; defaults to 1000 when absent
        --num-groups N    group count; defaults to 10 when absent
        --compress        presence switches compression on
        --file-name PATH  output file name; required

    Args:
        p_args: list of command-line tokens (e.g. ``sys.argv``).

    Returns:
        Tuple ``(num_records, num_groups, file_name, compression)``.

    Raises:
        SystemExit: with code -1 when ``--file-name`` is absent, when a
            value-taking option is the last token (no value follows it), or
            when a count option is not an integer.
    """

    def option_value(flag):
        # Return the token that follows `flag`, or None when `flag` is absent.
        if flag not in p_args:
            return None
        value_pos = p_args.index(flag) + 1
        if value_pos >= len(p_args):
            # Previously this surfaced as a bare IndexError.
            print(flag + " requires a value")
            sys.exit(-1)
        return p_args[value_pos]

    def int_option(flag, default, default_message):
        # Return the integer given for `flag`, or `default` when it is absent.
        raw = option_value(flag)
        if raw is None:
            print(default_message)
            return default
        try:
            return int(raw)
        except ValueError:
            print(flag + " must be an integer, got " + repr(raw))
            sys.exit(-1)

    num_records = int_option("--num-records", 1000,
                             "number of records defaulting to 1000")
    num_groups = int_option("--num-groups", 10,
                            "number of groups defaulting to 10")

    compression = "--compress" in p_args

    file_name = option_value("--file-name")
    if file_name is None:
        print("file_name param must be submitted")
        sys.exit(-1)

    return num_records, num_groups, file_name, compression


def main():
    """Generate random records and write them to a file as one JSON array.

    Options come from ``get_args(sys.argv)``. Records are streamed to the
    output through a single open handle: ``[`` first, then the records
    separated by ``, ``, then ``]``. The result is therefore one valid JSON
    list for any record count, including 0 and counts below 1000. With
    compression enabled the same text is written through ``gzip.open``.

    The record number is printed after every 1000th record as a progress
    indicator.
    """
    num_records, num_groups, file_name, compression = get_args(sys.argv)

    # Only the zip range is used below; the first returned value is unused.
    _state, min_zip, max_zip = common.pick_state()
    groups = common.generate_groups(num_groups)

    # Both openers accept text mode plus an encoding. json.dumps escapes
    # non-ASCII characters by default, so ASCII is sufficient.
    opener = gzip.open if compression else open

    # Keeping one handle open for the whole run avoids the earlier failure
    # modes: no reopen in 'w' mode that truncates the file before the closing
    # bracket, and no stale batch that gets written twice.
    with opener(file_name, 'wt', encoding='ascii') as out_file:
        out_file.write('[')

        for record_num in range(1, num_records + 1):
            # Helper calls are made in the same order as before
            # (fname, mname, lname, dob, zip, gender, group, coverage).
            # NOTE(review): the key "dob:" contains a trailing colon; kept
            # as-is so the output format is unchanged - confirm intent.
            record = {
                "fname": common.pick_fname(),
                "mname": common.gen_mi(),
                "lname": common.pick_lname(),
                "dob:": common.gen_date_in_range("1925/01/01", "2000/12/31", "%Y/%m/%d", "%Y/%m/%d"),
                "zip": common.gen_zip(min_zip, max_zip),
                "gender": common.gen_gender(),
                "group_id": common.pick_group(groups),
                "coverage": random.randint(1000, 500000),
            }

            # Separator goes before every record except the first, matching
            # the ", " that json.dumps puts between list items.
            if record_num > 1:
                out_file.write(', ')
            out_file.write(json.dumps(record))

            if record_num % 1000 == 0:
                print(record_num)

        out_file.write(']')

main()

(未测试——我没有 `common.common_functions` 模块。)请注意,您的代码有一些代码异味。例如,未压缩/压缩两种写入方式的代码被重复了多次;并且您在每轮循环中都重新创建 `sample_records`,其实只需追加即可。另外请对比 `write_mod` 与 `write_mode`(您的版本中有一个拼写错误,我的版本中应该已经修复)。

你想要什么?我认为应该用
[
替换
如果你希望输出是有效的JSON,请将所有内容都放在一个列表中,然后一次性保存,这样就不会多出任何内容。

列表的JSON以 `[` 开头,以 `]` 结尾;`][` 是一个列表的结尾,紧接着下一个列表的开头。您应该创建一个二维列表,然后在最后调用 `json.dump()` 一次写出整个列表。