使用Pandas将嵌套CSV转换为嵌套JSON
使用Pandas将嵌套CSV转换为嵌套JSON(标签:json、python-3.x、pandas、dataframe、csv)
我有一个这样的数据帧:
org.iden.account,org.iden.id,adress.city,adress.country,person.name.fullname,person.gender,person.birthYear,subs.id,subs.subs1.birthday,subs.subs1.org.address.country,subs.subs1.org.address.strret1,subs.org.buyer.email.address,subs.org.buyer.phone.number
account123,id123,riga,latvia,laura,female,1990,subs123,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com|email2@sanoma.com,+371401234567
account123,id000,riga,latvia,laura,female,1990,subs456,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com,+371401234567
account123,id456,riga,latvia,laura,female,1990,subs789,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com,+371401234567
# Flat records: each key is a dot-separated path into the nested
# structure that should be produced.
json_data = [
    {
        "org.iden.account": "account123",
        "org.iden.id": "id123",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs123",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com|email2@sanoma.com",
        "subs.org.buyer.phone.number": 371401234567,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id000",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs456",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id456",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs789",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
]
我需要将其转换为一个嵌套的JSON,该JSON基于以点(.)分隔的列。因此,对于第一行,预期结果应为
{
    "org": {
        "iden": {
            "account": "account123",
            "id": "id123"
        }
    },
    "adress": {
        "city": "riga",
        "country": "latvia"
    },
    "person": {
        "name": {
            "fullname": "laura"
        },
        "gender": "female",
        "birthYear": 1990
    },
    "subs": {
        "id": "subs123",
        "subs1": {
            "birthday": "1990-12-14T00:00:00Z",
            "org": {
                "address": {
                    "country": "latvia",
                    "strret1": "street 1"
                }
            }
        },
        "org": {
            "buyer": {
                "email": {
                    "address": "email1@myorg.com|email2@sanoma.com"
                },
                "phone": {
                    "number": "+371401234567"
                }
            }
        }
    }
}
当然,所有记录应当组成一个列表。我曾尝试直接使用pandas的to_json(),但得到的只是扁平(未嵌套)的结果:
[{"org.iden.account":"account123","org.iden.id":"id123","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs123","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1@myorg.com|email2@sanoma.com","subs.org.buyer.phone.number":371401234567},{"org.iden.account":"account123","org.iden.id":"id000","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs456","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1@myorg.com","subs.org.buyer.phone.number":371407654321},{"org.iden.account":"account123","org.iden.id":"id456","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs789","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1@myorg.com","subs.org.buyer.phone.number":371407654321}]
在此方面的任何帮助都将不胜感激
def df_to_json(row):
    """Build a nested dict from one record whose labels are dot-separated.

    ``row`` is any object whose ``items()`` yields ``(label, value)``
    pairs, such as a pandas Series (one DataFrame row) or a plain dict.
    Each label is split on ``'.'``: the leading parts become nested
    dicts (created on first use) and the final part holds the value.

    Returns the nested dict; a record with no labels gives ``{}``.
    """
    tree = {}
    # Iterate (label, value) pairs rather than looking each value up by
    # label: this also accepts plain dicts, and avoids ``row[label]``
    # handing back a whole Series when a label occurs more than once.
    for label, value in row.items():
        *parents, leaf = label.split('.')
        node = tree
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return tree
假设您的json数据结构如下所示:
org.iden.account,org.iden.id,adress.city,adress.country,person.name.fullname,person.gender,person.birthYear,subs.id,subs.subs1.birthday,subs.subs1.org.address.country,subs.subs1.org.address.strret1,subs.org.buyer.email.address,subs.org.buyer.phone.number
account123,id123,riga,latvia,laura,female,1990,subs123,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com|email2@sanoma.com,+371401234567
account123,id000,riga,latvia,laura,female,1990,subs456,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com,+371401234567
account123,id456,riga,latvia,laura,female,1990,subs789,1990-12-14T00:00:00Z,latvia,street 1,email1@myorg.com,+371401234567
# Sample input: a list of flat dicts whose keys are dot-separated paths.
json_data = [
    {
        "org.iden.account": "account123",
        "org.iden.id": "id123",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs123",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com|email2@sanoma.com",
        "subs.org.buyer.phone.number": 371401234567,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id000",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs456",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id456",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs789",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
]
您可以按每个dict逐一对其进行嵌套处理:
def nestify(unnested):
    """Convert a flat dict with dot-separated keys into nested dicts.

    Each key of ``unnested`` is split on ``'.'``. Every part except the
    last names an intermediate dict, created the first time it is
    needed; the last part receives the value. A new dict is returned.
    """
    result = {}
    for dotted_key, value in unnested.items():
        *parents, leaf = dotted_key.split('.')
        # Restart the walk from the root for every flat key.
        node = result
        for name in parents:
            if name not in node:
                node[name] = {}
            node = node[name]
        node[leaf] = value
    return result
此函数接收其中一个未嵌套的dict,遍历它的键,并将对应的值赋给最深一层的键。
带注释的版本:
def nestify(unnested):
    # the nested dict that is built up and finally returned
    tree = dict()
    for key, value in unnested.items():
        # everything before the last period is the path of parent dicts,
        # everything after it is the name that will hold the value
        # for example, "org.iden.account" -> "org.iden" and "account"
        path, dot, leaf = key.rpartition('.')
        # a key without any period has no parent dicts at all
        parents = path.split('.') if dot else []
        # start walking from the base dict again for every key
        branch = tree
        for name in parents:
            # only create a dict when no earlier key created it already
            # for example, "org.iden.id" finds the "org" and "iden"
            # dicts that were made for "org.iden.account"
            if name not in branch:
                branch[name] = dict()
            # step down one level
            branch = branch[name]
        # store the actual value at the deepest level
        # for example, "account" is set to "account123" rather than
        # to another empty dict
        branch[leaf] = value
    return tree
然后,您可以在列表推导式中对json_data列表的每个元素调用它:
# Apply nestify to every flat record, keeping the original order.
nested = list(map(nestify, json_data))
完整代码:
# Input for the complete example: three flat records keyed by
# dot-separated paths.
json_data = [
    {
        "org.iden.account": "account123",
        "org.iden.id": "id123",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs123",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com|email2@sanoma.com",
        "subs.org.buyer.phone.number": 371401234567,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id000",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs456",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
    {
        "org.iden.account": "account123",
        "org.iden.id": "id456",
        "adress.city": "riga",
        "adress.country": "latvia",
        "person.name.fullname": "laura",
        "person.gender": "female",
        "person.birthYear": 1990,
        "subs.id": "subs789",
        "subs.subs1.birthday": "1990-12-14T00:00:00Z",
        "subs.subs1.org.address.country": "latvia",
        "subs.subs1.org.address.strret1": "street 1",
        "subs.org.buyer.email.address": "email1@myorg.com",
        "subs.org.buyer.phone.number": 371407654321,
    },
]
def nestify(unnested):
    """Return a nested dict built from a flat, dot-keyed dict.

    For every ``key, value`` pair the key is split on ``'.'``; the last
    piece is the name the value is stored under and the preceding
    pieces are the chain of dicts leading to it. Intermediate dicts are
    created only when an earlier key has not created them already.
    """
    nested = {}
    for flat_key, value in unnested.items():
        path = flat_key.split('.')
        # Take the final name off so only parent names remain in path.
        leaf = path.pop()
        cursor = nested
        for step in path:
            if step not in cursor:
                cursor[step] = {}
            cursor = cursor[step]
        cursor[leaf] = value
    return nested
# Nest each flat record in turn, then print the resulting list.
nested = [*map(nestify, json_data)]
print(nested)
输出:
[
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id123'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs123',
'org': {
'buyer': {
'email': {
'address': 'email1@myorg.com|email2@sanoma.com'
},
'phone': {
'number': 371401234567
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id000'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs456',
'org': {
'buyer': {
'email': {
'address': 'email1@myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id456'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs789',
'org': {
'buyer': {
'email': {
'address': 'email1@myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
}
]
您能否接受直接处理json数据、而不经过pandas的解决方案?
@Axe319 当然可以。Pandas确实为数据帧提供了很大的灵活性,但其他解决方案同样欢迎。