Python 复制数据帧并转换为Json

Python 复制数据帧并转换为Json,python,json,pandas,dataframe,data-science,Python,Json,Pandas,Dataframe,Data Science,基本上,我正在读取熊猫数据帧并将其转换为Json。我是一名编码初学者,但我知道最好使用apply函数,而不是iterrows(我已经尝试过使用apply函数,但在理解语法和找到解决方案方面出现了一些困难) =============================== 我从excel读取的数据 id label id_customer label_customer part_number number_customer product label_

基本上,我正在读取一个 pandas 数据帧(DataFrame)并将其转换为 JSON。我是一名编程初学者,但我知道最好使用
apply
函数,而不是
iterrows
(我已经尝试过使用
apply
函数,但在理解语法和找到解决方案方面出现了一些困难)

===============================

我从excel读取的数据

id     label        id_customer     label_customer    part_number   number_customer   product   label_product    key    country  value_product

6     Sao Paulo      CUST-99992         Brazil          982               10          sho1564       shoes       SH-99   Chile         1.5        

6     Sao Paulo      CUST-99992         Brazil          982               10          sn47282       sneakers    SN-71   Germany       43.8 

6     Sao Paulo      CUST-43535         Argentina       435               15          sk84393       skirt       SK-11   Netherlands   87.1  

92    Hong Kong      CUST-88888         China           785               58          ca40349       cap         CA-82   Russia        3.95
import pandas as pd 
import json

# Load the spreadsheet; `path` must point at the Excel file to convert.
df = pd.read_excel(path)

# One record per (id, label) pair. sort=False keeps the groups in the order
# in which they first appear in the sheet.
result = []
for (group_id, group_label), group_rows in df.groupby(['id', 'label'], sort=False):
    customers = []
    record = {'id': int(group_id), 'label': group_label, 'Customer': customers}

    # Within a record, one entry per (id_customer, label_customer) pair.
    customer_groups = group_rows.groupby(['id_customer', 'label_customer'], sort=False)
    for (customer_id, customer_label), customer_rows in customer_groups:
        numbers = []
        # Every row of the customer contributes one part/number entry, so
        # repeated rows produce repeated entries.
        for part, number in zip(customer_rows['part_number'], customer_rows['number_customer']):
            numbers.append({'part': str(part), 'number_customer': str(number)})
        customers.append({'id': customer_id, 'label': customer_label, 'Number': numbers})

    result.append(record)
[
 {
  "id": 6,
  "label": "Sao Paulo",
  "Customer": [
   {
    "id": "CUST-99992",
    "label": "Brazil",
    "Number": [
     {
      "part": "982",
      "number_customer": "10"
     },
     {
      "part": "982",
      "number_customer": "10"
     }
    ]
   },
   {
    "id": "CUST-43535",
    "label": "Argentina",
    "Number": [
     {
      "part": "435",
      "number_customer": "15"
     }
    ]
   }
  ]
 },
 {
  "id": 92,
  "label": "Hong Kong",
  "Customer": [
   {
    "id": "CUST-88888",
    "label": "China",
    "Number": [
     {
      "part": "785",
      "number_customer": "58"
     }
    ]
   }
  ]
 }
]
===============================

代码:

id     label        id_customer     label_customer    part_number   number_customer   product   label_product    key    country  value_product

6     Sao Paulo      CUST-99992         Brazil          982               10          sho1564       shoes       SH-99   Chile         1.5        

6     Sao Paulo      CUST-99992         Brazil          982               10          sn47282       sneakers    SN-71   Germany       43.8 

6     Sao Paulo      CUST-43535         Argentina       435               15          sk84393       skirt       SK-11   Netherlands   87.1  

92    Hong Kong      CUST-88888         China           785               58          ca40349       cap         CA-82   Russia        3.95
import pandas as pd 
import json

# Load the spreadsheet; `path` must point at the Excel file to convert.
df = pd.read_excel(path)

# One record per (id, label) pair. sort=False keeps the groups in the order
# in which they first appear in the sheet.
result = []
for (group_id, group_label), group_rows in df.groupby(['id', 'label'], sort=False):
    customers = []
    record = {'id': int(group_id), 'label': group_label, 'Customer': customers}

    # Within a record, one entry per (id_customer, label_customer) pair.
    customer_groups = group_rows.groupby(['id_customer', 'label_customer'], sort=False)
    for (customer_id, customer_label), customer_rows in customer_groups:
        numbers = []
        # Every row of the customer contributes one part/number entry, so
        # repeated rows produce repeated entries.
        for part, number in zip(customer_rows['part_number'], customer_rows['number_customer']):
            numbers.append({'part': str(part), 'number_customer': str(number)})
        customers.append({'id': customer_id, 'label': customer_label, 'Number': numbers})

    result.append(record)
[
 {
  "id": 6,
  "label": "Sao Paulo",
  "Customer": [
   {
    "id": "CUST-99992",
    "label": "Brazil",
    "Number": [
     {
      "part": "982",
      "number_customer": "10"
     },
     {
      "part": "982",
      "number_customer": "10"
     }
    ]
   },
   {
    "id": "CUST-43535",
    "label": "Argentina",
    "Number": [
     {
      "part": "435",
      "number_customer": "15"
     }
    ]
   }
  ]
 },
 {
  "id": 92,
  "label": "Hong Kong",
  "Customer": [
   {
    "id": "CUST-88888",
    "label": "China",
    "Number": [
     {
      "part": "785",
      "number_customer": "58"
     }
    ]
   }
  ]
 }
]
===============================

我得到的Json:

id     label        id_customer     label_customer    part_number   number_customer   product   label_product    key    country  value_product

6     Sao Paulo      CUST-99992         Brazil          982               10          sho1564       shoes       SH-99   Chile         1.5        

6     Sao Paulo      CUST-99992         Brazil          982               10          sn47282       sneakers    SN-71   Germany       43.8 

6     Sao Paulo      CUST-43535         Argentina       435               15          sk84393       skirt       SK-11   Netherlands   87.1  

92    Hong Kong      CUST-88888         China           785               58          ca40349       cap         CA-82   Russia        3.95
import pandas as pd 
import json

# Load the spreadsheet; `path` must point at the Excel file to convert.
df = pd.read_excel(path)

# One record per (id, label) pair. sort=False keeps the groups in the order
# in which they first appear in the sheet.
result = []
for (group_id, group_label), group_rows in df.groupby(['id', 'label'], sort=False):
    customers = []
    record = {'id': int(group_id), 'label': group_label, 'Customer': customers}

    # Within a record, one entry per (id_customer, label_customer) pair.
    customer_groups = group_rows.groupby(['id_customer', 'label_customer'], sort=False)
    for (customer_id, customer_label), customer_rows in customer_groups:
        numbers = []
        # Every row of the customer contributes one part/number entry, so
        # repeated rows produce repeated entries.
        for part, number in zip(customer_rows['part_number'], customer_rows['number_customer']):
            numbers.append({'part': str(part), 'number_customer': str(number)})
        customers.append({'id': customer_id, 'label': customer_label, 'Number': numbers})

    result.append(record)
[
 {
  "id": 6,
  "label": "Sao Paulo",
  "Customer": [
   {
    "id": "CUST-99992",
    "label": "Brazil",
    "Number": [
     {
      "part": "982",
      "number_customer": "10"
     },
     {
      "part": "982",
      "number_customer": "10"
     }
    ]
   },
   {
    "id": "CUST-43535",
    "label": "Argentina",
    "Number": [
     {
      "part": "435",
      "number_customer": "15"
     }
    ]
   }
  ]
 },
 {
  "id": 92,
  "label": "Hong Kong",
  "Customer": [
   {
    "id": "CUST-88888",
    "label": "China",
    "Number": [
     {
      "part": "785",
      "number_customer": "58"
     }
    ]
   }
  ]
 }
]
===============================

Json应为:

[
 {
  "id": 6,
  "label": "Sao Paulo",
  "Customer": [
   {
    "id": "CUST-99992",
    "label": "Brazil",
    "Number": [
     {
      "part": "982",
      "number_customer": "10",
      "Products": [
       {
        "product": "sho1564",
        "label_product": "shoes",
        "Order": [
        {
         "key": "SH-99",
         "country": "Chile",    
         "value_product": "1.5"
        }   
       ]            
     },
     {
        "product": "sn47282",
        "label_product": "sneakers",
        "Order": [
        {
         "key": "SN-71",
         "country": "Germany",  
         "value_product": "43.8"
        }   
       ] 
      }
      ]
     }
    ] 
   },
   {
    "id": "CUST-43535",
    "label": "Argentina",
    "Number": [
     {
      "part": "435",
      "number_customer": "15",
      "Products": [
       {
        "product": "sk84393",
        "label_product": "skirt",
        "Order": [
        {
         "key": "SK-11",
         "country": "Netherlands",  
         "value_product": "87.1"
        }   
       ]            
      }
      ]
     }
    ]
   }
  ]
 },
 {
  "id": 92,
  "label": "Hong Kong",
  "Customer": [
   {
    "id": "CUST-88888",
    "label": "China",
    "Number": [
     {
      "part": "785",
      "number_customer": "58",
      "Products": [
       {
        "product": "ca40349",
        "label_product": "cap",
        "Order": [
        {
         "key": "CA-82",
         "country": "Russia",   
         "value_product": "3.95"
        }   
       ]            
      }
      ]
     }
    ]
   }
  ]
 }
]
===============================

可以看到,`id` 和 `label` 是一组信息,`id_customer` 和 `label_customer` 是另一组,`part_number` 和 `number_customer` 是另一组,`product` 和 `label_product` 也是另一组,`key`、`country` 和 `value_product` 又是另一组。

我期望的Json取决于我的数据帧中的信息

有人能帮我什么忙吗?

希望这有用

import pandas as pd 
import json

# Load the spreadsheet; `path` must point at the Excel file to convert.
df = pd.read_excel(path)

# Build the nested structure one grouping level at a time:
#   record -> Customer -> Number -> Products -> Order
# sort=False keeps every level in the order in which its groups first appear
# in the sheet. Each level uses its own loop variables so an inner loop never
# rebinds a name that belongs to an outer level.
result = []
for (record_id, record_label), record_rows in df.groupby(['id', 'label'], sort=False):
    # int() turns the group key into a plain Python integer.
    record = {'id': int(record_id), 'label': record_label, 'Customer': []}

    customer_groups = record_rows.groupby(['id_customer', 'label_customer'], sort=False)
    for (customer_id, customer_label), customer_rows in customer_groups:
        customer = {'id': customer_id, 'label': customer_label, 'Number': []}

        number_groups = customer_rows.groupby(['part_number', 'number_customer'], sort=False)
        for (part, number_customer), number_rows in number_groups:
            number = {'part': str(part), 'number_customer': str(number_customer), 'Products': []}

            product_groups = number_rows.groupby(['product', 'label_product'], sort=False)
            for (product_code, product_label), product_rows in product_groups:
                product = {'product': product_code, 'label_product': product_label, 'Order': []}

                # One order entry per row of the product.
                order_columns = zip(product_rows['key'], product_rows['country'],
                                    product_rows['value_product'])
                for key, country, value in order_columns:
                    # The requested JSON carries value_product as text, the
                    # same way part and number_customer are emitted above.
                    product['Order'].append(
                        {'key': key, 'country': country, 'value_product': str(value)})

                number['Products'].append(product)
            customer['Number'].append(number)
        record['Customer'].append(customer)
    result.append(record)
from io import StringIO
import pandas as pd
import json

# Sample rows from the question, inlined so the example runs without the
# original Excel file.
csv = StringIO("""id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Kong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95""")

df = pd.read_csv(csv)


def _to_native(value):
    """Return ``value`` as a built-in Python object if it is a NumPy scalar."""
    import numpy as np  # local import: the surrounding file only imports pandas

    return value.item() if isinstance(value, np.generic) else value


def split(df, groupby, json_func, sort=False):
    """Yield one JSON-ready object per group of ``df``.

    Args:
        df: DataFrame to partition.
        groupby: Column name, or list of column names, to group on.
        json_func: Callable invoked as ``json_func(group, *key_values)`` with
            the group's sub-frame followed by one positional argument per
            grouping column. Its return value is yielded unchanged.
        sort: Forwarded to ``DataFrame.groupby``. Defaults to ``False`` so
            groups are produced in order of first appearance rather than
            sorted by key.

    Yields:
        Whatever ``json_func`` returns for each group.
    """
    for key, group in df.groupby(groupby, sort=sort):
        # Grouping on a single column can yield a bare scalar key; wrap it so
        # it is passed as one argument instead of being unpacked.
        if not isinstance(key, tuple):
            key = (key,)
        # json.dumps rejects NumPy integer scalars, so hand json_func plain
        # Python values.
        yield json_func(group, *(_to_native(value) for value in key))


# One builder per nesting level. Each receives the rows of its group plus the
# values of the grouping columns, and delegates the next level to split().
# Numeric leaves are emitted as text because the requested JSON shows them as
# strings.

def _order_json(_, key, country, value_product):
    return {"key": key, "country": country, "value_product": str(value_product)}


def _product_json(rows, product, label_product):
    orders = list(split(rows, ['key', 'country', 'value_product'], _order_json))
    return {"product": product, "label_product": label_product, "Order": orders}


def _number_json(rows, part, num_cust):
    products = list(split(rows, ['product', 'label_product'], _product_json))
    return {"part": str(part), "number_customer": str(num_cust), "Products": products}


def _customer_json(rows, id_cust, label_cust):
    numbers = list(split(rows, ['part_number', 'number_customer'], _number_json))
    return {"id": id_cust, "label": label_cust, "Number": numbers}


def _record_json(rows, id_, label):
    customers = list(split(rows, ['id_customer', 'label_customer'], _customer_json))
    return {"id": id_, "label": label, "Customer": customers}


a = list(split(df, ['id', 'label'], _record_json))

# print + json.dumps works in a plain interpreter as well as in a notebook.
print(json.dumps(a, indent=1, ensure_ascii=False))

预期的 JSON 有点奇怪——其中 'Number' 是一个只包含一个对象的列表,它有可能包含多个对象吗?

是的,@lan。'Number' 列表可以包含多个对象,这完全取决于我读取的数据帧中的内容。