Python 重塑数据帧以在嵌套dict中导出

   Category Area               Country Code Function Last Name     LanID  Spend1  Spend2  Spend3  Spend4  Spend5
0      Bisc   EE                  RU02,UA02       Mk     Smith    df3432     1.0     NaN     NaN     NaN     NaN
1      Bisc   EE                       RU02       Mk      Bibs    fdss34     1.0     NaN     NaN     NaN     NaN
2      Bisc   EE               UA02,EURASIA       Mk      Crow   fdsdr43     1.0     NaN     NaN     NaN     NaN
3      Bisc   WE                       FR31       Mk     Ellis   fdssdf3     1.0     NaN     NaN     NaN     NaN
4      Bisc   WE                  BE32,NL31       Mk     Mower   TOZ1720     1.0     NaN     NaN     NaN     NaN
5      Bisc   WE             FR31,BE32,NL31      LKU      Elan   SKY8851     1.0     1.0     1.0     1.0     1.0
6      Bisc   SE                       IT31       Mk    Bobret    3dfsfg     1.0     NaN     NaN     NaN     NaN
7      Bisc   SE                       GR31       Mk   Concept  MOSGX009     1.0     NaN     NaN     NaN     NaN
8      Bisc   SE   RU02,IT31,GR31,PT31,ES31      LKU     Solar   MSS5723     1.0     1.0     1.0     1.0     1.0
9      Bisc   SE        IT31,GR31,PT31,ES31       Mk      Brix    fdgd22     NaN     1.0     NaN     NaN     NaN
10     Choc   CE   RU02,CZ31,SK31,PL31,LT31      Fin    Ocoser    43233d     NaN     1.0     NaN     NaN     NaN
11     Choc   CE        DE31,AT31,HU31,CH31      Fin     Smuth     4rewf     NaN     1.0     NaN     NaN     NaN
12     Choc   CE              BG31,RO31,EMA      Fin    Momocs    hgghg2     NaN     1.0     NaN     NaN     NaN
13     Choc   WE             FR31,BE32,NL31      Fin   Bruntly    ffdd32     NaN     NaN     NaN     NaN     1.0
14     Choc   WE             FR31,BE32,NL31       Mk      Ofer  BROGX011     NaN     1.0     1.0     NaN     NaN
15     Choc   WE             FR31,BE32,NL31       Mk       Hem   NZJ3189     NaN     NaN     NaN     1.0     1.0
16      G&C   NE                  UA02,SE31       Mk       Cre   ORY9499     1.0     NaN     NaN     NaN     NaN
17      G&C   NE                       NO31       Mk      Qlyo   XVM7639     1.0     NaN     NaN     NaN     NaN
18      G&C   NE   GB31,NO31,SE31,IE31,FI31       Mk      Omny   LOX1512     NaN     1.0     1.0     NaN     NaN

    {RU02:  {Bisc:  {EE:    {Mkt:   {Spend1:    {df3432:    Smith}
                                                {fdss34:     Bibs}
            {Bisc:  {SE:    {LKU:   {Spend1:    {MSS5723:   Solar}
                                    {Spend2:    {MSS5723:   Solar}
                                    {Spend3:    {MSS5723:   Solar}
                                    {Spend4:    {MSS5723:   Solar}
                                    {Spend5:    {MSS5723:   Solar}
            {Choc:  {CE:    {Fin:   {Spend2:    {43233d:   Ocoser}

    {UA02:  {Bisc:  {EE:    {Mkt:   {Spend1:    {df3432:    Smith}
                                                {ffdsdr43:   Crow}
            {G&C:   {NE:    {Mkt:   {Spend1:    {ORY9499:     Cre}




  • 基于“,”分隔符对“国家/地区代码”列的内容进行切分的方法:完成
  • 基于唯一的国家/地区代码创建新列,并在“国家/地区代码”中包含该代码的每一行里填入 1:完成
  • 将DataFrame的索引递归地设置为每个新添加的列
  • 将有数据的每个国家/地区代码的每行移到新的数据框中
  • 将所有新数据帧导出到DICT,然后合并它们
  • 但是,我不确定步骤3-6是否是进行此操作的最佳方式,因为我仍然难以理解如何为我的案例配置pd.DataFrame.to_dict(如果可能的话)



    # Keep track of the initial order of columns.
    initialOrder = list(df.columns.values)
    # Split every "Country Code" cell on "," into a list of individual codes.
    CCodePerRow = df['Country Code'].str.split(",")
    CCodeNoCommas = [item for items in CCodePerRow for item in items]
    # Add only the UNIQUE Country Codes -via set- as new columns in the DataFrame,
    # with NaN for row values. Sorting makes the new column order deterministic
    # (iteration order of a set is arbitrary).
    CCodeUniqueOnly = set(CCodeNoCommas)
    df = pd.concat([df, pd.DataFrame(columns=sorted(CCodeUniqueOnly))])
    # Reorder columns to have the newly added ones at the end.
    reordered = initialOrder + [c for c in df.columns if c not in initialOrder]
    df = df[reordered]
    # Replace NaN with 1 in the newly added columns (Country Codes), where the same
    # Country Code exists in the initial column "Country Code"; do this for each row.
    for c in CCodeUniqueOnly:
        # Exact membership in the split codes: str.contains() would treat `c`
        # as a regex and also match it as a substring of a longer code.
        CCodeIsPresent = CCodePerRow.apply(lambda codes: c in codes)
        CCodeIsPresent_rowIndex = df.index[CCodeIsPresent.values]
        df.loc[CCodeIsPresent_rowIndex, c] = 1
    # no clue what to do next ??



    # Turn each comma-separated "Country Code" cell into a list of codes.
    df['Country Code'] = df['Country Code'].str.split(',')

    # Give every (row, code) pair its own row: spread each list over columns,
    # stack them into one Series and keep only the originating row label as
    # index. Without this step the groupby below would receive unhashable lists.
    s = df.apply(lambda x: pd.Series(x['Country Code']), axis=1) \
        .stack().reset_index(level=1, drop=True)
    s.name = 'Country Code'  # DataFrame.join() requires a named Series
    df = df.drop('Country Code', axis=1).join(s).reset_index(drop=True)

    spend_cols = ['Spend1', 'Spend2', 'Spend3', 'Spend4', 'Spend5']
    # Per country, add a 'level_1' column holding the name of each Spend column
    # stacked out of the row; a row is repeated once per such column.
    # NOTE(review): relies on stack() omitting NaN cells - confirm for the
    # pandas version in use.
    df = df.groupby('Country Code') \
        .apply(lambda g: g.join(pd.DataFrame(g[spend_cols].stack()) \
        .reset_index(level=1)['level_1'])) \
        .reset_index(drop=True)

    def recur_dictify(frame):
        """Convert a DataFrame into nested dicts keyed column by column.

        The first column supplies the outermost keys, the next column the
        keys one level down, and so on; the last column supplies the leaves.

        Args:
            frame: DataFrame whose column order defines the nesting order.

        Returns:
            A nested dict. A leaf is the single remaining value when exactly
            one row is left, otherwise the squeezed array of remaining values.
        """
        if len(frame.columns) == 1:
            # Only the leaf column is left: unwrap a lone value, else return
            # all remaining values as an array.
            if frame.values.size == 1:
                return frame.values[0][0]
            return frame.values.squeeze()
        grouped = frame.groupby(frame.columns[0])
        # .iloc replaces the removed .ix indexer; the slice is purely
        # positional (drop the first column, keep the rest).
        d = {k: recur_dictify(g.iloc[:, 1:]) for k, g in grouped}
        return d

    # Column order defines the nesting order of the resulting dict, outermost
    # key first; the last column ('Last Name') supplies the leaf values.
    cols = ['Country Code', 'Category', 'Area', 'Function', 'level_1', 'LanID', 'Last Name']
    # Build the nested dict from the selected columns.
    d = recur_dictify(df[cols])


    # Turn each comma-separated "Country Code" cell into a list of codes.
    df['Country Code'] = df['Country Code'].str.split(',')
    # Give every (row, code) pair its own row: spread each list over columns,
    # stack them into one Series and keep only the originating row label as index.
    s = df.apply(lambda x: pd.Series(x['Country Code']), axis=1) \
        .stack().reset_index(level=1, drop=True)
    s.name = 'Country Code'  # DataFrame.join() requires a named Series
    df = df.drop('Country Code', axis=1).join(s).reset_index(drop=True)
    spend_cols = ['Spend1', 'Spend2', 'Spend3', 'Spend4', 'Spend5']
    # Per country, add a 'level_1' column holding the name of each Spend column
    # stacked out of the row; a row is repeated once per such column.
    # NOTE(review): relies on stack() omitting NaN cells - confirm for the
    # pandas version in use.
    df = df.groupby('Country Code') \
        .apply(lambda g: g.join(pd.DataFrame(g[spend_cols].stack()) \
        .reset_index(level=1)['level_1'])) \
        .reset_index(drop=True)
    def recur_dictify(frame):
        """Convert a DataFrame into nested dicts keyed column by column.

        The first column supplies the outermost keys, the next column the
        keys one level down, and so on; the last column supplies the leaves.

        Args:
            frame: DataFrame whose column order defines the nesting order.

        Returns:
            A nested dict. A leaf is the single remaining value when exactly
            one row is left, otherwise the squeezed array of remaining values.
        """
        if len(frame.columns) == 1:
            # Only the leaf column is left: unwrap a lone value, else return
            # all remaining values as an array.
            if frame.values.size == 1:
                return frame.values[0][0]
            return frame.values.squeeze()
        grouped = frame.groupby(frame.columns[0])
        # .iloc replaces the removed .ix indexer; the slice is purely
        # positional (drop the first column, keep the rest).
        d = {k: recur_dictify(g.iloc[:, 1:]) for k, g in grouped}
        return d
    # Column order defines the nesting order of the resulting dict, outermost
    # key first; the last column ('Last Name') supplies the leaf values.
    cols = ['Country Code', 'Category', 'Area', 'Function', 'level_1', 'LanID', 'Last Name']
    # Build the nested dict from the selected columns.
    d = recur_dictify(df[cols])

    # Turn each comma-separated "Country Code" cell into a list of codes.
    df['Country Code'] = df['Country Code'].str.split(',')
    # Give every (row, code) pair its own row: spread each list over columns,
    # stack them into one Series and keep only the originating row label as index.
    s = df.apply(lambda x: pd.Series(x['Country Code']), axis=1) \
        .stack().reset_index(level=1, drop=True)
    s.name = 'Country Code'  # DataFrame.join() requires a named Series
    df = df.drop('Country Code', axis=1).join(s).reset_index(drop=True)
    spend_cols = ['Spend1', 'Spend2', 'Spend3', 'Spend4', 'Spend5']
    # Per country, add a 'level_1' column holding the name of each Spend column
    # stacked out of the row; a row is repeated once per such column.
    # NOTE(review): relies on stack() omitting NaN cells - confirm for the
    # pandas version in use.
    df = df.groupby('Country Code') \
        .apply(lambda g: g.join(pd.DataFrame(g[spend_cols].stack()) \
        .reset_index(level=1)['level_1'])) \
        .reset_index(drop=True)
    def recur_dictify(frame):
        """Convert a DataFrame into nested dicts keyed column by column.

        The first column supplies the outermost keys, the next column the
        keys one level down, and so on; the last column supplies the leaves.

        Args:
            frame: DataFrame whose column order defines the nesting order.

        Returns:
            A nested dict. A leaf is the single remaining value when exactly
            one row is left, otherwise the squeezed array of remaining values.
        """
        if len(frame.columns) == 1:
            # Only the leaf column is left: unwrap a lone value, else return
            # all remaining values as an array.
            if frame.values.size == 1:
                return frame.values[0][0]
            return frame.values.squeeze()
        grouped = frame.groupby(frame.columns[0])
        # .iloc replaces the removed .ix indexer; the slice is purely
        # positional (drop the first column, keep the rest).
        d = {k: recur_dictify(g.iloc[:, 1:]) for k, g in grouped}
        return d
    # Column order defines the nesting order of the resulting dict, outermost
    # key first; the last column ('Last Name') supplies the leaf values.
    cols = ['Country Code', 'Category', 'Area', 'Function', 'level_1', 'LanID', 'Last Name']
    # Build the nested dict from the selected columns.
    d = recur_dictify(df[cols])