Python: pull data from multiple DataFrames


I have the JSON file below and I want to recreate it from updated data:

{"AAL": {"year": [2012, 2013, 2014, 2015], 
         "eps": [-5.6, -11.25, 4.02, 11.39], 
         "revenue": [24855.0, 26743.0, 42650.0, 40990.0], 
         "op_revenue": [148.0, 1399.0, 4249.0, 6204.0]}, 
 "AAP": {"year": [2012, 2013, 2014, 2015], 
         "eps": [5.29, 5.36, 6.75, 6.45], 
         "revenue": [6205.003000000001, 6493.814, 9843.860999999999, 9737.018], 
         "op_revenue": [657.315, 660.318, 851.71, 825.78]}, 
 "AAPL": {"year": [2013, 2014, 2015, 2016], 
          "eps": [40.03, 6.49, 9.28, 8.35], 
          "revenue": [171000.0, 183000.0, 234000.0, 216000.0], 
          "op_revenue": [48999.0, 52503.0, 71230.0, 60024.0]}
 ...}
My data comes from three tables, eps, revenue and op_revenue, which all have exactly the same shape. Below are the first few rows of one of them; the first column is ticker and the remaining columns are years:

  ticker      2012        2013        2014       2015      2016       2017       2018
1      A 938000000   724000000   740000000  713000000 692000000  504000000  381000000
2    AAL 431000000 -1833000000 -1012000000 -752000000 -99000000 2499000000 2951000000
3    AAN 134624000   120666000   108005000   90656000  78813000   78233000   89137000
4   AAOI    390000     -131000      -46000    1873000   3060000    4283000    3523000
5   AAON  37359000    37547000    40229000   39473000  41391000   44158000   42735000
6    AAP 407546000   391758000   417694000  440311000 458658000  493825000  494211000
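
For reference, the posted rows can be rebuilt as one small wide-format DataFrame for testing; the real eps, revenue and op_revenue tables are assumed to share exactly this layout:

import pandas as pd

# Wide-format sample built from the rows shown above (the leading index column is dropped);
# the actual eps, revenue and op_revenue DataFrames are assumed to look the same.
sample = pd.DataFrame({
    "ticker": ["A", "AAL", "AAN", "AAOI", "AAON", "AAP"],
    "2012": [938000000, 431000000, 134624000, 390000, 37359000, 407546000],
    "2013": [724000000, -1833000000, 120666000, -131000, 37547000, 391758000],
    "2014": [740000000, -1012000000, 108005000, -46000, 40229000, 417694000],
    "2015": [713000000, -752000000, 90656000, 1873000, 39473000, 440311000],
    "2016": [692000000, -99000000, 78813000, 3060000, 41391000, 458658000],
    "2017": [504000000, 2499000000, 78233000, 4283000, 44158000, 493825000],
    "2018": [381000000, 2951000000, 89137000, 3523000, 42735000, 494211000],
})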

How can I recreate the JSON file?

(Comment: What exactly do you mean by recreating the JSON file? – I want to create a JSON file with the same shape as the existing one, but filled with the new data.)

Consider melting each DataFrame from wide to long, so that the year column headers become data values in a single column, then concatenating the three results and finally running a groupby on ticker with a dictionary comprehension:

import json
import pandas as pd

df_dict = {'eps': eps, 'revenue': revenue, 'op_revenue': op_revenue}

# MELTING WIDE TO LONG
new_df_dict = {k:(pd.melt(v, id_vars = "ticker", var_name = "year", value_name = k)
                    .set_index(["ticker", "year"])
                 ) for k,v in df_dict.items()}

# HORIZONTAL CONCATENATING
final_df = (pd.concat(new_df_dict, axis="columns")
              .sort_index()
              .reset_index()
           )

# FLATTEN MULTIINDEX COLUMNS TO SINGLE LEVEL
final_df.columns = final_df.columns.get_level_values(0)

# TICKER GROUPBY DICTIONARY
final_dict = {i: g.drop(columns='ticker').to_dict(orient='list')
              for i, g in final_df.groupby('ticker')}

# OUTPUT TO JSON
with open('Output.json', 'w') as f:
    f.write(json.dumps(final_dict, indent=3))
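
As a side note, json.dump writes directly to the file handle and is equivalent to the f.write(json.dumps(...)) call above:

with open('Output.json', 'w') as f:
    json.dump(final_dict, f, indent=3)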

Output (the OP's posted data is repeated for all three measures, since only one sample table was available):

{
   "A": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         938000000,
         724000000,
         740000000,
         713000000,
         692000000,
         504000000,
         381000000
      ],
      "op_revenue": [
         938000000,
         724000000,
         740000000,
         713000000,
         692000000,
         504000000,
         381000000
      ],
      "revenue": [
         938000000,
         724000000,
         740000000,
         713000000,
         692000000,
         504000000,
         381000000
      ]
   },
   "AAL": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         431000000,
         -1833000000,
         -1012000000,
         -752000000,
         -99000000,
         2499000000,
         2951000000
      ],
      "op_revenue": [
         431000000,
         -1833000000,
         -1012000000,
         -752000000,
         -99000000,
         2499000000,
         2951000000
      ],
      "revenue": [
         431000000,
         -1833000000,
         -1012000000,
         -752000000,
         -99000000,
         2499000000,
         2951000000
      ]
   },
   "AAN": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         134624000,
         120666000,
         108005000,
         90656000,
         78813000,
         78233000,
         89137000
      ],
      "op_revenue": [
         134624000,
         120666000,
         108005000,
         90656000,
         78813000,
         78233000,
         89137000
      ],
      "revenue": [
         134624000,
         120666000,
         108005000,
         90656000,
         78813000,
         78233000,
         89137000
      ]
   },
   "AAOI": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         390000,
         -131000,
         -46000,
         1873000,
         3060000,
         4283000,
         3523000
      ],
      "op_revenue": [
         390000,
         -131000,
         -46000,
         1873000,
         3060000,
         4283000,
         3523000
      ],
      "revenue": [
         390000,
         -131000,
         -46000,
         1873000,
         3060000,
         4283000,
         3523000
      ]
   },
   "AAON": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         37359000,
         37547000,
         40229000,
         39473000,
         41391000,
         44158000,
         42735000
      ],
      "op_revenue": [
         37359000,
         37547000,
         40229000,
         39473000,
         41391000,
         44158000,
         42735000
      ],
      "revenue": [
         37359000,
         37547000,
         40229000,
         39473000,
         41391000,
         44158000,
         42735000
      ]
   },
   "AAP": {
      "year": [
         "2012",
         "2013",
         "2014",
         "2015",
         "2016",
         "2017",
         "2018"
      ],
      "eps": [
         407546000,
         391758000,
         417694000,
         440311000,
         458658000,
         493825000,
         494211000
      ],
      "op_revenue": [
         407546000,
         391758000,
         417694000,
         440311000,
         458658000,
         493825000,
         494211000
      ],
      "revenue": [
         407546000,
         391758000,
         417694000,
         440311000,
         458658000,
         493825000,
         494211000
      ]
   }
}
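
To check the result, the written file converts straight back into per-ticker DataFrames, since every key maps to equal-length lists; a minimal sketch:

import json
import pandas as pd

# Read the generated file back and rebuild one ticker's table
with open('Output.json') as f:
    data = json.load(f)

aal = pd.DataFrame(data['AAL'])   # columns: year, eps, op_revenue, revenue
print(aal)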