Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/variables/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 2.7 在数据帧列表上循环并创建新的数据帧_Python 2.7_Pandas - Fatal编程技术网

Python 2.7 在数据帧列表上循环并创建新的数据帧

Python 2.7 在数据帧列表上循环并创建新的数据帧,python-2.7,pandas,Python 2.7,Pandas,我有一个数据帧列表: data_frames = [sort,sort1,sort2] def df_stats(data_frames): df = pd.DataFrame() for data in data_frames: df['Number'] = data.number.count() df["Total PVs"] = '{0:,.0f}'.format(data.PVs.sum()) df["Average"]

我有一个数据帧列表:

# Frames to summarise; sort, sort1 and sort2 are defined elsewhere.
data_frames = [sort,sort1,sort2]
def df_stats(data_frames):
    """Attempt to collect count/sum/mean/median statistics for each frame.

    This is the snippet the question is about: as written, the returned
    DataFrame comes back empty.

    Args:
        data_frames: iterable of DataFrames exposing ``number`` and ``PVs``
            columns.

    Returns:
        The DataFrame ``df`` built inside the function.
    """
    # Starts with no rows and no columns.
    df = pd.DataFrame()
    for data in data_frames:
        # Every iteration assigns one scalar to the same four whole columns;
        # nothing here addresses a per-frame row, so no row is ever created.
        df['Number'] = data.number.count()
        # '{0:,.0f}' renders the value with thousands separators, no decimals.
        df["Total PVs"] = '{0:,.0f}'.format(data.PVs.sum())
        df["Average"] = '{0:,.0f}'.format(data.PVs.mean())
        df["Median"] = '{0:,.0f}'.format(data.PVs.median())
    return df
我想对它们进行迭代,并将一些统计数据存储在一个新的 `df` 中。我以为这很简单,但是下面的函数在调用 `df_concat = df_stats(data_frames)` 时返回了一个空数据帧。我漏掉了什么?谢谢你的帮助。

创建一个示例数据集:

import pandas as pd
# Raw column data for the three sample frames; all share the same two columns.
data = {'number': [23, 56, 89], 'PVs': [23456, 34456, 6789]}
data1 = {'number': [28, 52, 12], 'PVs': [3423456, 2334456, 36789]}
data2 = {'number': [123, 5, 86], 'PVs': [2345655, 934456, 16789]}

# One DataFrame per dict; the dict keys become the column labels.
sort, sort1, sort2 = (
    pd.DataFrame.from_dict(raw) for raw in (data, data1, data2)
)
迭代数据帧的函数:

# Frames to summarise: the three sample frames built above.
data_frames = [sort,sort1,sort2]
def df_stats(data_frames):
    """Attempt to collect count/sum/mean/median statistics for each frame.

    This is the snippet the question is about: as written, the returned
    DataFrame comes back empty.

    Args:
        data_frames: iterable of DataFrames exposing ``number`` and ``PVs``
            columns.

    Returns:
        The DataFrame ``df`` built inside the function.
    """
    # Starts with no rows and no columns.
    df = pd.DataFrame()
    for data in data_frames:
        # Every iteration assigns one scalar to the same four whole columns;
        # nothing here addresses a per-frame row, so no row is ever created.
        df['Number'] = data.number.count()
        # '{0:,.0f}' renders the value with thousands separators, no decimals.
        df["Total PVs"] = '{0:,.0f}'.format(data.PVs.sum())
        df["Average"] = '{0:,.0f}'.format(data.PVs.mean())
        df["Median"] = '{0:,.0f}'.format(data.PVs.median())
    return df

我们可以使用 `pd.concat` + `groupby`,而不是 for 循环:

# Label the frames 1..N so the keys always match however many frames are in
# data_frames (a fixed [1, 2, 3] only fits exactly three), then aggregate
# within each label, which is level 0 of the concatenated index.
pd.concat(data_frames, keys=list(range(1, len(data_frames) + 1))).groupby(level=0).agg({'number':'count','PVs':['sum','mean','median']})
Out[1117]: 
  number      PVs                       
   count      sum          mean   median
1      3    64701  2.156700e+04    23456
2      3  5794701  1.931567e+06  2334456
3      3  3296900  1.098967e+06   934456
另外,如果您想使用您的函数,您可以将其修复为

# Build the summary one row per input frame: addressing cell (row, column)
# with .at grows df as new row labels and columns are first written.
df = pd.DataFrame()
for row, frame in enumerate(data_frames):
    page_views = frame.PVs
    df.at[row, 'Number'] = frame.number.count()
    # The three PVs statistics share one format: thousands separators, no decimals.
    formatted_stats = (
        ("Total PVs", page_views.sum()),
        ("Average", page_views.mean()),
        ("Median", page_views.median()),
    )
    for column, value in formatted_stats:
        df.at[row, column] = '{0:,.0f}'.format(value)



df
Out[1121]: 
   Number  Total PVs    Average     Median
0     3.0     64,701     21,567     23,456
1     3.0  5,794,701  1,931,567  2,334,456
2     3.0  3,296,900  1,098,967    934,456

以下函数可以正常工作:

# Map a label to each frame to be summarised.
dict_df = {
    'df1': sort1,
    'df': sort,
    'df2': sort2,
}


def df_stats(dict_df):
    """Summarise each labelled DataFrame as one row of statistics.

    Args:
        dict_df: mapping of label -> DataFrame; every frame must expose
            ``number`` and ``PVs`` columns.

    Returns:
        A DataFrame indexed by the mapping's labels with columns ``Number``
        (non-null count of ``number``) and ``Total PVs`` / ``Average`` /
        ``Median`` (sum, mean and median of ``PVs``, formatted as strings
        with thousands separators and no decimals).
    """
    # Pre-allocate one row per label so every cell can be filled by position.
    summary = pd.DataFrame(
        index=dict_df.keys(),
        columns=['Number', 'Total PVs', 'Average', 'Median'],
    )
    for label, frame in dict_df.items():
        page_views = frame.PVs
        summary.loc[label, "Number"] = frame.number.count()
        formatted_stats = (
            ("Total PVs", page_views.sum()),
            ("Average", page_views.mean()),
            ("Median", page_views.median()),
        )
        for column, value in formatted_stats:
            summary.loc[label, column] = '{0:,.0f}'.format(value)
    return summary
输出:

          Number    Total PVs   Average     Median
     df2    3      3,296,900    1,098,967   934,456
     df1    3      5,794,701    1,931,567   2,334,456
     df     3      64,701       21,567      23,456
试试这个:

# Example DataFrames: same column layout, different values.
data1, data2, data3 = (
    pd.DataFrame(values, columns=['number', 'PVs'])
    for values in (
        {'number': [23, 56, 89], 'PVs': [23456, 34456, 6789]},
        {'number': [28, 52, 12], 'PVs': [3423456, 2334456, 36789]},
        {'number': [123, 5, 86], 'PVs': [2345655, 934456, 16789]},
    )
)


def df_stats(dataFrame):
    """Summarise one DataFrame as a single-row DataFrame of statistics.

    Args:
        dataFrame: DataFrame with a ``number`` column and a numeric ``PVs``
            column.

    Returns:
        A one-row DataFrame (row label 0) with columns ``Number`` (non-null
        count of ``number``) and ``Total PVs`` / ``Average`` / ``Median``
        (sum, mean and median of ``PVs``, formatted as strings with
        thousands separators and no decimals).
    """
    page_views = dataFrame['PVs']
    row = {
        'Number': dataFrame['number'].count(),
        'Total PVs': '{0:,.0f}'.format(page_views.sum()),
        'Average': '{0:,.0f}'.format(page_views.mean()),
        'Median': '{0:,.0f}'.format(page_views.median()),
    }
    # Build the row in one step. Writing the count through .loc['Number']
    # would create a row *labelled* 'Number' and fill every column with it.
    return pd.DataFrame(
        [row], columns=['Number', 'Total PVs', 'Average', 'Median']
    )


# Frames to summarise, in the order their rows should appear in the result.
data_frames = [data1, data2, data3]

# Summarise every frame first and concatenate once: calling pd.concat inside
# the loop copies all previously accumulated rows again on each iteration.
summaries = [df_stats(dataFrame) for dataFrame in data_frames]

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
# ignore_index renumbers the rows 0..N-1.
result = pd.concat(summaries, ignore_index=True) if summaries else pd.DataFrame()

result.head(3)
输出为:

    Number  Total PVs   Average    Median
0   3       64,701      21,567     23,456
1   3       5,794,701   1,931,567  2,334,456
2   3       3,296,900   1,098,967  934,456

太棒了!谢谢大家!

@aviss 不客气~ :-) 编码快乐,节日快乐 :-)