Python 如何使用loc方法在数据帧中设置列？_Python_Pandas

Python 如何使用loc方法在数据帧中设置列？

python pandas

Python 如何使用loc方法在数据帧中设置列？,python,pandas,Python,Pandas,如何使用loc方法在数据帧中设置列？我在下面发布了示例代码。我不断得到一个索引错误 pandas.core.indexing.IndexingError: (slice(None, None, None), ['weight'])。我不确定你想做什么，但如果你想把所有的权重都改为 1，可以考虑把最后一行改成 firefox.loc[:, 'weight'] = 1。您可以这样做： df.loc[df['company'] == 'Firefox', 'weightsum'] = 1 df 输出： survey_id person questio

如何使用loc方法在数据帧中设置列？我在下面发布了示例代码。我不断得到一个索引错误

pandas.core.indexing.IndexingError: (slice(None, None, None), ['weight'])

我不确定你想做什么，但如果你想把所有的权重都改为 1，可以考虑把最后一行改成 `firefox.loc[:, 'weight'] = 1`。您可以这样做：

# Boolean mask picks the Firefox rows; assigning through .loc creates the
# 'weightsum' column when it does not exist yet, leaving other rows as NaN.
is_firefox = df['company'] == 'Firefox'
df.loc[is_firefox, 'weightsum'] = 1
df

输出：

    survey_id person question_num question_buckets question_short  company  rating  weight  weightsum
0         101     Ty           Q1      performance         timely    Opera       4   0.120        NaN
1         101     Ty           Q2      performance      diversity    Opera       5   0.215        NaN
2         101     Ty           Q3      performance      knowledge    Opera       3   0.200        NaN
3         101     Ty           Q4      performance   transparency    Opera       3   0.150        NaN
4         101     Ty           Q5             cost      budgeting    Opera       3   0.135        NaN
5         101     Ty           Q6             cost            ROI    Opera       3   0.180        NaN
6         102     Bo           Q1      performance         timely  Firefox       4   0.120        1.0
7         102     Bo           Q2      performance      diversity  Firefox       5   0.215        1.0
8         102     Bo           Q3      performance      knowledge  Firefox       5   0.200        1.0
9         102     Bo           Q4      performance   transparency  Firefox       4   0.150        1.0
10        102     Bo           Q5             cost      budgeting  Firefox       5   0.135        1.0
11        102     Bo           Q6             cost            ROI  Firefox       5   0.180        1.0
12        103     Al           Q1      performance         timely   Safari       1   0.120        NaN
13        103     Al           Q2      performance      diversity   Safari       4   0.215        NaN
14        103     Al           Q3      performance      knowledge   Safari       1   0.200        NaN
15        103     Al           Q4      performance   transparency   Safari       2   0.150        NaN
16        103     Al           Q5             cost      budgeting   Safari       1   0.135        NaN
17        103     Al           Q6             cost            ROI   Safari       2   0.180        NaN
18        104     Bo           Q1      performance         timely   Chrome       1   0.120        NaN
19        104     Bo           Q2      performance      diversity   Chrome       5   0.215        NaN
20        104     Bo           Q3      performance      knowledge   Chrome       1   0.200        NaN
21        104     Bo           Q4      performance   transparency   Chrome       5   0.150        NaN
22        104     Bo           Q5             cost      budgeting   Chrome       1   0.135        NaN
23        104     Bo           Q6             cost            ROI   Chrome       5   0.180        NaN

这是肯定的

import pandas as pd
# Survey fixture: four surveys (101-104), six questions each, one company per
# survey. The question metadata and weights repeat for every survey, so they
# are written once and tiled; only the ratings differ row by row.
data = {
    "survey_id": [sid for sid in (101, 102, 103, 104) for _ in range(6)],
    "person": [who for who in ('Ty', 'Bo', 'Al', 'Bo') for _ in range(6)],
    'question_num': ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6'] * 4,
    'question_buckets': (['performance'] * 4 + ['cost'] * 2) * 4,
    'question_short': ['timely', 'diversity', 'knowledge',
                       'transparency', 'budgeting', 'ROI'] * 4,
    'company': [firm
                for firm in ('Opera', 'Firefox', 'Safari', 'Chrome')
                for _ in range(6)],
    "rating": [
        4, 5, 3, 3, 3, 3,  # survey 101
        4, 5, 5, 4, 5, 5,  # survey 102
        1, 4, 1, 2, 1, 2,  # survey 103
        1, 5, 1, 5, 1, 5,  # survey 104
    ],
    'weight': [.12, .215, .2, .15, .135, .18] * 4,
}
df = pd.DataFrame(data)
def CalcNewRatings(row):
    """Return the weighted rating of one row: ``row['rating'] * row['weight']``."""
    rating = row['rating']
    weight = row['weight']
    return rating * weight

# Bucket the survey rows by respondent; the per-person groups are pulled out
# one at a time further down.
PersonGroup = df.groupby(by='person')

def calc(name):
    """Add weighted-rating columns to one person's survey rows.

    The work is done on a copy, so the frame passed in (typically a
    ``groupby(...).get_group(...)`` slice) is left untouched and pandas has
    no reason to emit ``SettingWithCopyWarning``.

    Parameters
    ----------
    name : pandas.DataFrame
        Rows that carry at least the ``rating``, ``weight`` and ``company``
        columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``name`` with four extra columns, in this order:
        ``weightsum`` (constant 1), ``ratingtimesweight``
        (``rating * weight``), ``totalrating`` (sum of ``ratingtimesweight``
        over the rows sharing the same company) and ``finalgrade``
        (``totalrating / weightsum``).
    """
    name = name.copy()
    name['weightsum'] = 1

    # Column-wise product; equivalent to the row-by-row multiplication but
    # vectorised, and it also behaves on an empty frame.
    name['ratingtimesweight'] = name['rating'] * name['weight']

    # One aggregation pass instead of re-filtering the frame per company.
    company_totals = name.groupby('company')['ratingtimesweight'].sum()
    overall_total = name['ratingtimesweight'].sum()
    # Rows whose company is missing belong to no group; they fall back to the
    # overall total, which is what the previous implementation left there.
    name['totalrating'] = name['company'].map(company_totals).fillna(overall_total)

    name['finalgrade'] = name['totalrating'].div(name['weightsum'])
    return name

# Score every person's rows with calc() and stack the results into one frame.
# DataFrame.append was removed in pandas 2.0; gathering the pieces and
# concatenating once also avoids re-copying the accumulated frame on every
# iteration.
frames = []
for perso in PersonGroup.groups:
    someone = PersonGroup.get_group(perso)
    frames.append(calc(someone))

# pd.concat raises on an empty list, so keep the empty-frame result for that case.
newData = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print(newData)

期望的输出是什么？期望的输出是 firefox 中每一行的 ['weightsum'] = 1。您应该先检查数据框中是否存在该列；如果存在，firefox.loc[:, 'weightsum'] = 1 应该可以工作。您是在从一列（weightsum）中再索引另一列（weight），所以它当然会报错。

import pandas as pd
# Survey fixture: four surveys (101-104), six questions each, one company per
# survey. The question metadata and weights repeat for every survey, so they
# are written once and tiled; only the ratings differ row by row.
data = {
    "survey_id": [sid for sid in (101, 102, 103, 104) for _ in range(6)],
    "person": [who for who in ('Ty', 'Bo', 'Al', 'Bo') for _ in range(6)],
    'question_num': ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6'] * 4,
    'question_buckets': (['performance'] * 4 + ['cost'] * 2) * 4,
    'question_short': ['timely', 'diversity', 'knowledge',
                       'transparency', 'budgeting', 'ROI'] * 4,
    'company': [firm
                for firm in ('Opera', 'Firefox', 'Safari', 'Chrome')
                for _ in range(6)],
    "rating": [
        4, 5, 3, 3, 3, 3,  # survey 101
        4, 5, 5, 4, 5, 5,  # survey 102
        1, 4, 1, 2, 1, 2,  # survey 103
        1, 5, 1, 5, 1, 5,  # survey 104
    ],
    'weight': [.12, .215, .2, .15, .135, .18] * 4,
}
df = pd.DataFrame(data)
def CalcNewRatings(row):
    """Return the weighted rating of one row: ``row['rating'] * row['weight']``."""
    rating = row['rating']
    weight = row['weight']
    return rating * weight

# Bucket the survey rows by respondent; the per-person groups are pulled out
# one at a time further down.
PersonGroup = df.groupby(by='person')

def calc(name):
    """Add weighted-rating columns to one person's survey rows.

    The work is done on a copy, so the frame passed in (typically a
    ``groupby(...).get_group(...)`` slice) is left untouched and pandas has
    no reason to emit ``SettingWithCopyWarning``.

    Parameters
    ----------
    name : pandas.DataFrame
        Rows that carry at least the ``rating``, ``weight`` and ``company``
        columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``name`` with four extra columns, in this order:
        ``weightsum`` (constant 1), ``ratingtimesweight``
        (``rating * weight``), ``totalrating`` (sum of ``ratingtimesweight``
        over the rows sharing the same company) and ``finalgrade``
        (``totalrating / weightsum``).
    """
    name = name.copy()
    name['weightsum'] = 1

    # Column-wise product; equivalent to the row-by-row multiplication but
    # vectorised, and it also behaves on an empty frame.
    name['ratingtimesweight'] = name['rating'] * name['weight']

    # One aggregation pass instead of re-filtering the frame per company.
    company_totals = name.groupby('company')['ratingtimesweight'].sum()
    overall_total = name['ratingtimesweight'].sum()
    # Rows whose company is missing belong to no group; they fall back to the
    # overall total, which is what the previous implementation left there.
    name['totalrating'] = name['company'].map(company_totals).fillna(overall_total)

    name['finalgrade'] = name['totalrating'].div(name['weightsum'])
    return name

# Score every person's rows with calc() and stack the results into one frame.
# DataFrame.append was removed in pandas 2.0; gathering the pieces and
# concatenating once also avoids re-copying the accumulated frame on every
# iteration.
frames = []
for perso in PersonGroup.groups:
    someone = PersonGroup.get_group(perso)
    frames.append(calc(someone))

# pd.concat raises on an empty list, so keep the empty-frame result for that case.
newData = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print(newData)