Python:根据其他列中的值向 DataFrame 添加新列

Python 基于其他列中的值将列添加到数据帧,python,pandas,dataframe,lambda,Python,Pandas,Dataframe,Lambda,我想根据其中一列中的值向DataFrame添加一列 import pandas as pd import numpy as np Records = 100 df = pd.DataFrame ( {'ID' : range(1, Records + 1), 'Group' : np.random.choice(range(1, 41), Records, replace = True) } ) def Age(x):

我想根据 DataFrame 中某一列的值,为它添加一个新列。

import pandas as pd
import numpy as np

# Number of rows in the demo frame.
Records = 100

# Sequential IDs 1..Records paired with a group label drawn at random
# (with replacement) from 1..40.
df = pd.DataFrame({
    'ID': range(1, Records + 1),
    'Group': np.random.choice(range(1, 41), size=Records, replace=True),
})

def Age(x):
    """Label each group number as '65+' or '65-'.

    Values in the inclusive ranges 14-20 and 34-40 are labelled '65+';
    every other value is labelled '65-'.

    Args:
        x: Iterable of group numbers (for example the ``df.Group`` column).

    Returns:
        A list of labels, one per element of ``x``, in the same order.
    """
    return [
        '65+' if (14 <= i <= 20) or (34 <= i <= 40) else '65-'
        for i in x
    ]

# Derive the age label of every row from its group number.
df['Age'] = Age(df['Group'])

# Show the first ten rows to check the result.
print(df.head(n=10))

    Group  ID  Age
0      11   1  65-
1       1   2  65-
2       6   3  65-
3      32   4  65-
4      31   5  65-
5      39   6  65+
6      26   7  65-
7      38   8  65+
8      26   9  65-
9      31  10  65-
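import pandas as pd
import numpy as np

Records = 100

df = pd.DataFrame (
        {'ID' : range(1, Records + 1),
         'Group' : np.random.choice(range(1, 41), Records, replace = True)
         }
        )

def Age(x):
    a = list()
    for i in x:
        if (i >= 14 and i <= 20) or (i >= 34 and i <= 40):
            a.append('65+')
        else:
            a.append('65-')
    return a

df['Age'] = Age(df.Group)

使用 numpy.where,它是一个非常快速的矢量化函数: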

# Boolean mask: True where Group lies in 14..20 or 34..40 (both ends inclusive).
m = df['Group'].between(14, 20) | df['Group'].between(34, 40)
# Pick the label element-wise from the mask.
df['new'] = np.where(m, '65+', '65-')
print(df)
   Group  ID  Age  new
0     11   1  65-  65-
1      1   2  65-  65-
2      6   3  65-  65-
3     32   4  65-  65-
4     31   5  65-  65-
5     39   6  65+  65+
6     26   7  65-  65-
7     38   8  65+  65+
8     26   9  65-  65-
9     31  10  65-  65-
m = ((df['Group'] >= 14) & (df['Group'] <= 20)) | ((df['Group'] >= 34) & (df['Group'] <= 40))

使用 numpy.where,它是一个非常快速的矢量化函数:

# Boolean mask: True where Group lies in 14..20 or 34..40 (both ends inclusive).
m = df['Group'].between(14, 20) | df['Group'].between(34, 40)
# Pick the label element-wise from the mask.
df['new'] = np.where(m, '65+', '65-')
print(df)
   Group  ID  Age  new
0     11   1  65-  65-
1      1   2  65-  65-
2      6   3  65-  65-
3     32   4  65-  65-
4     31   5  65-  65-
5     39   6  65+  65+
6     26   7  65-  65-
7     38   8  65+  65+
8     26   9  65-  65-
9     31  10  65-  65-

m = ((df['Group'] >= 14) & (df['Group'] <= 20)) | ((df['Group'] >= 34) & (df['Group'] <= 40))

对 df.Group 这一 Series 使用 apply:

# Number of rows in the demo frame.
Records = 100

# Sequential IDs plus a random group label in 1..40 for each row.
df = pd.DataFrame({
    'ID': range(1, Records + 1),
    'Group': np.random.choice(range(1, 41), size=Records, replace=True),
})

# Label every row by applying the range test to each Group value.
df['Age'] = df['Group'].apply(
    lambda g: '65+' if (14 <= g <= 20 or 34 <= g <= 40) else '65-'
)
print(df.head())

对 df.Group 这一 Series 使用 apply:

# Number of rows in the demo frame.
Records = 100

# Sequential IDs plus a random group label in 1..40 for each row.
df = pd.DataFrame({
    'ID': range(1, Records + 1),
    'Group': np.random.choice(range(1, 41), size=Records, replace=True),
})

# Label every row by applying the range test to each Group value.
df['Age'] = df['Group'].apply(
    lambda g: '65+' if (14 <= g <= 20 or 34 <= g <= 40) else '65-'
)
print(df.head())

选项 1
重新考虑条件。
请注意,两个区间的宽度都是 6(即 14–20 和 34–40)。

请注意,两个区间之间的中点是 27,因此原条件等价于 |Group − 27| 介于 7 和 13 之间(含端点)。

# Lookup table: index 0 -> '65-', index 1 -> '65+'.
cats = np.array(['65-', '65+'])
# 1 where the distance from 27 is within 7..13 (inclusive), else 0.
cond = (df['Group'] - 27).abs().between(7, 13).astype(int)
df.assign(Age=cats[cond])

   Group  ID  Age
0     11   1  65-
1      1   2  65-
2      6   3  65-
3     32   4  65-
4     31   5  65-
5     39   6  65+
6     26   7  65-
7     38   8  65+
8     26   9  65-
9     31  10  65-

我们可以直接使用底层的 NumPy 数组来进一步加快速度:

# Lookup table: index 0 -> '65-', index 1 -> '65+'.
cats = np.array(['65-', '65+'])
# Work on the raw array: absolute distance of each group from 27.
arr1 = np.absolute(df['Group'].values - 27)
# 1 where that distance is within 7..13 (inclusive), else 0.
cond = np.logical_and(arr1 >= 7, arr1 <= 13).astype(int)
df.assign(Age=cats[cond])

   Group  ID  Age
0     11   1  65-
1      1   2  65-
2      6   3  65-
3     32   4  65-
4     31   5  65-
5     39   6  65+
6     26   7  65-
7     38   8  65+
8     26   9  65-
9     31  10  65-

选项 1
重新考虑条件。
请注意,两个区间的宽度都是 6(即 14–20 和 34–40)。

请注意,两个区间之间的中点是 27,因此原条件等价于 |Group − 27| 介于 7 和 13 之间(含端点)。

# Lookup table: index 0 -> '65-', index 1 -> '65+'.
cats = np.array(['65-', '65+'])
# 1 where the distance from 27 is within 7..13 (inclusive), else 0.
cond = (df['Group'] - 27).abs().between(7, 13).astype(int)
df.assign(Age=cats[cond])

   Group  ID  Age
0     11   1  65-
1      1   2  65-
2      6   3  65-
3     32   4  65-
4     31   5  65-
5     39   6  65+
6     26   7  65-
7     38   8  65+
8     26   9  65-
9     31  10  65-

我们可以直接使用底层的 NumPy 数组来进一步加快速度:

# Lookup table: index 0 -> '65-', index 1 -> '65+'.
cats = np.array(['65-', '65+'])
# Work on the raw array: absolute distance of each group from 27.
arr1 = np.absolute(df['Group'].values - 27)
# 1 where that distance is within 7..13 (inclusive), else 0.
cond = np.logical_and(arr1 >= 7, arr1 <= 13).astype(int)
df.assign(Age=cats[cond])

   Group  ID  Age
0     11   1  65-
1      1   2  65-
2      6   3  65-
3     32   4  65-
4     31   5  65-
5     39   6  65+
6     26   7  65-
7     38   8  65+
8     26   9  65-
9     31  10  65-