Python:带逻辑条件的多级索引(MultiIndex)与掩码
标签:python, pandas, indexing, division, multi-index
我有 4 个索引层级:mun、loc、geo 和 block。我需要为运算创建掩码(mask),并利用掩码执行如下操作(data2 列为期望的结果):
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 1 0 0 10 10
1 1 1 0 10 10
1 1 1 1 3 3/4
1 1 1 2 4 4/4
1 1 2 0 30 30
1 1 2 1 1 1/3
1 1 2 2 3 3/3
1 1 0 0 4 4
1 2 1 1 10 10/12
1 2 1 2 12 12/12
2 0 0 0 60 60
2 1 1 1 123 123/123
2 1 1 2 7 7/123
2 1 2 1 6 6/6
2 1 2 2 1 1/6
也就是说,在层次结构的每一级中取最大值,并将该级的每个元素除以这个最大值。关于第一个问题我已经得到了帮助,但我在掌握多级索引方面仍然遇到很多困难。任何帮助我都将不胜感激。
回答:这并不容易,但关键是用条件(布尔掩码)来选取值。
层级 block:
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10
1 3 3/4
2 4 4/4
2 0 30 30
1 1 1/3
2 3 3/3
0 0 4 4
2 1 1 10 10/12
2 12 12/12
2 0 0 0 60 60
1 1 1 123 123/123
2 7 7/123
2 1 6 6/6
2 1 1/6
# Block-level rows: every index level (mun, loc, geo, block) is non-zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') != 0)
)
print(mask3)
[False False False False True True False True True False True True
False True True True True]
# Maximum of data1 inside each (mun, loc, geo) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].groupby(level=['mun', 'loc', 'geo']).max()
#print (df2)
# Spread each group maximum back over the rows of df (block level dropped so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=3, drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10/30
1 4 4
2 0 30 30/30
2 1 0 2 2/3
2 0 3 3/3
3 0 1 1/3
2 0 0 0 60 60
1 1 0 12 12/88
1 1 1
2 0 88 88/88
1 9 9
# df1 is df without the 'block' index level.
df1 = df.reset_index(level='block', drop=True)
# Geo-level rows: mun, loc and geo are non-zero while block is zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False False True False True True True True False True False
True False]
# Maximum of data1 inside each (mun, loc) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun', 'loc']).max()
# Spread each group maximum back over the rows of df (geo and block levels dropped so the
# keys line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block'], drop=True).index).mask(~mask3).fillna(1)
print(df2)
# Divide positionally (.values): df2 carries a 2-level index while df has 4 levels,
# so the bare array is passed instead of the Series. Rows whose divisor is 1 are unchanged.
df['new'] = df['data1'].div(df2.values, axis=0)
print (df)
data1 data2
mun loc geo block
0 0 0 0 14 14
1 0 0 0 12 12
1 0 0 20 20/20
1 0 10 10
1 31 31
2 0 0 15 15/20
1 1 11 11
2 0 0 0 80 80
1 0 0 100 100/100
1 2 7 7
2 0 0 11 11/100
# df1 is df without the 'block' and 'geo' index levels.
df1 = df.reset_index(level=['block', 'geo'], drop=True)
# Loc-level rows: mun and loc are non-zero while geo and block are zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False True False False True False False True False True]
# Maximum of data1 inside each mun group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun']).max()
#print (df2)
# Spread each group maximum back over the rows of df (only the mun level kept so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block', 'loc'], drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 55 55
1 0 0 0 70 70/70
1 0 0 12 12
1 0 13 13
2 0 0 0 60 60/70
1 1 1 12 12
2 1 6 6
3 0 0 0 12 12/70
# Mun-level rows: mun is non-zero while loc, geo and block are all zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') == 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False True False False True False False True]
# Single overall maximum of data1 across the masked rows (a scalar, no grouping).
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].max()
#print (df2)
# Broadcast the scalar over df's index, blank out the rows outside the mask
# and fill them with 1.
df2 = pd.Series(df2, index=df.index).mask(~mask3).fillna(1)
#print (df2)
层级 geo:
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10
1 3 3/4
2 4 4/4
2 0 30 30
1 1 1/3
2 3 3/3
0 0 4 4
2 1 1 10 10/12
2 12 12/12
2 0 0 0 60 60
1 1 1 123 123/123
2 7 7/123
2 1 6 6/6
2 1 1/6
# Block-level rows: every index level (mun, loc, geo, block) is non-zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') != 0)
)
print(mask3)
[False False False False True True False True True False True True
False True True True True]
# Maximum of data1 inside each (mun, loc, geo) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].groupby(level=['mun', 'loc', 'geo']).max()
#print (df2)
# Spread each group maximum back over the rows of df (block level dropped so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=3, drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10/30
1 4 4
2 0 30 30/30
2 1 0 2 2/3
2 0 3 3/3
3 0 1 1/3
2 0 0 0 60 60
1 1 0 12 12/88
1 1 1
2 0 88 88/88
1 9 9
# df1 is df without the 'block' index level.
df1 = df.reset_index(level='block', drop=True)
# Geo-level rows: mun, loc and geo are non-zero while block is zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False False True False True True True True False True False
True False]
# Maximum of data1 inside each (mun, loc) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun', 'loc']).max()
# Spread each group maximum back over the rows of df (geo and block levels dropped so the
# keys line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block'], drop=True).index).mask(~mask3).fillna(1)
print(df2)
# Divide positionally (.values): df2 carries a 2-level index while df has 4 levels,
# so the bare array is passed instead of the Series. Rows whose divisor is 1 are unchanged.
df['new'] = df['data1'].div(df2.values, axis=0)
print (df)
data1 data2
mun loc geo block
0 0 0 0 14 14
1 0 0 0 12 12
1 0 0 20 20/20
1 0 10 10
1 31 31
2 0 0 15 15/20
1 1 11 11
2 0 0 0 80 80
1 0 0 100 100/100
1 2 7 7
2 0 0 11 11/100
# df1 is df without the 'block' and 'geo' index levels.
df1 = df.reset_index(level=['block', 'geo'], drop=True)
# Loc-level rows: mun and loc are non-zero while geo and block are zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False True False False True False False True False True]
# Maximum of data1 inside each mun group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun']).max()
#print (df2)
# Spread each group maximum back over the rows of df (only the mun level kept so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block', 'loc'], drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 55 55
1 0 0 0 70 70/70
1 0 0 12 12
1 0 13 13
2 0 0 0 60 60/70
1 1 1 12 12
2 1 6 6
3 0 0 0 12 12/70
# Mun-level rows: mun is non-zero while loc, geo and block are all zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') == 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False True False False True False False True]
# Single overall maximum of data1 across the masked rows (a scalar, no grouping).
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].max()
#print (df2)
# Broadcast the scalar over df's index, blank out the rows outside the mask
# and fill them with 1.
df2 = pd.Series(df2, index=df.index).mask(~mask3).fillna(1)
#print (df2)
层级 loc:
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10
1 3 3/4
2 4 4/4
2 0 30 30
1 1 1/3
2 3 3/3
0 0 4 4
2 1 1 10 10/12
2 12 12/12
2 0 0 0 60 60
1 1 1 123 123/123
2 7 7/123
2 1 6 6/6
2 1 1/6
# Block-level rows: every index level (mun, loc, geo, block) is non-zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') != 0)
)
print(mask3)
[False False False False True True False True True False True True
False True True True True]
# Maximum of data1 inside each (mun, loc, geo) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].groupby(level=['mun', 'loc', 'geo']).max()
#print (df2)
# Spread each group maximum back over the rows of df (block level dropped so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=3, drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10/30
1 4 4
2 0 30 30/30
2 1 0 2 2/3
2 0 3 3/3
3 0 1 1/3
2 0 0 0 60 60
1 1 0 12 12/88
1 1 1
2 0 88 88/88
1 9 9
# df1 is df without the 'block' index level.
df1 = df.reset_index(level='block', drop=True)
# Geo-level rows: mun, loc and geo are non-zero while block is zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False False True False True True True True False True False
True False]
# Maximum of data1 inside each (mun, loc) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun', 'loc']).max()
# Spread each group maximum back over the rows of df (geo and block levels dropped so the
# keys line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block'], drop=True).index).mask(~mask3).fillna(1)
print(df2)
# Divide positionally (.values): df2 carries a 2-level index while df has 4 levels,
# so the bare array is passed instead of the Series. Rows whose divisor is 1 are unchanged.
df['new'] = df['data1'].div(df2.values, axis=0)
print (df)
data1 data2
mun loc geo block
0 0 0 0 14 14
1 0 0 0 12 12
1 0 0 20 20/20
1 0 10 10
1 31 31
2 0 0 15 15/20
1 1 11 11
2 0 0 0 80 80
1 0 0 100 100/100
1 2 7 7
2 0 0 11 11/100
# df1 is df without the 'block' and 'geo' index levels.
df1 = df.reset_index(level=['block', 'geo'], drop=True)
# Loc-level rows: mun and loc are non-zero while geo and block are zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False True False False True False False True False True]
# Maximum of data1 inside each mun group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun']).max()
#print (df2)
# Spread each group maximum back over the rows of df (only the mun level kept so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block', 'loc'], drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 55 55
1 0 0 0 70 70/70
1 0 0 12 12
1 0 13 13
2 0 0 0 60 60/70
1 1 1 12 12
2 1 6 6
3 0 0 0 12 12/70
# Mun-level rows: mun is non-zero while loc, geo and block are all zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') == 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False True False False True False False True]
# Single overall maximum of data1 across the masked rows (a scalar, no grouping).
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].max()
#print (df2)
# Broadcast the scalar over df's index, blank out the rows outside the mask
# and fill them with 1.
df2 = pd.Series(df2, index=df.index).mask(~mask3).fillna(1)
#print (df2)
层级 mun:
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10
1 3 3/4
2 4 4/4
2 0 30 30
1 1 1/3
2 3 3/3
0 0 4 4
2 1 1 10 10/12
2 12 12/12
2 0 0 0 60 60
1 1 1 123 123/123
2 7 7/123
2 1 6 6/6
2 1 1/6
# Block-level rows: every index level (mun, loc, geo, block) is non-zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') != 0)
)
print(mask3)
[False False False False True True False True True False True True
False True True True True]
# Maximum of data1 inside each (mun, loc, geo) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].groupby(level=['mun', 'loc', 'geo']).max()
#print (df2)
# Spread each group maximum back over the rows of df (block level dropped so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=3, drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 12 12
1 0 0 0 20 20
1 0 0 10 10
1 0 10 10/30
1 4 4
2 0 30 30/30
2 1 0 2 2/3
2 0 3 3/3
3 0 1 1/3
2 0 0 0 60 60
1 1 0 12 12/88
1 1 1
2 0 88 88/88
1 9 9
# df1 is df without the 'block' index level.
df1 = df.reset_index(level='block', drop=True)
# Geo-level rows: mun, loc and geo are non-zero while block is zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') != 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False False True False True True True True False True False
True False]
# Maximum of data1 inside each (mun, loc) group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun', 'loc']).max()
# Spread each group maximum back over the rows of df (geo and block levels dropped so the
# keys line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block'], drop=True).index).mask(~mask3).fillna(1)
print(df2)
# Divide positionally (.values): df2 carries a 2-level index while df has 4 levels,
# so the bare array is passed instead of the Series. Rows whose divisor is 1 are unchanged.
df['new'] = df['data1'].div(df2.values, axis=0)
print (df)
data1 data2
mun loc geo block
0 0 0 0 14 14
1 0 0 0 12 12
1 0 0 20 20/20
1 0 10 10
1 31 31
2 0 0 15 15/20
1 1 11 11
2 0 0 0 80 80
1 0 0 100 100/100
1 2 7 7
2 0 0 11 11/100
# df1 is df without the 'block' and 'geo' index levels.
df1 = df.reset_index(level=['block', 'geo'], drop=True)
# Loc-level rows: mun and loc are non-zero while geo and block are zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') != 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False False True False False True False False True False True]
# Maximum of data1 inside each mun group, computed over the masked rows only.
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df1.loc[mask3, 'data1'].groupby(level=['mun']).max()
#print (df2)
# Spread each group maximum back over the rows of df (only the mun level kept so the keys
# line up), blank out the rows outside the mask and fill them with 1.
df2 = df2.reindex(df.reset_index(level=['geo', 'block', 'loc'], drop=True).index).mask(~mask3).fillna(1)
#print (df2)
print (df)
data1 data2
mun loc geo block
0 0 0 0 55 55
1 0 0 0 70 70/70
1 0 0 12 12
1 0 13 13
2 0 0 0 60 60/70
1 1 1 12 12
2 1 6 6
3 0 0 0 12 12/70
# Mun-level rows: mun is non-zero while loc, geo and block are all zero.
mask3 = (
    (df.index.get_level_values('mun') != 0)
    & (df.index.get_level_values('loc') == 0)
    & (df.index.get_level_values('geo') == 0)
    & (df.index.get_level_values('block') == 0)
)
print(mask3)
[False True False False True False False True]
# Single overall maximum of data1 across the masked rows (a scalar, no grouping).
# .loc replaces the .ix indexer, which was deprecated in pandas 0.20 and removed in 1.0.
df2 = df.loc[mask3, 'data1'].max()
#print (df2)
# Broadcast the scalar over df's index, blank out the rows outside the mask
# and fill them with 1.
df2 = pd.Series(df2, index=df.index).mask(~mask3).fillna(1)
#print (df2)
评论:
- 这个问题需要更多细节。很明显,你只是把某些元素除以一个最大值,而其他元素保持不变;请解释你的选取规则,仅从数据上看并不清楚。
- 谢谢兄弟。我认为这里有个错误:它并没有做除法……如果我想把所有结果合并到同一列中,该怎么办?
- 我来找找问题所在。如果需要新列,请使用 `df['new'] = df['data1'].div(df2.values, axis=0)`;如果它返回 NaN,请使用 `df['new'] = df['data1'].div(df2.values, axis=0).values`。
- 我试过了,但它会覆盖上一次计算的结果。如果我先计算 block 层级,随后 geo 层级的结果就会把它覆盖掉,于是该索引层级上 data2 得到的值与 data1 相同。
- 抱歉,geo 层级的代码之前有误,现在已经更正。你是想从输出中删除 data2 列,还是用新值替换 data2?