如何在Python中从列表中找到最佳组合后从数据集中删除行

如何在Python中从列表中找到最佳组合后从数据集中删除行,python,pandas,loops,numpy,matrix,Python,Pandas,Loops,Numpy,Matrix,假设N=7。 我从数据集M中提取了两个矩阵Y和Z。第一列是索引ID。 Z矩阵是通过从具有最大det值的Y矩阵(共8行)中提取7行的组合来创建的。 我想得到组合中省略的那一行,然后从我的数据集M中删除那一行 import pandas as pd import numpy as np import itertools as it import io def brute_force(A): return max(map(list,it.combinations(range(len(Y)),

假设 N=7。我从数据集 M 中提取了两个矩阵 Y 和 Z,第一列是索引 ID。Z 矩阵是从 Y 矩阵(共 8 行)的所有 7 行组合中,选取 det 值最大的那个组合得到的。我想找出该组合中被省略的那一行,然后从我的数据集 M 中删除那一行。

import pandas as pd
import numpy as np
import itertools as it
import io

def brute_force(A, n=None):
    """Return the row positions of ``A`` that maximise ``det(S.T @ S)``.

    Every combination of ``n`` row positions of ``A`` is tried, where ``S``
    is the sub-matrix made of the chosen rows. The first combination with
    the largest determinant is returned.

    Args:
        A: 2-D NumPy array whose rows are the candidates.
        n: Number of rows per combination. When omitted, the module-level
            ``N`` is used, which keeps the original one-argument call working.

    Returns:
        A list of ``n`` row positions (ascending) into ``A``.

    Raises:
        ValueError: If there is no combination to choose from
            (``n`` larger than the number of rows in ``A``).
    """
    if n is None:
        n = N  # fall back to the script-level row count
    # Positions are derived from the argument itself rather than a global
    # matrix, so the function works for any array passed in.
    # Each combination is converted to a list because indexing an ndarray
    # with a tuple would be read as a multi-dimensional index.
    return max(
        map(list, it.combinations(range(len(A)), n)),
        key=lambda s: np.linalg.det(A[s].T @ A[s]),
    )

# Inline CSV sample used as the dataset; the ID column becomes the index
# when the text is parsed below (index_col=0).
data =''' 
ID,M,N,O,P,Q,R
5362,0.974,-0.404,-0.763,0.868,-0.5,0.16
485,-0.659,0.531,0.623,0.402,0.772,0.506
582,0.045,0.994,0.762,-0.036,0.117,-0.355
99,0.777,0.537,0.391,0.456,0.329,0.108
75,-0.44,0.522,0.856,-0.04,0.656,-0.935
474,0.357,0.81,0.135,0.389,0.055,0.224
594,-0.291,0.031,0.742,-0.332,0.815,0.983
597,0.968,-0.357,0.591,0.892,0.375,0.88
124,0.737,0.611,0.764,0.289,0.298,-0.705
635,0.883,0.96,-0.987,0.29,0.997,0.186
7894,-0.045,0.047,0.523,0.068,-0.9,0.356
1268,0.561,0.736,-0.375,0.465,0.908,0.2
38,0.465,0.623,0.734,0.145,0.489,0.759
88,0.029,0.166,0.098,0.285,0.18,0.829
887,0.464,0.652,-0.896,0.07,0.772,-0.268
994,-0.611,0.986,0.708,-0.195,0.938,0.166
478,0.109,0.664,0.977,0.2,-0.466,0.676
693,0.893,0.536,0.827,0,0.658,-0.31
455,0.745,0.851,0.025,0.667,0.094,0.127
874,0.036,-0.212,0.879,0.966,0.788,0.719
417,0.316,0.553,0.872,-0.274,0.946,0.238
44,0.517,-0.113,0.992,0.521,0.595,0.674
101,0.699,0.095,0.269,0.628,-0.711,-0.141
60,0.993,0.348,-0.44,0.807,0.013,0.325
8741,-0.319,0.535,0.717,-0.89,0.334,0.279
9635,0.363,0.812,0.77,0.715,0.34,0.327
2563,0.649,-0.788,0.405,0.056,0.25,0.08
5463,0.491,0.414,0.084,0.173,0.397,-0.499
1044,-0.669,0.288,0.424,-0.324,0.491,-0.581
999,0.208,0.082,-0.425,0.916,0.582,0.45
'''
# Parse the CSV text into a DataFrame indexed by its first column.
df = pd.read_csv(io.StringIO(data),index_col=0)
# M is the working dataset: every row and every column of df.
M = df.iloc[:,:]
# Number of rows per combination, read interactively; brute_force reads
# this module-level N.
N = int(input( '\nNo. of rows: '))

# Candidate rows: the first N+1 rows of M (positional slice).
Y = M.iloc[0 : N+1 ,:]
print(Y.index)
# Positions within Y of the N rows picked by brute_force.
YB = brute_force(Y.to_numpy())
# Rows of Y at those positions; any row of Y not picked is absent from Z.index.
Z = Y.iloc[YB]
print(Z.index)
所以我想从我的原始数据集M中删除第8行

import pandas as pd
import numpy as np
import itertools as it
import io

def brute_force(A, n=None):
    """Return the row positions of ``A`` that maximise ``det(S.T @ S)``.

    Every combination of ``n`` row positions of ``A`` is tried, where ``S``
    is the sub-matrix made of the chosen rows. The first combination with
    the largest determinant is returned.

    Args:
        A: 2-D NumPy array whose rows are the candidates.
        n: Number of rows per combination. When omitted, the module-level
            ``N`` is used, which keeps the original one-argument call working.

    Returns:
        A list of ``n`` row positions (ascending) into ``A``.

    Raises:
        ValueError: If there is no combination to choose from
            (``n`` larger than the number of rows in ``A``).
    """
    if n is None:
        n = N  # fall back to the script-level row count
    # Positions are derived from the argument itself rather than a global
    # matrix, so the function works for any array passed in.
    # Each combination is converted to a list because indexing an ndarray
    # with a tuple would be read as a multi-dimensional index.
    return max(
        map(list, it.combinations(range(len(A)), n)),
        key=lambda s: np.linalg.det(A[s].T @ A[s]),
    )

# Inline CSV sample used as the dataset; the ID column becomes the index
# when the text is parsed below (index_col=0).
data =''' 
ID,M,N,O,P,Q,R
5362,0.974,-0.404,-0.763,0.868,-0.5,0.16
485,-0.659,0.531,0.623,0.402,0.772,0.506
582,0.045,0.994,0.762,-0.036,0.117,-0.355
99,0.777,0.537,0.391,0.456,0.329,0.108
75,-0.44,0.522,0.856,-0.04,0.656,-0.935
474,0.357,0.81,0.135,0.389,0.055,0.224
594,-0.291,0.031,0.742,-0.332,0.815,0.983
597,0.968,-0.357,0.591,0.892,0.375,0.88
124,0.737,0.611,0.764,0.289,0.298,-0.705
635,0.883,0.96,-0.987,0.29,0.997,0.186
7894,-0.045,0.047,0.523,0.068,-0.9,0.356
1268,0.561,0.736,-0.375,0.465,0.908,0.2
38,0.465,0.623,0.734,0.145,0.489,0.759
88,0.029,0.166,0.098,0.285,0.18,0.829
887,0.464,0.652,-0.896,0.07,0.772,-0.268
994,-0.611,0.986,0.708,-0.195,0.938,0.166
478,0.109,0.664,0.977,0.2,-0.466,0.676
693,0.893,0.536,0.827,0,0.658,-0.31
455,0.745,0.851,0.025,0.667,0.094,0.127
874,0.036,-0.212,0.879,0.966,0.788,0.719
417,0.316,0.553,0.872,-0.274,0.946,0.238
44,0.517,-0.113,0.992,0.521,0.595,0.674
101,0.699,0.095,0.269,0.628,-0.711,-0.141
60,0.993,0.348,-0.44,0.807,0.013,0.325
8741,-0.319,0.535,0.717,-0.89,0.334,0.279
9635,0.363,0.812,0.77,0.715,0.34,0.327
2563,0.649,-0.788,0.405,0.056,0.25,0.08
5463,0.491,0.414,0.084,0.173,0.397,-0.499
1044,-0.669,0.288,0.424,-0.324,0.491,-0.581
999,0.208,0.082,-0.425,0.916,0.582,0.45
'''
# Parse the CSV text into a DataFrame indexed by its first column.
df = pd.read_csv(io.StringIO(data),index_col=0)
# M is the working dataset: every row and every column of df.
M = df.iloc[:,:]
# Number of rows per combination, read interactively; brute_force reads
# this module-level N.
N = int(input( '\nNo. of rows: '))

# Candidate rows: the first N+1 rows of M (positional slice).
Y = M.iloc[0 : N+1 ,:]
print(Y.index)
# Positions within Y of the N rows picked by brute_force.
YB = brute_force(Y.to_numpy())
# Rows of Y at those positions; any row of Y not picked is absent from Z.index.
Z = Y.iloc[YB]
print(Z.index)
输出:

No. of rows: 7
Y.index :  Int64Index([5362, 485, 582, 99, 75, 474, 594, 597], dtype='int64', name='ID')
Z.index :  Int64Index([5362, 485, 582, 99, 75, 594, 597], dtype='int64', name='ID')

这里,从 Y 矩阵中找到最佳组合后,索引 474 不在 Z 矩阵中。因此,我想从原始数据集 M 中删除该索引对应的整行,并使用缩减后的数据集 M 继续后面的循环。

使用集合差找出缺少的索引,然后将其从 M 中删除:

# The index label present in Y but missing from Z belongs to the row that
# the best combination left out (pop() takes one element of the difference).
to_drop = (set(Y.index) - set(Z.index)).pop()
# Remove that row from the full dataset by its index label; drop() returns
# a new DataFrame, so rebind M to keep working with the reduced data.
M = M.drop(to_drop, axis=0)