Python 使用GridsearchCV时访问验证数据Keras
我想使用GridsearchCV构建一个参数调优管道。我的模型是一个(二进制)分类器,我使用Keras Sequential()构建它。由于我处理的是一个倾斜数据集(大约6/7的样本标签为0,剩下1/7的样本标签为1),因此我添加了一个回调,用于在每个epoch结束时计算f1、召回率和精度指标,我希望将其用作验证模型的指标。为此,我在Keras中使用了一个回调函数,它需要在模型的fit()调用中指定验证数据集。这反过来又使得同时访问验证集和使用GridsearchCV变得非常困难。我通过手写一套DIY交叉验证流程成功地绕过了这一问题,但我想知道是否可以结合GridsearchCV更高效地实现这一点。下面的pred_round辅助函数用于设置不同的分类阈值——输入:x,神经网络产生的预测向量;thr,用于将预测划分为0或1类的阈值;输出:由0和1组成的向量,即预测的标签。然后,我使用以下方法创建模型。这是我的代码:
def create_model(optimizer=None, dropout=0.1, init='uniform'):
    """Build a single-layer sigmoid binary classifier for KerasClassifier.

    Args:
        optimizer: Optimizer name/instance passed to ``compile``. Defaults to
            ``None``, which falls back to the module-level ``OPTIMIZER``
            (preserves the original behaviour for parameter-less calls).
        dropout: Accepted for GridSearchCV compatibility but currently unused —
            no Dropout layer exists in this architecture. TODO: wire it in or
            drop the parameter.
        init: Kernel initializer for the single Dense layer.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    # Single output unit: N_FEATURES inputs -> 1 logit -> sigmoid probability.
    model.add(Dense(1, input_shape=(N_FEATURES,), kernel_initializer=init))
    model.add(Activation('sigmoid'))
    # BUG FIX: the ``optimizer`` argument was previously ignored in favour of
    # the global OPTIMIZER, so GridSearchCV could never actually tune it.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer if optimizer is not None else OPTIMIZER,
                  # metrics=['accuracy','binary_accuracy']
                  )
    return model
# Wrap the model factory so it exposes the sklearn estimator API.
# build_fn is called lazily at fit() time, so the globals below (N_FEATURES,
# OPTIMIZER) only need to exist before the first fit, not before this line.
model = KerasClassifier(build_fn=create_model, verbose=1)
# Number of input features, taken from the training matrix (defined elsewhere).
N_FEATURES = X_train.shape[1]
# Classification thresholds swept by the DIY CV loop below.
thresholds = [0.2,0.3,0.5] #0.15
EPOCHS =200
BATCH_SIZE = 256
VERBOSE = 1
# Optimizer actually used by create_model (the function's own ``optimizer``
# argument is ignored in the original code — see create_model).
OPTIMIZER = Adadelta()
# NOTE(review): N_HIDDEN is defined but never used by the visible model
# (single Dense(1) layer) — presumably a leftover from a deeper architecture.
N_HIDDEN = 2000
# Number of repeated hold-out splits per threshold.
cv_repetitions = 5
现在,我希望优化的代码部分:
是否有一种方法可以使用GridsearchCV将这些循环括起来,并为交叉验证部分包含更多参数
提前感谢您的阅读和帮助
class Metrics(Callback):
    """Keras callback that records binary precision/recall/F1 on the
    validation set after every epoch.

    Results accumulate in ``val_f1s``, ``val_recalls`` and
    ``val_precisions`` (one entry per epoch); the lists are reset at the
    start of each ``fit`` call.

    NOTE(review): this relies on ``self.validation_data``, which was removed
    from callbacks in TF2-era Keras — confirm the Keras version, or pass the
    validation data in explicitly.
    """

    def __init__(self, threshold=None):
        super().__init__()
        # Decision threshold for turning sigmoid outputs into 0/1 labels.
        # None -> fall back to the module-level ``threshold`` global, which is
        # what the original code did implicitly (it is the loop variable of
        # the threshold sweep below).
        self.threshold = threshold

    def on_train_begin(self, logs=None):
        # Reset per-epoch histories for this fit run.
        # (logs=None instead of the mutable-default logs={} anti-pattern.)
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        thr = self.threshold if self.threshold is not None else threshold
        # Binarize the model's sigmoid outputs at the chosen threshold.
        val_predict = pred_round(self.model.predict(self.validation_data[0]), thr)
        val_targ = self.validation_data[1]
        _val_precision, _val_recall, _val_f1, _ = precision_recall_fscore_support(
            val_targ, val_predict, beta=1.0, average='binary')
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
metrics = Metrics()
def create_model(optimizer=None, dropout=0.1, init='uniform'):
    """Build a single-layer sigmoid binary classifier for KerasClassifier.

    (Duplicate definition already present in the original source.)

    Args:
        optimizer: Optimizer name/instance passed to ``compile``. Defaults to
            ``None``, which falls back to the module-level ``OPTIMIZER``
            (preserves the original behaviour for parameter-less calls).
        dropout: Accepted for GridSearchCV compatibility but currently unused —
            no Dropout layer exists in this architecture.
        init: Kernel initializer for the single Dense layer.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(1, input_shape=(N_FEATURES,), kernel_initializer=init))
    model.add(Activation('sigmoid'))
    # BUG FIX: the ``optimizer`` argument was previously ignored in favour of
    # the global OPTIMIZER, so GridSearchCV could never actually tune it.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer if optimizer is not None else OPTIMIZER,
                  # metrics=['accuracy','binary_accuracy']
                  )
    return model
# sklearn-compatible wrapper; build_fn is invoked lazily at fit() time.
model = KerasClassifier(build_fn=create_model, verbose=1)
# Number of input features from the training matrix (defined elsewhere).
N_FEATURES = X_train.shape[1]
# Classification thresholds swept by the DIY CV loop below.
thresholds = [0.2,0.3,0.5] #0.15
EPOCHS =200
BATCH_SIZE = 256
VERBOSE = 1
# Optimizer used by create_model's compile step.
OPTIMIZER = Adadelta()
# NOTE(review): unused by the visible single-layer model.
N_HIDDEN = 2000
# Number of repeated hold-out splits per threshold.
cv_repetitions = 5
# DIY repeated hold-out cross-validation: for each classification threshold,
# run ``cv_repetitions`` independent stratified 80/20 splits and average the
# per-epoch loss / precision / recall / F1 curves across repetitions.
for threshold in thresholds:
    # Per-epoch accumulators (length EPOCHS), summed over repetitions then
    # divided by cv_repetitions below.
    f1_cv_scores = np.zeros(EPOCHS)
    recall_cv_scores = np.zeros(EPOCHS)
    precision_cv_scores = np.zeros(EPOCHS)
    loss_train_scores = np.zeros(EPOCHS)
    loss_cv_scores = np.zeros(EPOCHS)
    # (Removed the dead ``i=0`` / ``i+=1`` bookkeeping: the for-loop variable
    # was rebound on every iteration, so the manual counter had no effect.)
    for rep in range(cv_repetitions):
        # Fresh stratified split each repetition; stratify preserves the
        # skewed ~6:1 class ratio in both partitions.
        X_train_NN, X_val_NN, y_train_NN, y_val_NN = train_test_split(
            X_train, y_train, test_size=0.2, stratify=y_train)
        history = model.fit(x=X_train_NN, y=y_train_NN,
                            batch_size=BATCH_SIZE,
                            validation_data=(X_val_NN, y_val_NN),
                            epochs=EPOCHS,
                            verbose=VERBOSE,
                            callbacks=[metrics],
                            )
        # history.history entries and the callback's lists each hold one
        # value per epoch, matching the accumulator length.
        loss_train_scores += history.history['loss']
        loss_cv_scores += history.history['val_loss']
        f1_cv_scores += metrics.val_f1s
        recall_cv_scores += metrics.val_recalls
        precision_cv_scores += metrics.val_precisions
    # BUG FIX: the original divided by the undefined name ``loss_test_scores``
    # here, which raised NameError at runtime; ``loss_train_scores`` was meant.
    loss_train_scores = loss_train_scores / cv_repetitions
    loss_cv_scores = loss_cv_scores / cv_repetitions
    f1_cv_scores = f1_cv_scores / cv_repetitions
    recall_cv_scores = recall_cv_scores / cv_repetitions
    precision_cv_scores = precision_cv_scores / cv_repetitions