Python 即使CNN、优化器等发生变化,训练损失仍保持不变

Python 即使CNN、优化器等发生变化,训练损失仍保持不变,python,tensorflow,keras,Python,Tensorflow,Keras,我正在尝试用OSU RGB热数据集训练CNN,以预测行人的边界框。我收集了只有一个人在框架中行走的图像,并编译了数据集。我已经写了一个简单的CNN与图像输入和边界框输出。代码如下所示 import os,cv2 import numpy as np import matplotlib.pyplot as plt import matplotlib.patches as patches #from cairocffi import * import pandas as pd import csv

我正在尝试用OSU RGB热数据集训练CNN,以预测行人的边界框。我收集了画面中只有一个人在行走的图像,并将其整理成数据集。我写了一个简单的CNN,以图像为输入、以边界框为输出。代码如下所示:

import os,cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
#from cairocffi import *
import pandas as pd
import csv

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
#from sklearn.metrics import mean_poisson_deviance
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import explained_variance_score
from keras import backend as K
K.set_image_data_format('channels_first')

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD,RMSprop,Adam
from keras.models import Model
from keras.layers import GlobalAveragePooling2D,BatchNormalization
import tensorflow as tf
import keras.backend.tensorflow_backend as tfback

def _get_available_gpus():
    """Return the names of the cached logical devices that are GPUs.

    The device names are cached on ``tfback._LOCAL_DEVICES``; they are
    queried from ``tf.config.list_logical_devices()`` only while that cache
    is still ``None``.

    # Returns
        A list of device-name strings that contain ``'device:gpu'``
        (compared case-insensitively).
    """
    if tfback._LOCAL_DEVICES is None:
        tfback._LOCAL_DEVICES = [
            device.name for device in tf.config.list_logical_devices()
        ]

    gpu_names = []
    for device_name in tfback._LOCAL_DEVICES:
        if 'device:gpu' in device_name.lower():
            gpu_names.append(device_name)
    return gpu_names


# Replace the Keras backend helper with the version defined above.
tfback._get_available_gpus = _get_available_gpus

# Directory the script is launched from; the data folder is resolved
# relative to it.
PATH = os.getcwd()
# Define data path
data_path = '{}/frames2'.format(PATH)
# Entries found directly inside the data folder.
data_dir_list = os.listdir(data_path)


# Training configuration.
num_epoch = 100
num_channel = 1
num_classes = 1
# Label matrix: 1934 rows of 4 values each, zero-initialised.
Y = np.zeros(shape=(1934, 4))


# Load the labels: record k of 'unified.csv' fills row k of Y.
# The row index must advance with every record. Without the increment every
# record overwrites Y[0] and all remaining label rows stay at zero.
i2 = 0
# newline='' is the mode the csv module expects for files it reads.
with open('unified.csv', newline='') as File:
    # A quote character identical to the delimiter is ambiguous, so the
    # conventional double quote is used; plain numeric rows parse the same.
    reader = csv.reader(File, delimiter=',', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on the assignment below.
            continue
        # Convert explicitly so a non-numeric cell fails with a clear error.
        Y[i2, :] = [float(value) for value in row]
        i2 += 1
        




# Read every image of every sub-folder of data_path as a grayscale array.
img_data_list = []
# Note: the data folder needs a two-level structure for this loop to work,
# e.g. "frames/dzire/<image files>".
for dataset in data_dir_list:
    # sorted() gives a deterministic (lexicographic) file order.
    # NOTE(review): labels are presumably matched to images by position, so
    # this order must agree with the row order of the label CSV - confirm.
    img_list = sorted(os.listdir(data_path + '/' + dataset))
    print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
    for img in img_list:
        img_path = data_path + '/' + dataset + '/' + img
        input_img = cv2.imread(img_path)
        if input_img is None:
            # cv2.imread reports an unreadable or non-image file by returning
            # None instead of raising; fail here with the offending path
            # rather than with an opaque error inside cvtColor.
            raise IOError('Could not read image file: {}'.format(img_path))
        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
        img_data_list.append(input_img)
        

#print(t)
# Shape of the last image that was read.
print(input_img.shape) # 1080 X 1920

print(img_data_list[0].shape)
# Stack the per-image arrays into a single array.
img_data = np.array(img_data_list)
print(img_data.shape)
# The float conversion fails when the input images do not all share a common
# shape (first thing to check). Dividing by 255 rescales 8-bit pixel values.
img_data = img_data.astype('float32') / 255
# Insert a length-1 axis at position 1 (the channel axis for 'channels_first').
img_data = img_data[:, np.newaxis, ...]
print (img_data.shape)


# Shuffle images and labels in unison (fixed seed).
x, y = shuffle(img_data, Y, random_state=42)
# Split the dataset: 30% of the samples are held out.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Defining the model



# Per-sample input shape, taken from the first sample.
input_shape = img_data[0].shape
print(input_shape)

# Four convolution layers followed by a small dense head with 4 linear
# outputs (one per label value).
model = Sequential([
    Convolution2D(64, kernel_size=3, strides=1,
                  kernel_initializer='glorot_normal',
                  input_shape=input_shape),
    Activation("relu"),
    Convolution2D(64, kernel_size=3, kernel_initializer='glorot_uniform'),
    Activation("relu"),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(32, kernel_size=3, kernel_initializer='glorot_normal'),
    Activation("relu"),
    BatchNormalization(),
    Convolution2D(32, kernel_size=3, kernel_initializer='glorot_uniform'),
    Activation("relu"),
    Dropout(0.5),
    Flatten(),
    Dense(64, kernel_initializer='glorot_normal'),
    Activation("relu"),
    Dense(4),
])


# Regression setup: RMSprop optimizer with mean-squared-error loss.
opt = RMSprop(lr=0.001)
model.compile(optimizer=opt, loss="mean_squared_error", metrics=[])

model.summary()
# Train one sample per step; the held-out split is used for validation.
hist = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=num_epoch,
    verbose=1,
    validation_data=(X_test, y_test),
)


# Persist the trained model to an HDF5 file.
model.save("unified_cnn_v1.h5")
print("Saved model to disk")

#plt.plot(hist.history['loss'])
#plt.show()
无论我如何更改CNN参数,训练损失始终锁定在“19.2..”,而验证损失始终接近零:

Train on 1353 samples, validate on 581 samples
Epoch 1/100
2020-08-31 22:00:38.787590: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2020-08-31 22:00:39.047776: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2020-08-31 22:00:40.059817: W tensorflow/stream_executor/gpu/redzone_allocator.cc:312] Internal: Invoking GPU asm compilation is supported on Cuda non-Windows platforms only
Relying on driver to perform ptx compilation. This message will be only logged once.
1353/1353 [==============================] - 30s 22ms/step - loss: 94.4755 - val_loss: 1.1403e-05
Epoch 2/100
1353/1353 [==============================] - 28s 21ms/step - loss: 19.2036 - val_loss: 2.5019e-07
Epoch 3/100
1353/1353 [==============================] - 28s 21ms/step - loss: 19.3207 - val_loss: 2.4980e-07
Epoch 4/100
我改变了层数,尝试了不同的优化器和不同的学习率(lr),但损失总是一样的。我无法对这种行为作出合理解释。我查阅了各种博客等资料,其中大多提到数据集不正确之类的原因。我检查了数据集以及分配的边界框(bbox)坐标,它们是合适的:在所有图像中,这些坐标都正确地框住了目标。
我想了解在什么样的情况下,无论怎样修改都总是得到相同的损失。在我的例子中,不同的CNN、优化器等得到的损失都是“19.2..”。不同CNN的输出并不是恒定的,但无论做什么改动,损失都是同一个常数“19.2…”。任何见解我都将不胜感激。

你为什么说它没有改变?94.4755->19.2036->19.3207?它总是在19.2附近。从第二个epoch开始,在接下来的98个epoch里,它只在小数点后第三或第四位发生变化。我建议您首先检查验证数据集是否真的得到了这么好的结果,然后再采取行动。@lincr 我不明白。我有一个包含1900张图像的数据集。我按0.7:0.3划分数据集以进行训练和验证。一旦网络训练完成,我就可以在其他数据集上测试。你能详细说明一下吗?理想情况下,你应该把数据分成三部分:训练集、验证集和测试集。验证集用于参数调整,而测试集用于测试调整后的模型。