Python 图像边缘检测Keras模型丢失没有改善_Python_Image Processing_Machine Learning_Keras_Edge Detection

Python 图像边缘检测：Keras 模型的损失没有改善

python image-processing machine-learning keras

Python 图像边缘检测Keras模型丢失没有改善,python,image-processing,machine-learning,keras,edge-detection,Python,Image Processing,Machine Learning,Keras,Edge Detection,我有一个水滴的视频。我已经采取了第一帧和手动标记的边缘。我将图像分割成更小的图像。然后，我试着根据小标记图像训练小未标记图像的keras模型我试过使用“密集”层。模型可以训练，但损失没有改善。当我尝试使用该模型时，它只会给我一个黑色图像输出标记分割图像输入图像（第1帧）模型摘要我希望输出的图像与标记的边缘相匹配。相反，我得到了一个黑色图像输出。您使用了错误的损耗/度量组合。你的问题是分类还是回归？MSE用于回归，而分类交叉熵（或稀疏，或二进制）用于分类我通过使用图像的7x7部分

我有一段水滴的视频。我取出了第一帧并手动标记了边缘，然后把图像分割成更小的图块。接着，我尝试训练一个 Keras 模型，让它根据未标记的小图块预测对应的已标记小图块。

我尝试过使用全连接（Dense）层。模型可以训练，但损失没有下降。当我用该模型进行预测时，它只输出一张全黑的图像。

标记分割图像

输入图像（第1帧）

模型摘要

我希望输出的图像与标记的边缘相匹配。相反，我得到了一个黑色图像输出。

您使用了错误的损失函数/评估指标组合。您的问题是分类还是回归？MSE 用于回归，而分类交叉熵（categorical crossentropy，或其 sparse、binary 版本）用于分类。

我解决了这个问题：从图像中取 7x7 的窗口，把窗口的中心像素分类为油或水（1 或 0），然后使用二元交叉熵损失函数训练模型。

通过让一个7x7的部分在主图像上一次移动一个像素，我可以得到比仅仅分割主图像多得多的训练数据

我之前尝试的是由一个 7x7 图像直接预测另一个 7x7 图像，这让问题变得困难得多。

#IMPORT AND SPLIT

from cam_img_split import cam_img_split
from cam_pad import cam_pad
from cam_img_bow import cam_img_bow
import cv2
import numpy as np

# Both images are cropped to the same top-left region.
crop = (slice(0, 767), slice(0, 767))

# Read the raw frame and its hand-labelled counterpart with imread flag 0
# (single-channel grayscale) and scale the pixel values by 1/255.
img_tr_in = cv2.imread('frame 1.png', 0)[crop] / 255
img_tr_out = cv2.imread('frame 1 so far bnw 2.png', 0)[crop] / 255
# cam_img_bow is a project helper; presumably it thresholds the labels at 0.5
# into two values -- TODO confirm against cam_img_bow.
img_tr_out = cam_img_bow(img_tr_out, 0.5).astype(np.uint8)

seg_shape = [15, 15]  # needs to be odd and equal to each other

# pl_max: (rows, cols) of the cropped frame.
# pl:     (rows, cols) of the region used for training, i.e. the first 15 %
#         of the rows (truncated to an integer) and every column.
pl_max = img_tr_in.shape[0:2]
pl = np.array([0.15 * pl_max[0], pl_max[1]]).astype(np.uint32)

# Half the window size, rounded down.
pad_in = seg_shape[0] // 2

# cam_pad is a project helper; presumably it adds a pad_in-wide border so a
# full window exists around every original pixel -- TODO confirm.
img_tr_in_pad = cam_pad(img_tr_in, pad_in)

# tr_in[r, c] holds the seg_shape window of the padded frame whose top-left
# corner is at (r, c).
tr_in = np.zeros([pl[0], pl[1], seg_shape[0], seg_shape[1]])

win_h, win_w = seg_shape
for row in range(0, pl[0]):
    # The row band is shared by every window on this row.
    band = img_tr_in_pad[row:row + win_h]
    for col in range(0, pl[1]):
        tr_in[row, col] = band[:, col:col + win_w]


##################### NEURAL NETWORK

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Dropout,Conv2D, MaxPooling2D, Flatten
from keras.optimizers import adam
from keras.utils import to_categorical
import matplotlib.pyplot as plt

# Extra border, in pixels per side, added around each window before it is fed
# to the network (applied with cam_pad in the fitting/prediction sections).
pad = 4

# Network input: one padded window with a single channel.
input_shape = (seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad, 1)
# NOTE(review): output_shape is not referenced anywhere in the visible script.
output_shape = (1, 1, 1)

# Two 3x3 convolutions, dropout, then a 2-way softmax over the flattened
# feature maps.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.2),
    Flatten(),
    Dense(units=2, activation='softmax'),
])

model.compile(
    optimizer=adam(lr=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


##################### FITTING THE MODEL

# One sample per training pixel: (pl[0]*pl[1], window_h, window_w, 1).
tr_in_flat = tr_in.reshape([pl[0] * pl[1], seg_shape[0], seg_shape[1], 1])
# Whole label image flattened row-major (kept under its original name).
tr_out_flat = img_tr_out.reshape([pl_max[0] * pl_max[1]])

# Labels for exactly the pl[0] x pl[1] training region. Slicing the 2-D label
# image keeps labels aligned with the windows even if pl[1] is ever made
# smaller than the image width; taking the first pl[0]*pl[1] entries of the
# flattened image is only correct while pl[1] == pl_max[1].
tr_labels = img_tr_out[0:pl[0], 0:pl[1]].reshape(-1)

# Same sample layout as tr_in_flat, with 2*pad extra rows and columns.
tr_in_flat_pad = np.zeros(tr_in_flat.shape + np.array([0, 2 * pad, 2 * pad, 0]))

for n3 in range(0, tr_in_flat.shape[0]):
    tr_in_flat_pad[n3, :, :, 0] = cam_pad(tr_in_flat[n3, :, :, 0], pad)

# num_classes is fixed at 2 so the one-hot targets always match the two-unit
# softmax output. Without it to_categorical infers the class count from the
# largest label present and yields a single column when the training band
# happens to contain only label 0.
model.fit(
    tr_in_flat_pad,
    to_categorical(tr_labels, num_classes=2),
    epochs=5,
    batch_size=int(16 * pl[0]),
    shuffle=True,
)


##################### PLOTTING PREDICTIONS

# One padded window per pixel of the whole frame. This is a float64 array of
# pl_max[0]*pl_max[1] windows -- about 2.5 GB when the crop is the full
# 767x767 with 23x23 windows.
tr_in_full = np.zeros([pl_max[0], pl_max[1], seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad])

for n1 in range(0, pl_max[0]):
    for n2 in range(0, pl_max[1]):
        tr_in_full[n1, n2] = cam_pad(
            img_tr_in_pad[n1:n1 + seg_shape[0], n2:n2 + seg_shape[1]], pad)

# Flatten the pixel grid into the sample axis and add the channel axis.
tr_in_full_flat = tr_in_full.reshape(
    [pl_max[0] * pl_max[1], seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad, 1])

pred = model.predict(tr_in_full_flat)

# The training targets are one-hot with label k in column k, so the predicted
# label is the index of the larger softmax output. Rounding column 0 (the
# probability of label 0) would display the mask inverted relative to the
# training labels.
pred_img = np.argmax(pred, axis=1).astype(np.float64)

# Back to image layout: one predicted label per pixel.
pred_img_out = pred_img.reshape([pl_max[0], pl_max[1]])

# Left: predicted label image. Right: the input frame.
plt.subplot(1, 2, 1)
plt.imshow(pred_img_out)

plt.subplot(1, 2, 2)
plt.imshow(img_tr_in)

plt.show()

您不应该使用 softmax 作为隐藏层的激活函数。谢谢，那我应该使用哪种激活函数？通常情况下，坚持使用 ReLU 激活效果就很好，除非您需要某种特定的变换。另外，既然您基本上是在寻找边缘，卷积层难道不比全连接（Dense）层更合适吗？您也可以使用 sigmoid。可以打印 model.summary() 的输出吗？谢谢！我在顶部的链接中添加了模型摘要的图片。我不想做分类，我想要的是把一张图像转换成另一张图像。

#IMPORT AND SPLIT

from cam_img_split import cam_img_split
from cam_pad import cam_pad
from cam_img_bow import cam_img_bow
import cv2
import numpy as np

# Both images are cropped to the same top-left region.
crop = (slice(0, 767), slice(0, 767))

# Read the raw frame and its hand-labelled counterpart with imread flag 0
# (single-channel grayscale) and scale the pixel values by 1/255.
img_tr_in = cv2.imread('frame 1.png', 0)[crop] / 255
img_tr_out = cv2.imread('frame 1 so far bnw 2.png', 0)[crop] / 255
# cam_img_bow is a project helper; presumably it thresholds the labels at 0.5
# into two values -- TODO confirm against cam_img_bow.
img_tr_out = cam_img_bow(img_tr_out, 0.5).astype(np.uint8)

seg_shape = [15, 15]  # needs to be odd and equal to each other

# pl_max: (rows, cols) of the cropped frame.
# pl:     (rows, cols) of the region used for training, i.e. the first 15 %
#         of the rows (truncated to an integer) and every column.
pl_max = img_tr_in.shape[0:2]
pl = np.array([0.15 * pl_max[0], pl_max[1]]).astype(np.uint32)

# Half the window size, rounded down.
pad_in = seg_shape[0] // 2

# cam_pad is a project helper; presumably it adds a pad_in-wide border so a
# full window exists around every original pixel -- TODO confirm.
img_tr_in_pad = cam_pad(img_tr_in, pad_in)

# tr_in[r, c] holds the seg_shape window of the padded frame whose top-left
# corner is at (r, c).
tr_in = np.zeros([pl[0], pl[1], seg_shape[0], seg_shape[1]])

win_h, win_w = seg_shape
for row in range(0, pl[0]):
    # The row band is shared by every window on this row.
    band = img_tr_in_pad[row:row + win_h]
    for col in range(0, pl[1]):
        tr_in[row, col] = band[:, col:col + win_w]


##################### NEURAL NETWORK

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Dropout,Conv2D, MaxPooling2D, Flatten
from keras.optimizers import adam
from keras.utils import to_categorical
import matplotlib.pyplot as plt

# Extra border, in pixels per side, added around each window before it is fed
# to the network (applied with cam_pad in the fitting/prediction sections).
pad = 4

# Network input: one padded window with a single channel.
input_shape = (seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad, 1)
# NOTE(review): output_shape is not referenced anywhere in the visible script.
output_shape = (1, 1, 1)

# Two 3x3 convolutions, dropout, then a 2-way softmax over the flattened
# feature maps.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.2),
    Flatten(),
    Dense(units=2, activation='softmax'),
])

model.compile(
    optimizer=adam(lr=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


##################### FITTING THE MODEL

# One sample per training pixel: (pl[0]*pl[1], window_h, window_w, 1).
tr_in_flat = tr_in.reshape([pl[0] * pl[1], seg_shape[0], seg_shape[1], 1])
# Whole label image flattened row-major (kept under its original name).
tr_out_flat = img_tr_out.reshape([pl_max[0] * pl_max[1]])

# Labels for exactly the pl[0] x pl[1] training region. Slicing the 2-D label
# image keeps labels aligned with the windows even if pl[1] is ever made
# smaller than the image width; taking the first pl[0]*pl[1] entries of the
# flattened image is only correct while pl[1] == pl_max[1].
tr_labels = img_tr_out[0:pl[0], 0:pl[1]].reshape(-1)

# Same sample layout as tr_in_flat, with 2*pad extra rows and columns.
tr_in_flat_pad = np.zeros(tr_in_flat.shape + np.array([0, 2 * pad, 2 * pad, 0]))

for n3 in range(0, tr_in_flat.shape[0]):
    tr_in_flat_pad[n3, :, :, 0] = cam_pad(tr_in_flat[n3, :, :, 0], pad)

# num_classes is fixed at 2 so the one-hot targets always match the two-unit
# softmax output. Without it to_categorical infers the class count from the
# largest label present and yields a single column when the training band
# happens to contain only label 0.
model.fit(
    tr_in_flat_pad,
    to_categorical(tr_labels, num_classes=2),
    epochs=5,
    batch_size=int(16 * pl[0]),
    shuffle=True,
)


##################### PLOTTING PREDICTIONS

# One padded window per pixel of the whole frame. This is a float64 array of
# pl_max[0]*pl_max[1] windows -- about 2.5 GB when the crop is the full
# 767x767 with 23x23 windows.
tr_in_full = np.zeros([pl_max[0], pl_max[1], seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad])

for n1 in range(0, pl_max[0]):
    for n2 in range(0, pl_max[1]):
        tr_in_full[n1, n2] = cam_pad(
            img_tr_in_pad[n1:n1 + seg_shape[0], n2:n2 + seg_shape[1]], pad)

# Flatten the pixel grid into the sample axis and add the channel axis.
tr_in_full_flat = tr_in_full.reshape(
    [pl_max[0] * pl_max[1], seg_shape[0] + 2 * pad, seg_shape[1] + 2 * pad, 1])

pred = model.predict(tr_in_full_flat)

# The training targets are one-hot with label k in column k, so the predicted
# label is the index of the larger softmax output. Rounding column 0 (the
# probability of label 0) would display the mask inverted relative to the
# training labels.
pred_img = np.argmax(pred, axis=1).astype(np.float64)

# Back to image layout: one predicted label per pixel.
pred_img_out = pred_img.reshape([pl_max[0], pl_max[1]])

# Left: predicted label image. Right: the input frame.
plt.subplot(1, 2, 1)
plt.imshow(pred_img_out)

plt.subplot(1, 2, 2)
plt.imshow(img_tr_in)

plt.show()