Python 用keras-CNN模型读取多个手写数字
(标签:python、image、keras、conv-neural-network、digits)我使用 CROHME 数据集提供的部分数据构建了一个用于数字识别的 CNN。它在超过 200000 张图像上进行了训练,应该能识别数字 0-9 以及 +、-、=、(、),因为这个项目的目标是读取一个简单的方程,然后计算结果。也就是说,我把包含多个数字/算术运算符的图像传给一个函数,该函数会裁剪出正方形的局部区域,并让 keras 模型对每个区域进行预测。这些图像可以如下所示,例如: 对于这张图像,我得到的输出是:
结果:['1','1','-','1']
也就是说,它只正确识别出了减号,其余的都大错特错。对于这几个 1,网络给出的预测置信度分别为 59%、76% 和 57%。
当然,我裁剪出的方块并不总是让数字完全居中,我理解这为什么会让正确预测变得困难。但举例来说,第一个数字已经被很好地裁剪出来了,却仍然没有被正确预测。
因此,我的问题是:
# import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.layers import Flatten
from tensorflow.python.keras.layers.convolutional import Conv2D
from tensorflow.python.keras.layers.convolutional import MaxPooling2D
from tensorflow.python.keras.models import model_from_json
#from keras.utils import np_utils
# numpy is necessary since keras uses numpy arrays
import numpy as np
# imports for pictures
import matplotlib.pyplot as plt
import PIL
#import cv2
# imports for tests
import random
import os
class neuralNetwork():
    """Keras CNN that recognises handwritten digits and simple operators.

    The network takes 45x45 greyscale images and classifies them into 15
    classes.  ``multiple_digits`` / ``partial_img_rec`` slide a square window
    over a wider image and collect one label per confident detection.

    NOTE(review): ``self.prepare_data`` and ``self.assignments`` (a mapping
    ``label -> class index``) are used below but are not defined in the
    visible code; they are presumably provided by data-preparation code that
    was omitted from this listing -- confirm before relying on this class.
    """

    def prepare_image(self, img, show=False):
        """Convert a PIL image into the array layout the network consumes.

        The image is converted to greyscale, resized to 45x45 if necessary,
        scaled to the range [0, 1] and thresholded so that faint background
        noise (values below 0.1) becomes exactly 0.

        Args:
            img: a PIL image (training, test or cropped partial image).
            show: if true, draw the processed image with matplotlib.

        Returns:
            A float32 numpy array of shape ``(1, 45, 45, 1)``.
        """
        # convert to greyscale
        img = img.convert("L")
        # rescale image to 45 * 45 dimension if necessary
        if img.size != (45, 45):
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the very
            # same filter under its current name.
            img = img.resize((45, 45), PIL.Image.LANCZOS)
        # transform to an array with values in [0, 1]
        pixels = np.asarray(img, "float32") / 255.
        # threshold eliminates background noise
        pixels[pixels < 0.1] = 0.
        if show:
            plt.imshow(pixels, cmap="Greys")
        return pixels.reshape((1, 45, 45, 1))

    def __init__(self, newModel=False):
        """Create the network by training a new model or loading a saved one.

        A new model is trained when ``newModel`` is true or when either
        ``model.json`` or ``model.h5`` is missing from the working directory;
        otherwise the saved architecture and weights are loaded.

        The model code is based on:
        https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/
        and https://machinelearningmastery.com/save-load-keras-deep-learning-models/

        Args:
            newModel: force training a fresh model even if a saved one exists.
        """
        # paths to data
        self.train_data = ".//train"
        self.test_data = ".//test"
        # The test set is always loaded; it doubles as validation data when
        # a new model is trained.
        self.test_img, self.test_res = self.prepare_data(self.test_data)
        num_classes = 15
        saved_model_missing = not (os.path.exists("model.json") and os.path.exists("model.h5"))
        if newModel or saved_model_missing:
            self._train_new_model(num_classes)
        else:
            self._load_saved_model()

    def _train_new_model(self, num_classes):
        """Build, train and persist a fresh CNN with ``num_classes`` outputs."""
        # only load the training data if it is actually needed
        self.train_img, self.train_res = self.prepare_data(self.train_data, shuffle=True)
        print("creating new model")
        # create a new model
        self.model = Sequential()
        self.model.add(Conv2D(30, (5, 5), input_shape=(45, 45, 1), activation='relu'))
        self.model.add(MaxPooling2D())
        self.model.add(Conv2D(15, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D())
        self.model.add(Dropout(0.2))
        self.model.add(Flatten())
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dense(50, activation='relu'))
        self.model.add(Dense(num_classes, activation='softmax'))
        # compile model
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        # train the model
        self.model.fit(
            self.train_img,
            self.train_res,
            validation_data=(self.test_img, self.test_res),
            epochs=10,
            batch_size=64,
            verbose=2,
        )
        # save architecture and weights for later use
        with open("model.json", "w") as json_file:
            json_file.write(self.model.to_json())
        self.model.save_weights("model.h5")
        print("Saved trained model to disk")

    def _load_saved_model(self):
        """Restore the architecture from model.json and weights from model.h5."""
        # the context manager closes the file even if reading fails
        with open("model.json", "r") as json_file:
            loaded_model = json_file.read()
        self.model = model_from_json(loaded_model)
        # load weights from disk
        self.model.load_weights("model.h5")
        # compile model
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        print("Loaded model and weights from disk")

    def predict_result(self, img, show=False):
        """Predict the symbol shown in a prepared image.

        Args:
            img: numpy array of shape ``(1, 45, 45, 1)`` -- one image, 45x45
                pixels, one (greyscale) channel -- as the input layer demands.
            show: if true, display the image with matplotlib first.

        Returns:
            A tuple ``(label, probabilities)`` where ``label`` is the key of
            ``self.assignments`` whose value equals the most probable class
            index and ``probabilities`` is the list of class probabilities
            for this single image.

        Raises:
            AssertionError: if ``img`` is not an array of the expected shape.
            ValueError: if no label is assigned to the predicted class index.
        """
        assert isinstance(img, np.ndarray) and img.shape == (1, 45, 45, 1)
        if show:
            # show the picture
            plt.imshow(img.reshape((45, 45)), cmap='Greys')
            plt.show()
        # the probabilities
        res_probabilities = self.model.predict(img)
        # the class index with the highest probability
        res_number = np.argmax(res_probabilities)
        # reverse lookup: first label whose class index matches
        for label, class_index in self.assignments.items():
            if class_index == res_number:
                # the batch holds exactly one image, so only row 0 is needed
                return (label, res_probabilities.tolist()[0])
        raise ValueError("%r is not an assigned class index" % (res_number,))

    def partial_img_rec(self, image, upper_left, lower_right, results=None, show=True):
        """Slide a window across ``image`` and classify each position.

        Starting with the box ``upper_left`` / ``lower_right`` the window is
        moved to the right until its right edge passes the image width.  A
        prediction counts only if the network is at least 40% sure; after a
        hit the window jumps by 80% of the image height, otherwise it creeps
        forward by 1/20 of the image height.

        Args:
            image: PIL image to scan.
            upper_left: ``(x, y)`` of the window's upper left corner.
            lower_right: ``(x, y)`` of the window's lower right corner.
            results: list to append detected labels to.  A new list is
                created when omitted (a shared ``[]`` default would leak
                detections from one call into the next).
            show: if true, display every cropped window.

        Returns:
            ``results`` with one label appended per confident detection.
        """
        if results is None:
            results = []
        left_x, left_y = upper_left
        right_x, right_y = lower_right
        width, height = image.size
        # Steps are clamped to at least one pixel so the scan always
        # terminates, even for images less than 20 pixels high.
        confident_step = max(1, int(height * 0.8))
        searching_step = max(1, height // 20)

        # A loop instead of recursion: wide images need far more window
        # positions than the interpreter's recursion limit allows.
        while True:
            print("current test part: ", (left_x, left_y), (right_x, right_y))
            print("results: ", results)
            # stop condition: we've reached the full width of the picture
            if right_x > width:
                return results
            partial = image.crop((left_x, left_y, right_x, right_y))
            if show:
                partial.show()
            partial = self.prepare_image(partial)
            # is there a number or operator in this part of the image?
            res, prop = self.predict_result(partial)
            print("result: ", res, ". probabilities: ", prop)
            # only count this result if the network is at least 40% sure
            if prop[self.assignments[res]] >= 0.4:
                results.append(res)
                step = confident_step
                print("found valid result")
            else:
                # nothing recognised here, so take smaller steps
                step = searching_step
            print("step: ", step)
            # move the window on by the chosen step
            left_x += step
            right_x += step

    def individual_digits(self, img):
        """Predict the single symbol contained in a square image.

        Args:
            img: a PIL image (JPEG and PNG image files are PIL images too).

        Returns:
            The list produced by ``partial_img_rec`` for the full image.

        Raises:
            AssertionError: if ``img`` is not a PIL image.
        """
        assert isinstance(img, PIL.Image.Image)
        if img.size[0] != img.size[1]:
            # only a warning: the image is still processed afterwards
            print(img, " has the wrong proportions: ", img.size, ". It has to be a square.")
        return self.partial_img_rec(img, (0, 0), (img.size[0], img.size[1]), results=[])

    def multiple_digits(self, imgName):
        """Read all symbols from an image file and return them as one string.

        Args:
            imgName: path of an image without unnecessary whitespace
                surrounding the digits.

        Returns:
            The detected labels concatenated from left to right.

        Raises:
            AssertionError: if ``imgName`` is not a string.
        """
        assert isinstance(imgName, str)
        # the context manager releases the file handle once scanning is done
        with PIL.Image.open(imgName) as img:
            width, height = img.size
            # Start with the first square part of the image.  This works
            # because there is no unnecessary whitespace.
            res_list = self.partial_img_rec(img, (0, 0), (height, height), results=[])
        return "".join(str(elem) for elem in res_list)
如果您想试用我的网络,请注意:为了提高可读性,我省略了一些数据准备工作,比如独热编码(one-hot encoding)。您还可以获取包含已训练模型的两个文件。
评论:您解决了这个问题吗?我很好奇结果如何。
评论:@Farhad 没有。这是一个大学项目,结果我的成绩还不错。我仍然想知道哪里出了问题,但我们不得不继续往前走了。你也遇到了类似的问题吗?
{
"0" : 0,
"1" : 1,
"2" : 2,
"3" : 3,
"4" : 4,
"5" : 5,
"6" : 6,
"7" : 7,
"8" : 8,
"9" : 9,
"-" : 10,
"+" : 11,
"=" : 12,
"(" : 13,
")" : 14
}