Python: Reading multiple handwritten digits with a Keras CNN model


I built a CNN for digit recognition using some of the data provided by the CROHME dataset. It was trained on >200,000 images and is supposed to recognize the digits 0-9 as well as +, -, =, ( and ), since the goal of this project is to read a simple equation and then compute its result. That means I am feeding images containing several digits/arithmetic operators to a function which cuts out square bits and lets the Keras model predict them.

Such an image can look like this, for example:

The result I get for this image is results: ['1', '1', '-', '1']. So some of it is right, but the rest is completely off. For the 1s, the network was only 59%, 76% and 57% sure.

Of course the squares I cut out don't always show the digit/operator perfectly centred, and I understand why that makes a correct prediction difficult. But the first digit, for example, was cut out quite well and still wasn't predicted correctly.
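
A minimal sketch of how such a crop could be re-centred before prediction, assuming dark ink on a light background as in prepare_image below; the helper name center_crop and the threshold value are assumptions, not part of the original code:

    import PIL.Image

    def center_crop(partial, size=45, threshold=25):
        """Re-centre the dark strokes of a cropped square on a blank canvas."""
        grey = partial.convert("L")
        # invert so strokes become bright, then drop faint background noise
        mask = grey.point(lambda p: 255 - p).point(lambda p: p if p > threshold else 0)
        bbox = mask.getbbox()                     # bounding box of the strokes, None if empty
        if bbox is None:
            return grey.resize((size, size))
        strokes = grey.crop(bbox)
        # shrink (keeping the aspect ratio) so the strokes fit into the target square
        strokes.thumbnail((size, size), PIL.Image.ANTIALIAS)
        canvas = PIL.Image.new("L", (size, size), color=255)
        # paste the strokes so that their centre coincides with the canvas centre
        canvas.paste(strokes, ((size - strokes.width) // 2, (size - strokes.height) // 2))
        return canvas

Calling something like this on each partial image right before prepare_image would be one place to try it.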

So my questions are:

  • Individual digits/operators that are displayed more or less well are not recognized reliably. When I use test images from the dataset itself, the network gets 99% of them right, so it must be an image preparation problem.
  • Can you help me improve my function partial_img_rec, which controls cutting out the square parts and passing them to the Keras model? The parts are always squares whose side length equals the original picture's height, and they are moved to the right depending on whether a digit/operator was predicted with a probability of at least 40%. If so, I move on by 80% of the square's width; if not, I take a much smaller step. It is a recursive function that keeps moving right until it reaches the right edge of the image (a non-recursive sketch of this stepping logic follows after this list).
  • I once asked a similar question about pictures of single digits and got some useful advice about the way the digit should be centred within the picture. Note that I don't have much control over that when I'm given a picture containing several digits.
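
For reference, here is the stepping logic described above as a minimal, non-recursive sketch; model_predict is an assumed stand-in for predict_result, and the 40% threshold, the 80% step and the small 5% step mirror the description rather than being the original partial_img_rec:

    def sliding_window_rec(image, model_predict, confidence=0.4):
        """Slide a square window (side = image height) from left to right and
        collect every prediction whose confidence is at least `confidence`."""
        width, height = image.size
        results = []
        left = 0
        while left + height <= width:
            crop = image.crop((left, 0, left + height, height))
            symbol, prob = model_predict(crop)      # assumed helper wrapping the CNN
            if prob >= confidence:
                results.append(symbol)
                left += int(height * 0.8)           # big step: 80% of the window width
            else:
                left += max(1, height // 20)        # small step: move on by ~5%
        return results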

    Here is my code:

    # import keras
    from tensorflow.python.keras.models import Sequential
    from tensorflow.python.keras.layers import Dense
    from tensorflow.python.keras.layers import Dropout
    from tensorflow.python.keras.layers import Flatten
    from tensorflow.python.keras.layers.convolutional import Conv2D
    from tensorflow.python.keras.layers.convolutional import MaxPooling2D
    from tensorflow.python.keras.models import model_from_json
    #from keras.utils import np_utils
    # numpy is necessary since keras uses numpy arrays
    import numpy as np
    
    # imports for pictures
    import matplotlib.pyplot as plt
    import PIL
    #import cv2
    
    # imports for tests
    import random
    import os    
    class neuralNetwork():
    
            def prepare_image(self, img, show = False):
                """ prepares the training and testing as well as
                    the partial images used in partial_img_rec by transforming them
                    into numpy arrays that the network will be able to process """
    
                # convert to greyscale
                img = img.convert("L")
                # rescale the image to 45 * 45 if necessary
                if img.size != (45,45):
                    img = img.resize((45,45), PIL.Image.ANTIALIAS)
    
                # transform to vector
                img = np.asarray(img, "float32")
                img = img / 255.
                # threshold eliminates background noise
                img[img < 0.1] = 0.
    
                if show:
                    plt.imshow(img, cmap = "Greys")
                img = img.reshape((1, 45, 45, 1))
    
                return img
    
    
            def __init__(self, newModel = False):
                """ initialized the neural network either by creating and training a new model or by loading a saved one """
                # paths to data
                self.train_data = ".//train"
                self.test_data = ".//test"
    
                self.test_img, self.test_res = self.prepare_data(self.test_data)
    
    
                """ The following code is based on existing code that can be found on the following websites :
                    https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/
                    and https://machinelearningmastery.com/save-load-keras-deep-learning-models/
                """
    
                num_classes = 15
                """ This is a CNN (Convolutional Neural Network) which is more efficient than the standard NN """
                if newModel or not os.path.exists("model.json") or not os.path.exists("model.h5"):
                    # only load the training data when a new model has to be trained
                    self.train_img, self.train_res = self.prepare_data(self.train_data, shuffle = True)
                    print("creating new model")
                    # create a new model
                    self.model = Sequential()
                    self.model.add(Conv2D(30, (5, 5), input_shape=(45, 45, 1), activation='relu'))
                    self.model.add(MaxPooling2D())
                    self.model.add(Conv2D(15, (3, 3), activation='relu'))
                    self.model.add(MaxPooling2D())
                    self.model.add(Dropout(0.2))
                    self.model.add(Flatten())
                    self.model.add(Dense(128, activation='relu'))
                    self.model.add(Dense(50, activation='relu'))
                    self.model.add(Dense(num_classes, activation='softmax'))
                    #compile model
                    self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
                    # train the model
                    self.model.fit(self.train_img, self.train_res, validation_data=(self.test_img, self.test_res), epochs=10, batch_size=64, verbose=2)
    
                    # save model for later use
                    model_json = self.model.to_json()
                    with open("model.json", "w") as json_file:
                        json_file.write(model_json)
                    # save weights
                    self.model.save_weights("model.h5")
                    print("Saved trained model to disk")
                else:
                   # load the model from disk
                   json_file = open("model.json", "r")
                   loaded_model = json_file.read()
                   json_file.close()
                   self.model = model_from_json(loaded_model)
                   # load weights from disk
                   self.model.load_weights("model.h5")
                   #compile model
                   self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
                   print("Loaded model and weights from disk")
    
    
    
            def predict_result(self, img, show = False):
                """ predicts the number in a picture (vector) """
                assert type(img) == np.ndarray and img.shape == (1, 45, 45, 1)
                # 1, 45, 45, 1 is the image shape the input layer demands. 45, 45 are the dimensions and 1 stands for greyscale (channel)
    
                if show:
                    img = img.reshape((45, 45))
                    # show the picture
                    plt.imshow(img, cmap='Greys')
                    plt.show()
                    img = img.reshape((1, 45, 45, 1))
    
                # the probabilities
                res_probabilities = self.model.predict(img)
                # the value with the highest probability
                res_number = np.argmax(res_probabilities)
    
                return (list(self.assignments.keys())[list(self.assignments.values()).index(res_number)], res_probabilities.tolist()[0])    # we only need the first dimension since the array only has one
    
    
    
            def partial_img_rec(self, image, upper_left, lower_right, results=[], show = True):
                """ passes square parts of images to predict_result """
                left_x, left_y = upper_left
                right_x, right_y = lower_right
    
                print("current test part: ", upper_left, lower_right)
                print("results: ", results)
                # condition to stop recursion: we've reached the full width of the picture
                width, height = image.size
                if right_x > width:
                    return results
    
                partial = image.crop((left_x, left_y, right_x, right_y))
                if show:
                    partial.show()
                partial = self.prepare_image(partial)
    
                # is there a number or operator in this part of the image? 
                res, prop = self.predict_result(partial)
                print("result: ", res, ". probabilities: ", prop)
                # only count this result if the network is at least 40% sure
                if prop[self.assignments[res]] >= 0.4:        
                    results.append(res)
                    # step is 80% of the partial image's size (which is equivalent to the original image's height) 
                    step = int(height * 0.8)
                    print("found valid result")
                else:
                    # if there is no number or operator found we take smaller steps
                    step = height // 20 
                print("step: ", step)
                # recursive call with modified positions ( move on step variables )
                return self.partial_img_rec(image, (left_x + step, left_y), (right_x + step, right_y), results = results)
    
            def individual_digits(self, img):
                """ uses partial_img_rec to predict individual digits in square images """
                assert type(img) == PIL.JpegImagePlugin.JpegImageFile or type(img) == PIL.PngImagePlugin.PngImageFile or type(img) == PIL.Image.Image
                if img.size[0]  != img.size[1]:
                        print(img, " has the wrong proportions: ", img.size,". It has to be a square.")
                return self.partial_img_rec(img, (0,0), (img.size[0], img.size[1]), results=[])
    
    
            def multiple_digits(self, imgName):
                """ takes as input an image without unnecessary whitespace surrounding the digits """
                assert type(imgName) == str
                #img = cuttingImage(imgName)
                img = PIL.Image.open(imgName)
    
                width, height = img.size
                # start with the first square part of the image. This can work because there is no unnecessary whitespace.
                res_list = self.partial_img_rec(img, (0,0), (height, height), results = [])
                res_str = ""
                for elem in res_list:
                    res_str += str(elem)
                return res_str
    

    If you want to try it out with my network, note that for better readability I have left out some of the data preparation, such as the one-hot encoding. You can also get the two files containing the trained model.
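
    As a rough idea of the omitted preparation, here is a sketch of what a prepare_data function with one-hot encoding could look like. It assumes one sub-folder per symbol, named after the keys of the assignments dictionary shown further down; the folder layout and the function body are assumptions, not the actual omitted code:

        import os
        import numpy as np
        import PIL.Image

        def prepare_data_sketch(folder, assignments, shuffle=False):
            """Hypothetical loader: one sub-folder per symbol, images inside.
            Returns the image tensor and the one-hot encoded labels."""
            images, labels = [], []
            for symbol, class_index in assignments.items():
                class_dir = os.path.join(folder, symbol)
                if not os.path.isdir(class_dir):
                    continue
                for name in os.listdir(class_dir):
                    img = PIL.Image.open(os.path.join(class_dir, name)).convert("L")
                    img = img.resize((45, 45))
                    arr = np.asarray(img, "float32") / 255.
                    arr[arr < 0.1] = 0.              # same thresholding as prepare_image
                    images.append(arr.reshape(45, 45, 1))
                    labels.append(class_index)
            images = np.stack(images)
            # one-hot encoding of the 15 classes
            one_hot = np.eye(len(assignments), dtype="float32")[labels]
            if shuffle:
                order = np.random.permutation(len(images))
                images, one_hot = images[order], one_hot[order]
            return images, one_hot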

    Comment: "Did you solve the problem? I'm curious about the result."
    Reply: "@Farhad No. It was a university project and my grade turned out fine anyway. I would still like to know what went wrong, but we had to move on. Do you have a similar problem?"
    For reference, this is the assignments dictionary (self.assignments) that maps each symbol to its class index:

        {
            "0" : 0,
            "1" : 1,
            "2" : 2,
            "3" : 3,
            "4" : 4,
            "5" : 5,
            "6" : 6,
            "7" : 7,
            "8" : 8,
            "9" : 9,
            "-" : 10,
            "+" : 11,
            "=" : 12,
            "(" : 13,
            ")" : 14
        }
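
    Finally, a small usage sketch, assuming the trained model.json / model.h5 files are in the working directory (or the omitted prepare_data is filled in so a new model can be trained); equation.png and single_digit.png are placeholder file names:

        from PIL import Image

        # load (or train) the network; with model.json / model.h5 present it loads from disk
        nn = neuralNetwork()

        # read a whole equation image that was cropped without surrounding whitespace
        print(nn.multiple_digits("equation.png"))                      # placeholder file name

        # classify a single, square digit/operator image
        print(nn.individual_digits(Image.open("single_digit.png")))    # placeholder file name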