Python-Ocr与Knn算法扩展testdata_Python_Opencv_Machine Learning_Computer Vision_Ocr

Python-Ocr与Knn算法扩展testdata

python opencv machine-learning computer-vision

Python-Ocr与Knn算法扩展testdata,python,opencv,machine-learning,computer-vision,ocr,Python,Opencv,Machine Learning,Computer Vision,Ocr,我尝试使用OpenCV和Knn算法在Python中为数字编写Ocr。代码运行得很好，但我想扩展我的输入数据，以转移到手写数字。数据的训练/输入是这样工作的：你运行脚本，图像的路径在脚本中，然后图像以一个围绕数字的矩形打开，你必须按下键盘上的数字。最后，它将分类和展平图像保存在文本文件中问题是它会覆盖旧的文本文件，因此这些数据会丢失是否可以/是否可以附加新文本文件和旧文本文件代码如下： import sys import numpy as np import cv2 import os M

我尝试使用OpenCV和Knn算法在Python中为数字编写Ocr。代码运行得很好，但我想扩展我的输入数据，以转移到手写数字。数据的训练/输入是这样工作的：你运行脚本，图像的路径在脚本中，然后图像以一个围绕数字的矩形打开，你必须按下键盘上的数字。最后，它将分类和展平图像保存在文本文件中

问题是它会覆盖旧的文本文件，因此这些数据会丢失

是否可以把新数据追加到旧的文本文件中，而不是覆盖它？

代码如下：

import sys
import numpy as np
import cv2
import os

MIN_CONTOUR_AREA = 35

RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

def main():
    """Interactively label character contours and save them as KNN training data.

    Reads "training_chars.png", thresholds it and finds the outermost
    contours.  Each contour larger than MIN_CONTOUR_AREA is outlined on the
    training image and shown to the user, who presses the key of the
    character it depicts.  Accepted keys are the digits 0-9 plus '-', 'o'
    and 'c'; any other key skips the contour and ESC aborts the program
    without saving.

    On completion two text files are written (existing files are
    overwritten):
      * classifications.txt  -- one key code per labelled contour
      * flattened_images.txt -- one row per labelled contour holding the
        thresholded crop resized to
        RESIZED_IMAGE_WIDTH x RESIZED_IMAGE_HEIGHT and flattened
    """
    imgTrainingNumbers = cv2.imread("training_chars.png")

    if imgTrainingNumbers is None:                      # image was not read successfully
        print("error: image not read from file \n\n")   # single-argument call prints the same on Python 2 and 3
        os.system("pause")                              # pause so user can see error message
        return                                          # and exit function (which exits program)

    imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)     # grayscale
    imgBlurred = cv2.GaussianBlur(imgGray, (5, 5), 0)                  # blur

    # filter image from grayscale to black and white
    imgThresh = cv2.adaptiveThreshold(imgBlurred,                       # input image
                                      255,                              # value given to pixels that pass the threshold
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C,   # gaussian rather than mean weighting
                                      cv2.THRESH_BINARY_INV,            # invert: white foreground, black background
                                      11,                               # size of the pixel neighborhood
                                      2)                                # constant subtracted from the weighted mean

    cv2.imshow("imgThresh", imgThresh)      # show threshold image for reference

    # Work on a copy because findContours may modify its input image.
    imgThreshCopy = imgThresh.copy()

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in other versions; the contours are always the
    # second-to-last element, so index from the end.
    npaContours = cv2.findContours(imgThreshCopy,
                                   cv2.RETR_EXTERNAL,           # outermost contours only
                                   cv2.CHAIN_APPROX_SIMPLE)[-2] # keep only segment end points

    # zero rows, enough columns to hold one flattened image per row
    npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

    intClassifications = []     # key codes entered by the user, one per accepted contour

    # key codes accepted as labels: digits 0 through 9 plus '-', 'o' and 'c'
    intValidChars = [ord(ch) for ch in "0123456789-oc"]

    for npaContour in npaContours:
        if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:      # contour is big enough to consider
            [intX, intY, intW, intH] = cv2.boundingRect(npaContour)

            # outline the current contour on the original training image
            cv2.rectangle(imgTrainingNumbers,
                          (intX, intY),                 # upper left corner
                          (intX + intW, intY + intH),   # lower right corner
                          (0, 0, 255),                  # red
                          2)                            # thickness

            imgROI = imgThresh[intY:intY + intH, intX:intX + intW]      # crop char out of threshold image
            # resize to a fixed size so every sample has the same number of features
            imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

            cv2.imshow("imgROI", imgROI)                            # cropped char
            cv2.imshow("imgROIResized", imgROIResized)              # resized char
            cv2.imshow("training_numbers.png", imgTrainingNumbers)  # training image with red rectangles

            intChar = cv2.waitKey(0)        # block until a key is pressed

            if intChar == 27:               # esc key: abort without saving
                sys.exit()
            elif intChar in intValidChars:
                intClassifications.append(intChar)

                # flatten the resized crop to a single row and add it to the sample matrix
                npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
                npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)

    # convert the list of int key codes to a float32 column vector
    fltClassifications = np.array(intClassifications, np.float32)
    npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))

    print("\n\ntraining complete !!\n")

    np.savetxt("classifications.txt", npaClassifications)       # write the labels to file
    np.savetxt("flattened_images.txt", npaFlattenedImages)      # write the flattened images to file

    cv2.destroyAllWindows()

谢谢

这应该能解决问题

print("\n\ntraining complete !!\n")

# If classifications.txt is missing or empty there is nothing to extend, so
# both files are written from scratch; otherwise the new rows are appended to
# the existing data.  The check is made on classifications.txt only, and both
# files are always handled the same way.
if not os.path.exists("classifications.txt") or os.stat("classifications.txt").st_size == 0:
    np.savetxt("classifications.txt", npaClassifications)
    np.savetxt("flattened_images.txt", npaFlattenedImages)
else:
    # 'with' guarantees both handles are flushed and closed, even on error
    with open("classifications.txt", 'ab') as f:
        np.savetxt(f, npaClassifications)
    with open("flattened_images.txt", 'ab') as e:
        np.savetxt(e, npaFlattenedImages)

我猜您已经考虑过将新文本文件和旧文本文件的内容追加到某种文件或数据库中。那么您在这里面临的挑战是什么呢？我现在尝试了以下代码：`with open("classifications.txt", "a") as myfile: np.savetxt(myfile, npaClassifications)` 以及 `with open("flattened_images.txt", "a") as myfile: np.savetxt(myfile, npaFlattenedImages)`。这似乎是可行的，但我不能完全确定追加后的分类文件和展平图像文件是否比以前工作得更好。