Python 3.x:将表格(图像文件)转换为 Excel
下面是我的代码的样子Python 3.x 表格(图像文件)到excel,python-3.x,pandas,opencv,image-processing,Python 3.x,Pandas,Opencv,Image Processing,下面是我的代码的样子 outer = [] for i in range(len(finalboxes)): for j in range(len(finalboxes[i])): inner = '' if (len(finalboxes[i][j]) == 0): outer.append(' ') else: for k in range(len(finalboxes[i][j])):
# OCR every detected table cell and export the recognised text to an Excel file.
# Uses names defined earlier in the script: finalboxes, bitnot, row, countcol.
# NOTE(review): finalboxes looks like rows -> cells -> boxes, each box indexable
# at positions 0..3 -- confirm against the code that builds it.

# Control characters that are stripped from the OCR text before export.
ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')

# The structuring element is identical for every box, so build it once.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))

outer = []
for box_row in finalboxes:  # not named "row": that name is read further down
    for cell in box_row:
        # len() instead of truthiness: the cell may be a NumPy array.
        if len(cell) == 0:
            outer.append(' ')
            continue

        inner = ''
        for box in cell:
            y, x, w, h = box[0], box[1], box[2], box[3]
            # x/h select along the first axis of bitnot, y/w along the second.
            finalimg = bitnot[x:x + h, y:y + w]
            # Pad and upscale the crop, then dilate followed by erode, before OCR.
            border = cv2.copyMakeBorder(finalimg, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=[255, 255])
            resizing = cv2.resize(border, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
            dilation = cv2.dilate(resizing, kernel, iterations=1)
            erosion = cv2.erode(dilation, kernel, iterations=1)

            out = pytesseract.image_to_string(erosion)
            if len(out) == 0:
                # Nothing recognised: retry with page segmentation mode 3.
                out = pytesseract.image_to_string(erosion, config='--psm 3')
            inner = inner + " " + out
        outer.append(inner)

# Creating a dataframe of the generated OCR list.
# The control characters are removed from the text itself, before styling:
# Styler.applymap expects a function returning CSS declarations, so routing
# the regex through the Styler would leave the cell values untouched.
# NOTE(review): this is presumably the source of the stray symbols in the
# exported sheet -- verify on a real run.
arr = np.array([ILLEGAL_CHARACTERS_RE.sub(r'', text) for text in outer])
dataframe = pd.DataFrame(arr.reshape(len(row), countcol))
print(dataframe)

# "text-align" is the CSS property name; "align" is not a CSS property.
data = dataframe.style.set_properties(**{"text-align": "left"})

# Converting it in a excel-file
data.to_excel("output3.xlsx")
这显然是代码的一部分,我认为问题的根源就在这里
为了制作测试用的表格(见附带的截图),我新建了一个 Word 文件,插入一个 4x4 的
表格,填入随机内容,然后对它截了一张图。
现在,在转换之后,下面是我的excel的外观
# OCR every detected table cell and export the recognised text to an Excel file.
# Uses names defined earlier in the script: finalboxes, bitnot, row, countcol.
# NOTE(review): finalboxes looks like rows -> cells -> boxes, each box indexable
# at positions 0..3 -- confirm against the code that builds it.

# Control characters that are stripped from the OCR text before export.
ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')

# The structuring element is identical for every box, so build it once.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))

outer = []
for box_row in finalboxes:  # not named "row": that name is read further down
    for cell in box_row:
        # len() instead of truthiness: the cell may be a NumPy array.
        if len(cell) == 0:
            outer.append(' ')
            continue

        inner = ''
        for box in cell:
            y, x, w, h = box[0], box[1], box[2], box[3]
            # x/h select along the first axis of bitnot, y/w along the second.
            finalimg = bitnot[x:x + h, y:y + w]
            # Pad and upscale the crop, then dilate followed by erode, before OCR.
            border = cv2.copyMakeBorder(finalimg, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=[255, 255])
            resizing = cv2.resize(border, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
            dilation = cv2.dilate(resizing, kernel, iterations=1)
            erosion = cv2.erode(dilation, kernel, iterations=1)

            out = pytesseract.image_to_string(erosion)
            if len(out) == 0:
                # Nothing recognised: retry with page segmentation mode 3.
                out = pytesseract.image_to_string(erosion, config='--psm 3')
            inner = inner + " " + out
        outer.append(inner)

# Creating a dataframe of the generated OCR list.
# The control characters are removed from the text itself, before styling:
# Styler.applymap expects a function returning CSS declarations, so routing
# the regex through the Styler would leave the cell values untouched.
# NOTE(review): this is presumably the source of the stray symbols in the
# exported sheet -- verify on a real run.
arr = np.array([ILLEGAL_CHARACTERS_RE.sub(r'', text) for text in outer])
dataframe = pd.DataFrame(arr.reshape(len(row), countcol))
print(dataframe)

# "text-align" is the CSS property name; "align" is not a CSS property.
data = dataframe.style.set_properties(**{"text-align": "left"})

# Converting it in a excel-file
data.to_excel("output3.xlsx")
有人能解释一下上面的箭头是什么吗?我认为存在一些编码问题
如果有帮助的话,我附上了完整代码的链接(我不小心把它保存成了 txt 文件)。