Issue
Hello Stack Overflow community,
I'm facing an issue with the Python-tesseract OCR tool when trying to recognize the word "ADRIEL". The OCR output incorrectly splits it into two separate words: "[ADRI" and "EL".
I'm using the following code snippet:
import pytesseract
import cv2
# Tell pytesseract where the Tesseract executable lives.
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\..."

# Read the input image and reduce it to a single grayscale channel.
source = cv2.imread("cnh_nome2.png")
grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

# Run OCR with the "por" language data and print the raw result.
recognized = pytesseract.image_to_string(grayscale, lang="por")
print(recognized)
The output is: [ADRI EL
Solution
I get the correct result with these settings:
import pytesseract
import cv2
# Preprocess the image (downscale -> grayscale -> binary threshold) and OCR it
# with a restricted character set, then save and display the result.
image_file = "Adriel.png"
scale_percent = 90  # percent of original size
threshold_value = 150  # 0...255; pixels above this become white, the rest black

# Load as a 3-channel, 8-bit BGR image so every later step sees a known layout.
# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread(image_file, cv2.IMREAD_COLOR)
if image is None:
    raise FileNotFoundError("Could not read image: " + image_file)
# print('Original Dimensions : ', image.shape)

# Resize the image; INTER_AREA is the interpolation used for shrinking here.
width = int(image.shape[1] * scale_percent / 100)
height = int(image.shape[0] * scale_percent / 100)
resized = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
# print('Resized Dimensions : ', resized.shape)

# The image stays in OpenCV's native BGR order, so COLOR_BGR2GRAY applies the
# correct per-channel weights and cv2.imshow displays the right colours.
gray_image = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
_, black_and_white = cv2.threshold(
    gray_image, threshold_value, 255, cv2.THRESH_BINARY
)

# Configuration: treat the image as one uniform block of text (--psm 6), use
# the default OCR engine (--oem 3) and only allow the characters expected in
# this image.
# NOTE(review): no space is whitelisted, on the assumption that Tesseract
# emits word gaps regardless of the whitelist -- confirm on your version.
options = "--psm 6 --oem 3 -c tessedit_char_whitelist=21eNOMESBRADIL"

# OCR the binarized image using Tesseract.
text_bw = pytesseract.image_to_string(black_and_white, config=options)
print(text_bw)

# Persist the recognized text; it is a single string, so write() is enough.
with open("numbers.txt", "w") as f:
    f.write(text_bw)

# Show the intermediate images until a key is pressed.
cv2.imshow("Black white image", black_and_white)
cv2.imshow("Resized", resized)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
2e 1 NOME SOBRENOME
ADRIEL
Answered By - Hermann12
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.