Please give me a solution for this problem. It is urgent.
--
You received this message because you are subscribed to the Google Groups
"tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/tesseract-ocr/4666ae9f-d9cd-42f7-b85a-62172aa1298fn%40googlegroups.com.
import io
import json
import re
import pytesseract as pt
from matplotlib import pyplot as plt
import matplotlib.image as Image
import cv2
import cv2 as cv
import sys
import numpy as np
from PIL import Image,ImageEnhance
# Point pytesseract at the local Tesseract executable. The path is one raw
# string literal; it must stay on a single line to remain valid Python.
pt.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'

# Raw string so the backslashes are never interpreted as escape sequences.
path = r'C:\Windows\DigitalLocker\p1.jpeg'
img = cv2.imread(path)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, so fail here with a message that names the file.
    raise IOError('Could not read image: %s' % path)

# Smooth the image with three successive filters before handing it to OCR.
blur = cv2.GaussianBlur(img, (5, 5), 0)
median = cv2.medianBlur(blur, 5)
blur = cv2.bilateralFilter(median, 9, 75, 75)

# Raw OCR output as one string; later code splits it into lines.
text = pt.pytesseract.image_to_string(blur)
def findword(textlist, wordstring):
    """Return the lines of *textlist* that follow the first matching line.

    A line matches when any of its whitespace-separated words matches the
    regular expression *wordstring* (tested with ``re.search``).

    Args:
        textlist: list of text lines (strings).
        wordstring: regular-expression pattern applied to each word.

    Returns:
        The slice of *textlist* after the first matching line (possibly
        empty), or *textlist* itself, unchanged, when no line matches.
    """
    pattern = re.compile(wordstring)
    for lineno, wordline in enumerate(textlist):
        if any(pattern.search(word) for word in wordline.split()):
            return textlist[lineno + 1:]
    # No line matched: hand the input back untouched.
    return textlist
# Extracted fields; each stays None unless a later step assigns it.
name = None
fname = None
dob = None
pan = None
nameline = []
dobline = []
panline = []
text0 = []
text1 = []
text2 = []

# Split the OCR output into lines, trim surrounding whitespace and drop
# the lines that end up empty.
lines = text.split('\n')
text1 = [s for s in (lin.strip() for lin in lines) if s]

# Find the first line that ends with one of the header keywords (including
# some truncated/misread variants of them).
# NOTE(review): when no line matches, lineno stays 0, so the first line is
# still skipped below - confirm this is the intended fallback.
lineno = 0
for idx, wordline in enumerate(text1):
    if re.search('(INCOMETAXDEPARWENT|INCOME|TAX|GOW|GOVT|GOVERNMENT|OVERNMENT|VERNMENT|DEPARTMENT|EPARTMENT|PARTMENT|ARTMENT|INDIA|NDIA)$',
                 wordline):
        lineno = idx
        break

# Everything after the header line; the field extraction reads from here.
text0 = text1[lineno+1:]
# Pull the name, father's name, date of birth and PAN out of the prepared
# lines. The fields are read positionally, so a short OCR result raises
# IndexError part-way through; whatever was extracted before that is kept
# and the remaining fields keep their previous values.
try:
    # Cleaning first names: map digits to the letters they are replaced with.
    # NOTE(review): here "8" -> "B" and "0" -> "D", while the father's name
    # below uses "8" -> "S" and "0" -> "O" - confirm which mapping is wanted.
    name = (text0[0].strip()
            .replace("8", "B")
            .replace("0", "D")
            .replace("6", "G")
            .replace("1", "I"))
    # NOTE(review): this pattern matches one non-letter followed by spaces;
    # '[^a-zA-Z ]+' may have been intended - left unchanged.
    name = re.sub('[^a-zA-Z] +', ' ', name)

    # Cleaning Father's name
    fname = (text0[1].strip()
             .replace("8", "S")
             .replace("0", "O")
             .replace("6", "G")
             .replace("1", "I")
             .replace("\"", "A"))
    fname = re.sub('[^a-zA-Z] +', ' ', fname)

    # Cleaning DOB: only the first 10 characters of the line are used, and
    # several characters are replaced with the '/' separator.
    dob = (text0[2][:10].strip()
           .replace('l', '/')
           .replace('L', '/')
           .replace('I', '/')
           .replace('i', '/')
           .replace('|', '/')
           .replace('\"', '/1')
           .replace(" ", ""))

    # Cleaning PAN Card details: the PAN is taken from the line that follows
    # the first line containing one of the label keywords.
    text0 = findword(text1,
                     '(Pormanam|Number|umber|Account|ccount|count|Permanent|ermanent|manent|wumm)$')
    panline = text0[0]
    pan = (panline.strip()
           .replace(" ", "")
           .replace("\"", "")
           .replace(";", "")
           .replace("%", "L"))
except IndexError:
    # Fewer lines than expected: keep what was extracted so far.
    pass
# Collect the extracted fields into one record and show it.
data = {
    'Name': name,
    'Father Name': fname,
    'Date of Birth': dob,
    'PAN': pan,
    'ID Type': "PAN",
}
print(data)
# NOTE: this redefines the findword function that already appears earlier in
# this script with the same behavior; it is kept so the name stays bound here.
def findword(textlist, wordstring):
    """Return the lines of *textlist* that follow the first matching line.

    A line matches when any of its whitespace-separated words matches the
    regular expression *wordstring* (tested with ``re.search``).

    Args:
        textlist: list of text lines (strings).
        wordstring: regular-expression pattern applied to each word.

    Returns:
        The slice of *textlist* after the first matching line (possibly
        empty), or *textlist* itself, unchanged, when no line matches.
    """
    pattern = re.compile(wordstring)
    for lineno, wordline in enumerate(textlist):
        if any(pattern.search(word) for word in wordline.split()):
            return textlist[lineno + 1:]
    # No line matched: hand the input back untouched.
    return textlist
# Python 2/3 compatibility: use `unicode` where it exists, else `str`.
try:
    to_unicode = unicode
except NameError:
    to_unicode = str

# Write the record to info1.json as pretty-printed UTF-8 JSON. The serialized
# text gets its own name so `data` keeps referring to the dict.
with io.open('info1.json', 'w', encoding='utf-8') as outfile:
    serialized = json.dumps(data, indent=4, sort_keys=True, separators=(',', ': '),
                            ensure_ascii=False)
    outfile.write(to_unicode(serialized))

# Read the file back and print the stored values.
with open('info1.json', encoding='utf-8') as infile:
    data_loaded = json.load(infile)

if data_loaded['ID Type'] == 'PAN':
    print("\n---------- PAN Details ----------")
    print("\nPAN Number: ",data_loaded['PAN'])
    print("\nName: ",data_loaded['Name'])
    print("\nFather's Name: ",data_loaded['Father Name'])
    print("\nDate Of Birth: ", data_loaded['Date of Birth'])