Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13287
Modified Files:
ImageStripper.py
Log Message:
A bug (or feature?) in ocrad keeps it from emitting an export file when the -s
flag is used. Just count the number of lines in the output instead.
Index: ImageStripper.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** ImageStripper.py 5 Nov 2006 19:13:26 -0000 1.7
--- ImageStripper.py 5 Nov 2006 19:34:16 -0000 1.8
***************
*** 175,181 ****
def extract_ocr_info(self, pnmfiles):
- fd, orf = tempfile.mkstemp()
- os.close(fd)
-
textbits = []
tokens = Set()
--- 175,178 ----
***************
*** 189,209 ****
else:
self.misses += 1
! ocr = os.popen("%s -s %s -c %s -x %s -f %s 2>%s" %
(find_program("ocrad"), scale, charset,
! orf, pnmfile, os.path.devnull))
ctext = ocr.read().lower()
ocr.close()
ctokens = set()
! for line in open(orf):
! if line.startswith("lines"):
! nlines = int(line.split()[1])
! if nlines:
! ctokens.add("image-text-lines:%d" %
! int(log2(nlines)))
self.cache[fhash] = (ctext, ctokens)
textbits.append(ctext)
tokens |= ctokens
os.unlink(pnmfile)
- os.unlink(orf)
return "\n".join(textbits), tokens
--- 186,202 ----
else:
self.misses += 1
! ocr = os.popen("%s -s %s -c %s -f %s 2>%s" %
(find_program("ocrad"), scale, charset,
! pnmfile, os.path.devnull))
ctext = ocr.read().lower()
ocr.close()
ctokens = set()
! nlines = len(ctext.strip().split("\n"))
! if nlines:
! ctokens.add("image-text-lines:%d" % int(log2(nlines)))
self.cache[fhash] = (ctext, ctokens)
textbits.append(ctext)
tokens |= ctokens
os.unlink(pnmfile)
return "\n".join(textbits), tokens
_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins