Update of /cvsroot/spambayes/spambayes/spambayes
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13287

Modified Files:
        ImageStripper.py 
Log Message:
A bug (or feature?) in ocrad keeps it from emitting an export file when the -s
flag is used.  Just count the number of lines in ocrad's text output instead.


Index: ImageStripper.py
===================================================================
RCS file: /cvsroot/spambayes/spambayes/spambayes/ImageStripper.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** ImageStripper.py    5 Nov 2006 19:13:26 -0000       1.7
--- ImageStripper.py    5 Nov 2006 19:34:16 -0000       1.8
***************
*** 175,181 ****
  
      def extract_ocr_info(self, pnmfiles):
-         fd, orf = tempfile.mkstemp()
-         os.close(fd)
- 
          textbits = []
          tokens = Set()
--- 175,178 ----
***************
*** 189,209 ****
              else:
                  self.misses += 1
!                 ocr = os.popen("%s -s %s -c %s -x %s -f %s 2>%s" %
                                 (find_program("ocrad"), scale, charset,
!                                 orf, pnmfile, os.path.devnull))
                  ctext = ocr.read().lower()
                  ocr.close()
                  ctokens = set()
!                 for line in open(orf):
!                     if line.startswith("lines"):
!                         nlines = int(line.split()[1])
!                         if nlines:
!                             ctokens.add("image-text-lines:%d" %
!                                         int(log2(nlines)))
                  self.cache[fhash] = (ctext, ctokens)
              textbits.append(ctext)
              tokens |= ctokens
              os.unlink(pnmfile)
-         os.unlink(orf)
  
          return "\n".join(textbits), tokens
--- 186,202 ----
              else:
                  self.misses += 1
!                 ocr = os.popen("%s -s %s -c %s -f %s 2>%s" %
                                 (find_program("ocrad"), scale, charset,
!                                 pnmfile, os.path.devnull))
                  ctext = ocr.read().lower()
                  ocr.close()
                  ctokens = set()
!                 nlines = len(ctext.strip().split("\n"))
!                 if nlines:
!                     ctokens.add("image-text-lines:%d" % int(log2(nlines)))
                  self.cache[fhash] = (ctext, ctokens)
              textbits.append(ctext)
              tokens |= ctokens
              os.unlink(pnmfile)
  
          return "\n".join(textbits), tokens

_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to