Update of /cvsroot/spambayes/spambayes/Outlook2000/sandbox
In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv8117/sandbox

Added Files:
        dump_email.py score.py 
Log Message:
Integrate OCR with outlook plugin


--- NEW FILE: dump_email.py ---
"""dump one or more items as an 'email object' to stdout."""
import sys, os
import optparse

from win32com.mapi import mapi, mapiutil
from win32com.mapi.mapitags import *
import win32clipboard

try:
    from manager import BayesManager
except ImportError:
    if hasattr(sys, "frozen"):
        raise
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 
'..')))
    from manager import BayesManager

import mapi_driver
from cStringIO import StringIO

def Dump(driver, manager, mapi_folder, subject, stream=None):
    """Print the email-package form of each matching message.

    driver      -- object whose GetItemsWithValue() is used to look up the
                   items of mapi_folder by PR_SUBJECT_A and subject.
    manager     -- object whose message_store.GetMessage() turns a
                   (store_id, entry_id) pair of hex strings into a message.
    mapi_folder -- the folder handed to the driver lookup.
    subject     -- the subject value handed to the driver lookup.
    stream      -- file-like object to print to; None means sys.stdout
                   (the behaviour of "print >> None").

    For every item found, a "Dumping message with ID ..." header, the
    result of as_string() on the message's email package object, and a
    blank line are printed.
    """
    wanted_props = (PR_ENTRYID, PR_STORE_ENTRYID)
    matches = driver.GetItemsWithValue(mapi_folder, PR_SUBJECT_A, subject)
    for mapi_item in matches:
        hr, prop_values = mapi_item.GetProps(wanted_props, 0)
        # Each property comes back as a (tag, value) pair; only the
        # values are of interest here.
        (_, raw_eid), (_, raw_store_eid) = prop_values
        hex_eid = mapi.HexFromBin(raw_eid)
        hex_store_eid = mapi.HexFromBin(raw_store_eid)
        message_key = (hex_store_eid, hex_eid)
        print >> stream, "Dumping message with ID %s/%s" % message_key
        message = manager.message_store.GetMessage(message_key)
        email_ob = message.GetEmailPackageObject()

        print >> stream, email_ob.as_string()
        print >> stream

def main():
    driver = mapi_driver.MAPIDriver()

    parser = optparse.OptionParser("%prog [options] [path ...]",
                                   description=__doc__)
    parser.add_option("-q", "--quiet",
                      action="store_true", dest="quiet", default=False,
                      help="don't print status messages to stdout")

    parser.add_option("-f", "--folder",
                      action="store", default="Inbox",
                      help="folder to search")

    parser.add_option("-c", "--clipboard",
                      action="store",
                      help="write results to the clipboard")

    options, args = parser.parse_args()

    subject = " ".join(args)
    try:
        folder = driver.FindFolder(options.folder)
    except ValueError, details:
        parser.error(details)

    stream = None
    if options.clipboard:
        stream = StringIO()

    Dump(driver, BayesManager(), folder, subject, stream)

    if options.clipboard:
        win32clipboard.OpenClipboard()
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardText(stream.getvalue())
        print "Output successfuly written to the Windows clipboard"

# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

--- NEW FILE: score.py ---
"""Scores one or more items in your Outlook store."""
# score one or more items, write results to stdout.
# Helps test new features (eg, OCR) outside the Outlook environment.
import sys, os
import optparse

from win32com.mapi import mapi, mapiutil
from win32com.mapi.mapitags import *
import win32clipboard

try:
    from manager import BayesManager
except ImportError:
    if hasattr(sys, "frozen"):
        raise
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 
'..')))
    from manager import BayesManager

from addin import GetClues

import mapi_driver
from cStringIO import StringIO

def Score(driver, manager, mapi_folder, subject, options, stream=None):
    """Score messages in a folder and print the results.

    driver      -- object providing GetAllItems() and GetItemsWithValue().
    manager     -- object used to fetch each message
                   (message_store.GetMessage), load its stored data
                   (classifier_data.message_db.load_msg) and score it
                   (score).
    mapi_folder -- the folder whose items are examined.
    subject     -- subject value used to select items; ignored when
                   options.all is set.
    options     -- parsed options; the attributes read are all, quiet,
                   show_clues and show_image_info.
    stream      -- file-like object results are printed to; None means
                   sys.stdout (the behaviour of "print >> None").

    A progress line goes to stderr every 1000 items.  If anything fails
    while handling a message, its subject is reported on stderr and the
    exception is re-raised.  A final "Scored N messages." line is always
    written to stream.
    """
    num = 0
    if options.all:
        getter = driver.GetAllItems
        getter_args = (mapi_folder,)
    else:
        getter = driver.GetItemsWithValue
        getter_args = (mapi_folder, PR_SUBJECT_A, subject)
    for item in getter(*getter_args):
        num += 1
        if num % 1000 == 0:
            print >> sys.stderr, "Processed", num, "items..."
        hr, props = item.GetProps(
            (PR_ENTRYID, PR_STORE_ENTRYID, PR_SUBJECT_A), 0)
        (tag, eid), (tag, store_eid), (tag, sub) = props
        eid = mapi.HexFromBin(eid)
        store_eid = mapi.HexFromBin(store_eid)
        try:
            msm = manager.message_store.GetMessage((store_eid, eid))

            manager.classifier_data.message_db.load_msg(msm)
            score = manager.score(msm)
            if not options.quiet:
                # Send the score to the same destination as every other
                # result so it is not left out when output is redirected.
                print >> stream, "Message %r scored %g" % (sub, score)
            if options.show_clues:
                # The clues are still computed in quiet mode; only the
                # printing is suppressed.
                clues = GetClues(manager, msm)
                if not options.quiet:
                    print >> stream, clues
            if options.quiet:
                continue

            if options.show_image_info:
                eob = msm.GetEmailPackageObject()
                # Show what the OCR managed to extract.
                # Imported here so the image modules are only loaded when
                # image info was actually requested.
                from spambayes.ImageStripper import crack_images
                from spambayes.tokenizer import imageparts
                image_text, image_toks = crack_images(imageparts(eob))
                print >> stream, "Image text:", repr(image_text)
                print >> stream, "Image tokens:", repr(image_toks)

            print >> stream # blank lines between messages
        except:
            # Not swallowed: say which message failed, then re-raise.
            print >> sys.stderr, "FAILED to convert message:", sub
            raise
    print >> stream, "Scored", num, "messages."

def main():
    driver = mapi_driver.MAPIDriver()

    parser = optparse.OptionParser("%prog [options] subject of message ...",
                                   description=__doc__)
    parser.add_option("-q", "--quiet",
                      action="store_true", dest="quiet", default=False,
                      help="don't print score info - useful for testing")

    parser.add_option("-f", "--folder",
                      action="store", default="Inbox",
                      help="folder to search")

    parser.add_option("", "--clipboard",
                      action="store_true",
                      help="write results to the clipboard")

    parser.add_option("-c", "--show-clues",
                      action="store_true",
                      help="also write the clues for the message")

    parser.add_option("-a", "--all",
                      action="store_true",
                      help="ignore the subject and score all items in the 
folder")

    parser.add_option("-i", "--show-image-info",
                      action="store_true",
                      help="show the information we can extract from images "
                           "in the mail")

    options, args = parser.parse_args()

    subject = " ".join(args)
    try:
        folder = driver.FindFolder(options.folder)
    except ValueError, details:
        parser.error(details)

    stream = None
    if options.clipboard:
        stream = StringIO()

    Score(driver, BayesManager(), folder, subject, options, stream)

    if options.clipboard:
        win32clipboard.OpenClipboard()
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardText(stream.getvalue())
        print "Output successfuly written to the Windows clipboard"

# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

_______________________________________________
Spambayes-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/spambayes-checkins

Reply via email to