#!/usr/bin/python


#  mailman 'search capability'
#  version 0.0001
#
#  -  a quick-and-very-dirty hack to provide mailman with
#     the capability to search its archive
#
#  -  search is limited to a single year-month of the archive
#     to make the search faster and more 'economical'
#     with computer resources (at least that's what
#     I think .. I'm very new to Python - in fact, this
#     is my very first program in Python - skipped the
#     Hello, World part ;) ).  having an index
#     would probably speed things up (and be more of a
#     learning experience in Python) but then again,
#     at a cost in computer resources (specifically
#     disk space).  Besides, there's always the next
#     version :)
#
#
#  last modified:  jan-20-2002
#  by:  j. san juan <jsanjuan@hotmail.com>
#  
#  GPL, LGPL, ABCs? :) - don't know much about licenses
#  so this code is being released to the public-domain
#
#  TODO:
#  -  lots :)
#  -  cleaner, correct code
#  -  strip out HTML from results
#  -  parse out subject, sender from results and display
#  -  etc. etc.



import cgi
import os
import string
import re


#  HTML fragments filled in with the % operator and a dict.
#  Whitespace (leading blank line, tabs, trailing newlines) is
#  spelled out explicitly so that it is visible in the source.

#  page heading; expects the keys "listname" and "yrmo"
header_template = (
    "\n"
    "\t<h1>%(listname)s Archive Search Results for %(yrmo)s</h1>\n"
    "\n"
)

#  one search hit; expects the keys "hlink", "fname" and "snippet"
found_template = (
    "\n"
    "\t<h3><a href=\"/pipermail/%(hlink)s\">%(fname)s</a></h3>\n"
    "\t<p>\n"
    "\t%(snippet)s\t\n"
    "\t<br>\n"
    "\n"
    "\n"
)

#  summary line; expects the key "rcount"
rcount_template = (
    "\n"
    "\t<h3>Got %(rcount)s matches </h3>\n"
    "\n"
    "\n"
)

#  ---- shared search state (module-level globals) ----
#
#  srch   - search keyword
#  path   - path to the html archives
#  ayrmo  - year-month of the archive to search
#  lrname - list real name
#  liname - list internal (directory?) name
#
#  all of them start out empty
srch = path = ayrmo = lrname = liname = ""

#  running count of matches
found = 0


def visit(arg, dir, names):
    """Search the archived posts of one directory and print each hit.

    The signature is the (arg, dirname, names) callback form used by
    os.path.walk.  Every entry of *names* whose whole name is
    ``<digits>.html`` is read line by line; the first line that contains
    the module-level ``srch`` keyword is printed through
    ``found_template`` and the module-level ``found`` counter is
    incremented.  At most one hit is reported per file.

    arg   -- unused; only present because of the callback signature
    dir   -- directory that contains *names*
    names -- file names (not paths) located in *dir*

    The matching line is printed as-is (it is a line of archive HTML);
    only the generated link is escaped.
    """
    global found

    #  html.escape on Python 3, cgi.escape on Python 2; both accept a
    #  second argument that makes them escape double quotes as well.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    #  archives of posts are in the form xxxxx.html (x are digits 0-9).
    #  The dot is escaped and the pattern is anchored at the end so that
    #  names such as "00001xhtml" or "00001.html.bak" are not searched.
    post_name = re.compile(r"\d+\.html$")

    for name in names:
        #  not an archived post in html format: skip it
        if post_name.match(name) is None:
            continue

        #  'with' closes the file even if reading it raises
        with open(os.path.join(dir, name), "r") as post:
            for line in post:
                if srch not in line:
                    continue

                found = found + 1

                #  liname and ayrmo are request-supplied, so the link is
                #  escaped before it is placed inside the href attribute
                url = escape(liname + "/" + ayrmo + "/" + name, True)
                print(found_template % {"hlink": url,
                                        "fname": name,
                                        "snippet": line})

                #  one hit per post is enough
                break
 


def main():
    """CGI entry point: read the search form, scan the archive, print HTML.

    Form fields read:
      kword    -- search keyword (required)
      yrmo     -- archive year-month to search in (required)
      adir     -- archive directory (required)
      lstiname -- list internal name, used in result links (optional,
                  defaults to "")
      lstrname -- list real name, shown in the heading (optional,
                  defaults to "")

    The values are stored in the module-level globals that visit() reads,
    every directory below <adir>/<yrmo> is handed to visit(), and the
    global ``found`` counter is reported at the end.  If any required
    field is missing or empty, only an error message is printed.
    """
    global srch, path, ayrmo, lrname, liname, found

    #  html.escape on Python 3, cgi.escape on Python 2; both accept a
    #  second argument that makes them escape double quotes as well.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    print("Content-type: text/html\n")

    form = cgi.FieldStorage()

    #  getfirst() never raises: a missing field yields the default and a
    #  repeated field yields its first value.  Values are used exactly
    #  as submitted (no whitespace trimming).
    kword = form.getfirst("kword", "")
    yrmo = form.getfirst("yrmo", "")
    adir = form.getfirst("adir", "")

    if not (kword and yrmo and adir):
        print("Error!  You must enter a search keyword and")
        print(" the year-month of the archive")
        return

    print("<html>")
    print("<head>")
    print("<title>Test Results</title>")
    print("</head>")
    print("<body>")

    srch = kword
    ayrmo = yrmo
    liname = form.getfirst("lstiname", "")
    lrname = form.getfirst("lstrname", "")

    #  NOTE(review): adir (and yrmo) come straight from the request, so a
    #  client can point the search at any directory the CGI user can
    #  read.  The archive root should come from server-side
    #  configuration instead - not changed here because that root is not
    #  known to this script.
    path = os.path.join(adir, ayrmo)

    #  request values are escaped before being reflected into the page
    print(header_template % {"yrmo": escape(ayrmo, True),
                             "listname": escape(lrname, True)})

    #  os.walk replaces the deprecated os.path.walk; visit() keeps its
    #  (arg, dirname, names) callback signature
    for dirpath, _subdirs, filenames in os.walk(path):
        visit(None, dirpath, filenames)

    print(rcount_template % {"rcount": found})

    print("</body>")
    print("</html>")


#  Run the search only when this file is executed as a script (the
#  normal CGI case), not when it is merely imported as a module.
if __name__ == "__main__":
    main()



