? Makefile
? __init__.py
? diff
Index: Retriever.py
===================================================================
RCS file: /cvs/plucker/plucker_src/parser/python/PyPlucker/Retriever.py,v
retrieving revision 1.28
diff -U3 -r1.28 Retriever.py
--- Retriever.py	17 Dec 2003 03:46:01 -0000	1.28
+++ Retriever.py	11 Jan 2004 18:11:57 -0000
@@ -266,6 +266,57 @@
                 # Now get the contents
                 contents = webdoc.read ()
 
+                # Optionally run the raw document through an external filter
+                # program (the NKF hook).  The filter is invoked as
+                #     <filterpath>/<filter> <content-type> <tempfile>
+                # and whatever it writes to stdout replaces the contents.
+                # NKF, [2002/02/26] rubikitch<rubikitch@ruby-lang.org>
+                filter_name = self._configuration.get_string ('filter')
+                filterpath = self._configuration.get_string ('filterpath')
+                if filter_name is not None and filterpath is not None:
+                    import tempfile
+
+                    def shell_arg (text):
+                        # Quote for a POSIX shell so blanks, ';' and other
+                        # metacharacters in a server-supplied header stay
+                        # inside a single argument.
+                        if os.name != 'posix':
+                            # NOTE(review): other shells get the value
+                            # unquoted, exactly as before this change.
+                            return text
+                        return "'" + text.replace ("'", "'\\''") + "'"
+
+                    # A response without a Content-Type header must not
+                    # abort the retrieval with a KeyError.
+                    if headers_dict.has_key ('content-type'):
+                        content_type = headers_dict['content-type']
+                    else:
+                        content_type = ''
+
+                    # NOTE(review): mktemp() is race-prone; prefer
+                    # tempfile.mkstemp() if Python >= 2.3 can be required.
+                    temppath = tempfile.mktemp()
+                    try:
+                        tempf = open( temppath, "wb" )
+                        try:
+                            tempf.write( contents )
+                        finally:
+                            tempf.close()
+
+                        command = (shell_arg (filterpath + "/" + filter_name)
+                                   + " " + shell_arg (content_type)
+                                   + " " + shell_arg (temppath))
+
+                        pipe = os.popen( command, "r" )
+                        try:
+                            contents = pipe.read()
+                        finally:
+                            pipe.close()
+                    finally:
+                        # Remove the temporary copy even when the filter fails.
+                        if os.path.exists (temppath):
+                            os.unlink( temppath )
+
                 # Check if encoded contents...
                 if headers_dict.has_key ('content-encoding'):
                     encoding = headers_dict['content-encoding']
Index: Spider.py
===================================================================
RCS file: /cvs/plucker/plucker_src/parser/python/PyPlucker/Spider.py,v
retrieving revision 1.88
diff -U3 -r1.88 Spider.py
--- Spider.py	28 Oct 2003 09:25:23 -0000	1.88
+++ Spider.py	11 Jan 2004 18:12:01 -0000
@@ -1522,6 +1522,13 @@
             pluckerdir = pluckerhome
             config.set ('pluckerdir', pluckerdir)
 
+    # A "filter" directory inside pluckerdir holds the external filter
+    # programs.  Only register it when it really is a directory, because
+    # the retriever builds its command line as filterpath + "/" + filter.
+    filterpath = pluckerdir + "/filter"
+    if os.path.isdir (filterpath):
+        config.set( 'filterpath', filterpath )
+
     if use_file is None and use_cache is None:
         if config.get_string ('db_file') is not None and config.get_bool ('use_cache'):
             usage ("Config files specify both a 'db_file' and a 'use_cache=1'.\nYou must decide by specifiying an argument!")
