Author: dmeyer
Date: Thu Mar 30 12:53:15 2006
New Revision: 1366

Modified:
   trunk/beacon/src/crawl.py

Log:
add doc

Modified: trunk/beacon/src/crawl.py
==============================================================================
--- trunk/beacon/src/crawl.py   (original)
+++ trunk/beacon/src/crawl.py   Thu Mar 30 12:53:15 2006
@@ -1,23 +1,63 @@
+# -*- coding: iso-8859-1 -*-
+# -----------------------------------------------------------------------------
+# crawl.py - Crawl filesystem and monitor it
+# -----------------------------------------------------------------------------
+# $Id: monitor.py 1345 2006-03-25 19:24:22Z dmeyer $
+#
+# -----------------------------------------------------------------------------
+# kaa-beacon - A virtual filesystem with metadata
+# Copyright (C) 2005 Dirk Meyer
+#
+# First Edition: Dirk Meyer <[EMAIL PROTECTED]>
+# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
+#
+# Please see the file AUTHORS for a complete list of authors.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
+# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# -----------------------------------------------------------------------------
+
+# python imports
 import os
 import logging
 
+# kaa imports
 from kaa.notifier import Timer, OneShotTimer
 
-import parser
+# kaa.beacon imports
+from parser import parse
 from inotify import INotify
 from directory import Directory
 
+# get logging object
 log = logging.getLogger('crawler')
 
-class Crawler(object):
 
+class Crawler(object):
+    """
+    Class to crawl through a filesystem and check for changes. If inotify
+    support is enabled in the kernel, this class will use it to avoid
+    polling the filesystem.
+    """
     PARSE_TIMER  = 0.02
     UPDATE_TIMER = 0.03
 
     active = 0
     nextid = 0
     
-    def __init__(self, db):
+    def __init__(self, db, use_inotify=True):
         """
         Init the Crawler.
         Parameter db is a beacon.db.Database object.
@@ -30,41 +70,23 @@
         self.update_items = []
         Crawler.nextid += 1
         self.num = Crawler.nextid
-        try:
-            self.inotify = INotify()
-        except SystemError, e:
-            log.warning('%s', e)
+        if use_inotify:
+            try:
+                self.inotify = INotify()
+            except SystemError, e:
+                log.warning('%s', e)
+                self.inotify = None
+        else:
             self.inotify = None
         self.timer = None
         self.restart_timer = None
         self.restart_args = []
 
 
-    def inotify_callback(self, mask, name):
-        if mask & INotify.WATCH_MASK:
-            item = self.db.query(filename=name)
-            if os.path.exists(name):
-                # created or modified, we don't care
-                if item._beacon_isdir:
-                    self.scan_directory_items.append(item)
-                self.check_mtime_items.append(item)
-                if not self.timer:
-                    Crawler.active += 1
-                    self.check_mtime()
-            else:
-                # deleted
-                item = self.db.query(filename=name)
-                if item._beacon_id:
-                    self.db.delete_object(item._beacon_id, 
beacon_immediately=True)
-                if name + '/' in self.monitoring:
-                    for m in self.monitoring[:]:
-                        if m.startswith(name + '/'):
-                            if self.inotify:
-                                self.inotify.ignore(m)
-                                log.info('remove inotify for %s', m)
-                            self.monitoring.remove(m)
-
     def append(self, item):
+        """
+        Append a directory to be crawled and monitored.
+        """
         log.info('crawl %s', item)
         self.check_mtime_items.append(item)
         self.scan_directory_items.append(item)
@@ -75,7 +97,64 @@
             self.check_mtime()
 
 
+    def stop(self):
+        """
+        Stop the crawler and remove the inotify watches.
+        """
+        self.finished()
+        self.monitoring = []
+        self.inotify = None
+        
+
+    # -------------------------------------------------------------------------
+    # Internal functions
+    # -------------------------------------------------------------------------
+    
+    def inotify_callback(self, mask, name):
+        """
+        Callback for inotify.
+        """
+        if not mask & INotify.WATCH_MASK:
+            # TODO: maybe check more types of callbacks
+            return True
+        
+        item = self.db.query(filename=name)
+        if os.path.exists(name):
+            # The file exists, so it was either created or modified; we don't
+            # care which right now. Later it would be nice to check MOVE_MASK in
+            # detail: at this point we add the new file and delete the old one,
+            # but it would be much faster if we could handle moves directly.
+            if item._beacon_isdir:
+                self.scan_directory_items.append(item)
+            self.check_mtime_items.append(item)
+            if not self.timer:
+                Crawler.active += 1
+                self.check_mtime()
+            return True
+
+        # The file does not exist, we need to delete it in the database
+        # (if it is still in there)
+        item = self.db.query(filename=name)
+        if item._beacon_id:
+            # Still in the db, delete it
+            self.db.delete_object(item._beacon_id, beacon_immediately=True)
+        if name + '/' in self.monitoring:
+            # remove directory and all subdirs from the notifier. The directory
+            # is gone, so all subdirs are invalid, too.
+            for m in self.monitoring[:]:
+                if not m.startswith(name + '/'):
+                    continue
+                if self.inotify:
+                    self.inotify.ignore(m)
+                    log.info('remove inotify for %s', m)
+                self.monitoring.remove(m)
+
+
     def finished(self):
+        """
+        Crawler is finished with all directories and subdirectories and all
+        files are now up to date.
+        """
         if not self.timer:
             return
         log.info('crawler %s finished', self.num)
@@ -87,19 +166,23 @@
         self.update_items = []
         self.db.commit()
         if not self.inotify:
+            # Inotify is not in use. This means we have to start crawling
+            # the filesystem again in 10 seconds using the restart function.
+            # The restart function will crawl with a much higher interval to
+            # keep the load on the system down.
             log.info('schedule rescan')
             self.restart_timer = OneShotTimer(self.restart).start(10)
                 
 
-    def stop(self):
-        self.finished()
-        self.monitoring = []
-        self.inotify = None
-        
-        
     def restart(self):
+        """
+        Restart the crawler when inotify is not enabled.
+        """
+        # set parse timer to one second to keep load down
         self.PARSE_TIMER = 1
 
+        # reset self.monitoring and re-add all directories that were
+        # previously passed to this object with 'append'.
         self.monitoring = []
         for item in self.restart_args:
             self.check_mtime_items.append(item)
@@ -110,6 +193,10 @@
 
         
     def scan_directory(self):
+        """
+        Scan a directory for changes and add all subitems to check_mtime_items.
+        All subdirs are also added to scan_directory_items to be checked later.
+        """
         if not self.timer:
             return False
 
@@ -124,9 +211,18 @@
                 self.monitoring.remove(item.filename + '/')
             return True
 
-        log.debug('check %s', item)
+        if not item.filename in self.monitoring and self.inotify:
+            # add directory to the inotify list. Do that before the real checking
+            # to avoid missing changes made between checking and adding the
+            # inotify watch.
+            log.info('add inotify for %s' % item.filename)
+            
self.inotify.watch(item.filename[:-1]).connect(self.inotify_callback)
+
+        # log.info('check %s', item)
         for child in self.db.query(parent=item):
             if child._beacon_isdir:
+                # A directory. Check if it is already scanned or in the list of
+                # items to be scanned. If not, add it.
                 for fname in [ f.filename for f in self.scan_directory_items ] 
+ \
                         self.monitoring:
                     if child.filename == fname:
@@ -136,22 +232,33 @@
                     self.check_mtime_items.append(child)
                     self.scan_directory_items.append(child)
                 continue
+            # add file to the list of items to be checked
             self.check_mtime_items.append(child)
+
         if not item.filename in self.monitoring:
-            if self.inotify:
-                log.info('add inotify for %s' % item.filename)
-                
self.inotify.watch(item.filename[:-1]).connect(self.inotify_callback)
+            # add directory to list of files we scanned.
             self.monitoring.append(item.filename)
+
+        # start checking the mtime of files
         self.check_mtime()
         return True
 
 
     def check_mtime(self):
+        """
+        Check the modification time of all items in self.check_mtime_items.
+        This function will start a timer for check_mtime_step.
+        """
         self.timer = Timer(self.check_mtime_step)
         self.timer.start(self.PARSE_TIMER / Crawler.active)
 
         
     def check_mtime_step(self):
+        """
+        Check up to the next 30 items for mtime changes. This function is called
+        in a timer and will check all items in self.check_mtime_items. If it is
+        done, it will call self.update to update all changed items.
+        """
         if not self.timer:
             return False
         counter = 0
@@ -161,26 +268,38 @@
                 return False
             item = self.check_mtime_items.pop(0)
             counter += 1
-            if item._beacon_data['mtime'] != item._beacon_mtime():
+            # log.info('mtime %s', item)
+            if item._beacon_changed():
                 self.update_items.append(item)
             if counter == 20 and len(self.check_mtime_items) > 10:
                 return True
 
 
     def update(self):
+        """
+        Update all items that have changed. If no item has changed (anymore),
+        schedule self.scan_directory to keep on crawling. Otherwise this function
+        will start a timer for update_step.
+        """
         if self.update_items:
             self.timer = Timer(self.update_step)
             self.timer.start(self.UPDATE_TIMER / Crawler.active)
         else:
-            self.scan_directory()
+            self.timer = OneShotTimer(self.scan_directory)
+            self.timer.start(0.01)
         
 
     def update_step(self):
+        """
+        Update (parse) the first item in self.update_items. If the list is empty,
+        call self.scan_directory to keep on crawling.
+        """
         if not self.timer:
             return False
         if not self.update_items:
             self.scan_directory()
             return False
-        item = self.update_items.pop(0)
-        parser.parse(self.db, item)
+        # parse next item using parse from parser.py
+        # log.info('update %s', self.update_items[0])
+        parse(self.db, self.update_items.pop(0))
         return True


-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to