Author: dmeyer
Date: Sun Mar 26 18:45:45 2006
New Revision: 1354

Added:
   trunk/beacon/src/crawl.py
Modified:
   trunk/beacon/src/client.py
   trunk/beacon/src/db.py
   trunk/beacon/src/directory.py
   trunk/beacon/src/parser.py
   trunk/beacon/src/server.py

Log:
add crawler to auto check the db

Modified: trunk/beacon/src/client.py
==============================================================================
--- trunk/beacon/src/client.py  (original)
+++ trunk/beacon/src/client.py  Sun Mar 26 18:45:45 2006
@@ -65,6 +65,7 @@
         # monitor function from the server to start a new monitor for a query
         self._server = kaa.ipc.IPCClient('beacon').get_object('beacon')(db)
         self._server_monitor = self._server.monitor
+        self._crawler = self._server.crawl
         # read only version of the database
         self.database = Database(db, self)
         # connect to server notifications
@@ -86,6 +87,7 @@
         self._queries = []
         self._server = None
         self._server_monitor = None
+        self._crawler = None
         self.database = None
         
         
@@ -100,7 +102,12 @@
         """
         Return an object for the given filename.
         """
-        return Query(self, filename=os.path.realpath(filename)).result
+        filename = os.path.realpath(filename)
+        result = Query(self, filename=filename).result
+        if result and result._beacon_isdir and \
+               (not result._beacon_id or not result._beacon_data['mtime']):
+            self._crawler(filename, __ipc_oneway=True, __ipc_noproxy_args=True)
+        return result
 
 
     def query(self, **query):

Added: trunk/beacon/src/crawl.py
==============================================================================
--- (empty file)
+++ trunk/beacon/src/crawl.py   Sun Mar 26 18:45:45 2006
@@ -0,0 +1,144 @@
+import os
+import logging
+
+from kaa.notifier import Timer, OneShotTimer
+
+import parser
+
+log = logging.getLogger('crawler')
+
+DIRECTORY_BLACKLIST  = [ '/usr/', '/bin/' ]
+DIRECTORY_QUICKCHECK = [ '/', '/home/', os.path.expanduser("~/") ]
+
+CHECK_TIMER  = 0.03
+PARSE_TIMER  = 0.02
+UPDATE_TIMER = 0.03
+
+_crawling = []
+
+class Crawler(object):
+
+    active = 0
+    nextid = 0
+    
+    def __init__(self, db):
+        self.db = db
+        self._checked = []
+        self._tocheck = []
+        self._toparse = []
+        self._toupdate = []
+        Crawler.nextid += 1
+        self.num = Crawler.nextid
+        
+
+    def crawl(self, item):
+        if not item.filename in DIRECTORY_QUICKCHECK + DIRECTORY_BLACKLIST:
+            items = [ item ]
+        else:
+            items = self.search(item)
+            
+        for child in items:
+            for c in _crawling:
+                if child.filename.startswith(c):
+                    break
+            else:
+                self._toparse.append(child)
+                self._tocheck.append(child)
+                _crawling.append(child.filename)
+        if not self._toparse:
+            return
+        Crawler.active += 1
+        log.info('start crawler %s for %s' % (self.num, [ x.filename for x in items]))
+        self.timer = Timer(self.parse)
+        self.timer.start(PARSE_TIMER / Crawler.active)
+
+
+    def stop(self):
+        if not self.timer:
+            return
+        log.info('crawler %s finished', self.num)
+        Crawler.active -= 1
+        self.timer.stop()
+        self.timer = None
+        for child in self._tocheck:
+            if child.filename in _crawling:
+                _crawling.remove(child.filename)
+        self._tocheck = self._toparse = self._toupdate = []
+        self.db.commit()
+
+        
+    def search(self, object):
+        if not object._beacon_isdir or object.filename in DIRECTORY_BLACKLIST:
+            return []
+        if object._beacon_data['mtime'] and \
+               not object.filename in DIRECTORY_QUICKCHECK:
+            return [ object ]
+        ret = []
+        for child in self.db.query(parent=object):
+            if not child._beacon_id:
+                continue
+            ret += self.search(child)
+        return ret
+
+
+    def check(self):
+        if not self.timer:
+            return False
+
+        if not self._tocheck:
+            self.stop()
+            return False
+
+        item = self._tocheck.pop(0)
+        self._checked.append(item)
+        log.debug('check %s', item)
+        if item.filename in _crawling:
+            _crawling.remove(item.filename)
+        for child in self.db.query(parent=item):
+            if child._beacon_isdir:
+                for x in self._tocheck + self._checked:
+                    if child.filename == x.filename:
+                        self._toparse.append(child)
+                        break
+                else:
+                    self._toparse.append(child)
+                    self._tocheck.append(child)
+                    _crawling.append(child.filename)
+                continue
+            self._toparse.append(child)
+        self.timer = Timer(self.parse)
+        self.timer.start(PARSE_TIMER / Crawler.active)
+        return True
+
+
+    def parse(self):
+        if not self.timer:
+            return False
+        counter = 0
+        while True:
+            if not self._toparse:
+                if self._toupdate:
+                    self.timer = Timer(self.update)
+                    self.timer.start(UPDATE_TIMER / Crawler.active)
+                else:
+                    self.timer = OneShotTimer(self.check)
+                    self.timer.start(CHECK_TIMER / Crawler.active)
+                return False
+            item = self._toparse.pop(0)
+            counter += 1
+            if item._beacon_data['mtime'] != item._beacon_mtime():
+                self._toupdate.append(item)
+            if counter == 20 and len(self._toparse) > 10:
+                return True
+
+
+    def update(self):
+        if not self.timer:
+            return False
+        if not self._toupdate:
+            self.timer = OneShotTimer(self.check)
+            self.timer.start(CHECK_TIMER / Crawler.active)
+            return False
+        item = self._toupdate.pop(0)
+        parser.parse(self.db, item)
+        return True

Modified: trunk/beacon/src/db.py
==============================================================================
--- trunk/beacon/src/db.py      (original)
+++ trunk/beacon/src/db.py      Sun Mar 26 18:45:45 2006
@@ -312,7 +312,12 @@
         A query to get all files in a directory. The parameter parent is a
         directort object.
         """
-        dirname = parent.filename[:-1]
+        if parent._beacon_islink:
+            # WARNING: parent is a link, we need to follow it
+            dirname = os.path.realpath(parent.filename)
+            parent = self._query_filename(dirname)
+        else:
+            dirname = parent.filename[:-1]
         items = []
         for i in self._db.query(parent = parent._beacon_id):
             if i['type'] == 'dir':

Modified: trunk/beacon/src/directory.py
==============================================================================
--- trunk/beacon/src/directory.py       (original)
+++ trunk/beacon/src/directory.py       Sun Mar 26 18:45:45 2006
@@ -78,8 +78,9 @@
             parent = None
             self.filename = media.directory
 
+        self._beacon_islink = False
         if os.path.islink(self.filename[:-1]):
-            self.filename = os.path.realpath(self.filename) + '/'
+            self._beacon_islink = True
 
         Item.__init__(self, id, 'file://' + self.filename, data, parent, media)
         self._beacon_overlay = False

Modified: trunk/beacon/src/parser.py
==============================================================================
--- trunk/beacon/src/parser.py  (original)
+++ trunk/beacon/src/parser.py  Sun Mar 26 18:45:45 2006
@@ -47,7 +47,7 @@
 log = logging.getLogger('beacon')
 
 def parse(db, item, store=False):
-    log.info('check %s', item.url)
+    log.debug('check %s', item.url)
     mtime = item._beacon_mtime()
     if not mtime:
         log.info('oops, no mtime %s' % item)

Modified: trunk/beacon/src/server.py
==============================================================================
--- trunk/beacon/src/server.py  (original)
+++ trunk/beacon/src/server.py  Sun Mar 26 18:45:45 2006
@@ -47,6 +47,7 @@
 import parser
 from db import *
 from monitor import Monitor
+from crawl import Crawler
 
 # get logging object
 log = logging.getLogger('beacon')
@@ -111,12 +112,18 @@
         # add root mountpoint
         self.add_mountpoint(None, '/')
         self.set_mountpoint('/', 'kaa.beacon.root')
-        
+
         # commit and wait for the results (there are no results,
         # this code is only used to force waiting until the db is
         # set up.
         self._db.commit()
-        
+
+        # check the mountpoint for db changes
+        for m in self._db.get_mountpoints(True):
+            if m.directory == '/':
+                Crawler(self._db).crawl(self._db._get_dir('/', m))
+                break
+
 
     def register_file_type_attrs(self, name, **kwargs):
         """
@@ -162,6 +169,23 @@
         return None
     
 
+    def crawl(self, directory):
+        """
+        Start crawling a directory.
+        """
+        self._db.commit()
+        data = self._db.query(filename=directory)
+        items = []
+        for i in data._beacon_tree():
+            if i._beacon_id:
+                break
+            items.append(i)
+        while items:
+            parser.parse(self._db, items.pop(), store=True)
+        self._db.commit()
+        Crawler(self._db).crawl(data)
+        
+        
     def add_mountpoint(self, device, directory):
         """
         Add a mountpoint to the system.


-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to