Author: dmeyer
Date: Sun Mar 26 18:45:45 2006
New Revision: 1354
Added:
trunk/beacon/src/crawl.py
Modified:
trunk/beacon/src/client.py
trunk/beacon/src/db.py
trunk/beacon/src/directory.py
trunk/beacon/src/parser.py
trunk/beacon/src/server.py
Log:
add crawler to auto check the db
Modified: trunk/beacon/src/client.py
==============================================================================
--- trunk/beacon/src/client.py (original)
+++ trunk/beacon/src/client.py Sun Mar 26 18:45:45 2006
@@ -65,6 +65,7 @@
# monitor function from the server to start a new monitor for a query
self._server = kaa.ipc.IPCClient('beacon').get_object('beacon')(db)
self._server_monitor = self._server.monitor
+ self._crawler = self._server.crawl
# read only version of the database
self.database = Database(db, self)
# connect to server notifications
@@ -86,6 +87,7 @@
self._queries = []
self._server = None
self._server_monitor = None
+ self._crawler = None
self.database = None
@@ -100,7 +102,12 @@
"""
Return an object for the given filename.
"""
- return Query(self, filename=os.path.realpath(filename)).result
+ filename = os.path.realpath(filename)
+ result = Query(self, filename=filename).result
+ if result and result._beacon_isdir and \
+ (not result._beacon_id or not result._beacon_data['mtime']):
+ self._crawler(filename, __ipc_oneway=True, __ipc_noproxy_args=True)
+ return result
def query(self, **query):
Added: trunk/beacon/src/crawl.py
==============================================================================
--- (empty file)
+++ trunk/beacon/src/crawl.py Sun Mar 26 18:45:45 2006
@@ -0,0 +1,144 @@
+import os
+import logging
+
+from kaa.notifier import Timer, OneShotTimer
+
+import parser
+
+log = logging.getLogger('crawler')
+
+DIRECTORY_BLACKLIST = [ '/usr/', '/bin/' ]
+DIRECTORY_QUICKCHECK = [ '/', '/home/', os.path.expanduser("~/") ]
+
+CHECK_TIMER = 0.03
+PARSE_TIMER = 0.02
+UPDATE_TIMER = 0.03
+
+_crawling = []
+
+class Crawler(object):
+
+ active = 0
+ nextid = 0
+
+ def __init__(self, db):
+ self.db = db
+ self._checked = []
+ self._tocheck = []
+ self._toparse = []
+ self._toupdate = []
+ Crawler.nextid += 1
+ self.num = Crawler.nextid
+
+
+ def crawl(self, item):
+ if not item.filename in DIRECTORY_QUICKCHECK + DIRECTORY_BLACKLIST:
+ items = [ item ]
+ else:
+ items = self.search(item)
+
+ for child in items:
+ for c in _crawling:
+ if child.filename.startswith(c):
+ break
+ else:
+ self._toparse.append(child)
+ self._tocheck.append(child)
+ _crawling.append(child.filename)
+ if not self._toparse:
+ return
+ Crawler.active += 1
+ log.info('start crawler %s for %s' % (self.num, [ x.filename for x in items]))
+ self.timer = Timer(self.parse)
+ self.timer.start(PARSE_TIMER / Crawler.active)
+
+
+ def stop(self):
+ if not self.timer:
+ return
+ log.info('crawler %s finished', self.num)
+ Crawler.active -= 1
+ self.timer.stop()
+ self.timer = None
+ for child in self._tocheck:
+ if child.filename in _crawling:
+ _crawling.remove(child.filename)
+ self._tocheck = self._toparse = self._toupdate = []
+ self.db.commit()
+
+
+ def search(self, object):
+ if not object._beacon_isdir or object.filename in DIRECTORY_BLACKLIST:
+ return []
+ if object._beacon_data['mtime'] and \
+ not object.filename in DIRECTORY_QUICKCHECK:
+ return [ object ]
+ ret = []
+ for child in self.db.query(parent=object):
+ if not child._beacon_id:
+ continue
+ ret += self.search(child)
+ return ret
+
+
+ def check(self):
+ if not self.timer:
+ return False
+
+ if not self._tocheck:
+ self.stop()
+ return False
+
+ item = self._tocheck.pop(0)
+ self._checked.append(item)
+ log.debug('check %s', item)
+ if item.filename in _crawling:
+ _crawling.remove(item.filename)
+ for child in self.db.query(parent=item):
+ if child._beacon_isdir:
+ for x in self._tocheck + self._checked:
+ if child.filename == x.filename:
+ self._toparse.append(child)
+ break
+ else:
+ self._toparse.append(child)
+ self._tocheck.append(child)
+ _crawling.append(child.filename)
+ continue
+ self._toparse.append(child)
+ self.timer = Timer(self.parse)
+ self.timer.start(PARSE_TIMER / Crawler.active)
+ return True
+
+
+ def parse(self):
+ if not self.timer:
+ return False
+ counter = 0
+ while True:
+ if not self._toparse:
+ if self._toupdate:
+ self.timer = Timer(self.update)
+ self.timer.start(UPDATE_TIMER / Crawler.active)
+ else:
+ self.timer = OneShotTimer(self.check)
+ self.timer.start(CHECK_TIMER / Crawler.active)
+ return False
+ item = self._toparse.pop(0)
+ counter += 1
+ if item._beacon_data['mtime'] != item._beacon_mtime():
+ self._toupdate.append(item)
+ if counter == 20 and len(self._toparse) > 10:
+ return True
+
+
+ def update(self):
+ if not self.timer:
+ return False
+ if not self._toupdate:
+ self.timer = OneShotTimer(self.check)
+ self.timer.start(CHECK_TIMER / Crawler.active)
+ return False
+ item = self._toupdate.pop(0)
+ parser.parse(self.db, item)
+ return True
Modified: trunk/beacon/src/db.py
==============================================================================
--- trunk/beacon/src/db.py (original)
+++ trunk/beacon/src/db.py Sun Mar 26 18:45:45 2006
@@ -312,7 +312,12 @@
A query to get all files in a directory. The parameter parent is a
directory object.
"""
- dirname = parent.filename[:-1]
+ if parent._beacon_islink:
+ # WARNING: parent is a link, we need to follow it
+ dirname = os.path.realpath(parent.filename)
+ parent = self._query_filename(dirname)
+ else:
+ dirname = parent.filename[:-1]
items = []
for i in self._db.query(parent = parent._beacon_id):
if i['type'] == 'dir':
Modified: trunk/beacon/src/directory.py
==============================================================================
--- trunk/beacon/src/directory.py (original)
+++ trunk/beacon/src/directory.py Sun Mar 26 18:45:45 2006
@@ -78,8 +78,9 @@
parent = None
self.filename = media.directory
+ self._beacon_islink = False
if os.path.islink(self.filename[:-1]):
- self.filename = os.path.realpath(self.filename) + '/'
+ self._beacon_islink = True
Item.__init__(self, id, 'file://' + self.filename, data, parent, media)
self._beacon_overlay = False
Modified: trunk/beacon/src/parser.py
==============================================================================
--- trunk/beacon/src/parser.py (original)
+++ trunk/beacon/src/parser.py Sun Mar 26 18:45:45 2006
@@ -47,7 +47,7 @@
log = logging.getLogger('beacon')
def parse(db, item, store=False):
- log.info('check %s', item.url)
+ log.debug('check %s', item.url)
mtime = item._beacon_mtime()
if not mtime:
log.info('oops, no mtime %s' % item)
Modified: trunk/beacon/src/server.py
==============================================================================
--- trunk/beacon/src/server.py (original)
+++ trunk/beacon/src/server.py Sun Mar 26 18:45:45 2006
@@ -47,6 +47,7 @@
import parser
from db import *
from monitor import Monitor
+from crawl import Crawler
# get logging object
log = logging.getLogger('beacon')
@@ -111,12 +112,18 @@
# add root mountpoint
self.add_mountpoint(None, '/')
self.set_mountpoint('/', 'kaa.beacon.root')
-
+
# commit and wait for the results (there are no results,
# this code is only used to force waiting until the db is
# set up.
self._db.commit()
-
+
+ # check the mountpoint for db changes
+ for m in self._db.get_mountpoints(True):
+ if m.directory == '/':
+ Crawler(self._db).crawl(self._db._get_dir('/', m))
+ break
+
def register_file_type_attrs(self, name, **kwargs):
"""
@@ -162,6 +169,23 @@
return None
+ def crawl(self, directory):
+ """
+ Start crawling a directory.
+ """
+ self._db.commit()
+ data = self._db.query(filename=directory)
+ items = []
+ for i in data._beacon_tree():
+ if i._beacon_id:
+ break
+ items.append(i)
+ while items:
+ parser.parse(self._db, items.pop(), store=True)
+ self._db.commit()
+ Crawler(self._db).crawl(data)
+
+
def add_mountpoint(self, device, directory):
"""
Add a mountpoint to the system.
-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog