Author: dmeyer
Date: Thu Apr 20 18:19:49 2006
New Revision: 1427
Modified:
trunk/beacon/src/crawl.py
Log:
handle growing files
Modified: trunk/beacon/src/crawl.py
==============================================================================
--- trunk/beacon/src/crawl.py (original)
+++ trunk/beacon/src/crawl.py Thu Apr 20 18:19:49 2006
@@ -31,6 +31,7 @@
# python imports
import os
+import time
import logging
# kaa imports
@@ -44,6 +45,15 @@
# get logging object
log = logging.getLogger('crawler')
+try:
+ WATCH_MASK = INotify.MODIFY | INotify.CLOSE_WRITE | INotify.DELETE | \
+ INotify.CREATE | INotify.DELETE_SELF | INotify.UNMOUNT | \
+ INotify.MOVE
+except:
+ WATCH_MASK = None
+
+# timer for growing files (cp, download)
+GROWING_TIMER = 5
class Crawler(object):
"""
@@ -82,6 +92,7 @@
self.timer = None
self.restart_timer = None
self.restart_args = []
+ self.last_checked = {}
def append(self, item):
@@ -105,6 +116,10 @@
self.finished()
self.monitoring = []
self.inotify = None
+ for wait, timer in self.last_checked:
+ if timer and timer.active():
+ timer.stop()
+ self.last_checked = {}
# -------------------------------------------------------------------------
@@ -115,8 +130,12 @@
"""
Callback for inotify.
"""
- if not mask & INotify.WATCH_MASK:
- # TODO: maybe check more types of callbacks
+ if mask & INotify.MODIFY and name in self.last_checked and \
+ self.last_checked[name][1]:
+ # A file was modified. Do this check as fast as we can because the
+ # events may come in bursts when a file is just copied. In this case
+ # a timer is already active and we can return. It still uses too much
+ # CPU time in the burst, but there is nothing we can do about it.
return True
item = self.db.query(filename=name)
@@ -131,6 +150,23 @@
if i.filename == item.filename:
# already in the checking list, ignore it
return True
+ now = time.time()
+ if name in self.last_checked:
+ last_check, timer = self.last_checked[name]
+ if mask & INotify.CLOSE_WRITE:
+ # The file is closed. So we can remove the current running
+ # timer and check now
+ timer.stop()
+ del self.last_checked[name]
+ else:
+ # Do not check again, but restart the timer, it is expired
+ timer = OneShotTimer(self.inotify_timer_callback, name)
+ timer.start(GROWING_TIMER)
+ self.last_checked[name][1] = timer
+ return True
+ elif INotify.MODIFY:
+ # store the current time
+ self.last_checked[name] = [ now, None ]
self.check_mtime_items.append(item)
if not self.timer:
Crawler.active += 1
@@ -156,6 +192,17 @@
self.inotify.ignore(m)
log.info('remove inotify for %s', m)
self.monitoring.remove(m)
+ return True
+
+
+ def inotify_timer_callback(self, name):
+ """
+ Callback for delayed inotify MODIFY events.
+ """
+ if not name in self.last_checked:
+ return
+ del self.last_checked[name]
+ self.inotify_callback(INotify.MODIFY, name)
def finished(self):
@@ -229,7 +276,7 @@
dirname = os.path.realpath(item.filename)
log.info('add inotify for %s' % dirname)
try:
- self.inotify.watch(dirname)
+ self.inotify.watch(dirname, WATCH_MASK)
except IOError, e:
log.error(e)
-------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog