Author: dmeyer
Date: Fri Apr 14 13:27:59 2006
New Revision: 1397
Modified:
trunk/beacon/src/crawl.py
trunk/beacon/src/db.py
trunk/beacon/src/parser.py
Log:
add checking for duplicates (and try to avoid them)
Modified: trunk/beacon/src/crawl.py
==============================================================================
--- trunk/beacon/src/crawl.py (original)
+++ trunk/beacon/src/crawl.py Fri Apr 14 13:27:59 2006
@@ -126,6 +126,10 @@
# be much faster if we can handle move.
if item._beacon_isdir:
self.scan_directory_items.append(item)
+ for i in self.check_mtime_items:
+ if i.filename == item.filename:
+ # already in the checking list, ignore it
+ return True
self.check_mtime_items.append(item)
if not self.timer:
Crawler.active += 1
@@ -134,10 +138,13 @@
# The file does not exist, we need to delete it in the database
# (if it is still in there)
- item = self.db.query(filename=name)
- if item._beacon_id:
+ if self.db.get_object(item._beacon_data['name'],
item._beacon_data['parent']):
# Still in the db, delete it
self.db.delete_object(item._beacon_id, beacon_immediately=True)
+ for i in self.check_mtime_items:
+ if i.filename == item.filename:
+ self.check_mtime_items.remove(i)
+ break
if name + '/' in self.monitoring:
# remove directory and all subdirs from the notifier. The directory
# is gone, so all subdirs are invalid, too.
@@ -300,6 +307,5 @@
self.scan_directory()
return False
# parse next item using parse from parser.py
- # log.info('update %s', self.update_items[0])
parse(self.db, self.update_items.pop(0))
return True
Modified: trunk/beacon/src/db.py
==============================================================================
--- trunk/beacon/src/db.py (original)
+++ trunk/beacon/src/db.py Fri Apr 14 13:27:59 2006
@@ -437,6 +437,23 @@
return self._db.query(*args, **kwargs)
+ def get_object(self, name, parent):
+ """
+ Get the object with the given name and parent. If a pending change adds
+ a matching object, it is committed first; then the database is queried.
+ """
+ for func, type, args, kwargs in self.changes:
+ if func == self._db.add_object and \
+ 'name' in kwargs and kwargs['name'] == name and \
+ 'parent' in kwargs and kwargs['parent'] == parent:
+ self.commit()
+ break
+ result = self._db.query(name=name, parent=parent)
+ if result:
+ return result[0]
+ return None
+
+
def add_object(self, type, metadata=None, beacon_immediately=False,
*args, **kwargs):
"""
Modified: trunk/beacon/src/parser.py
==============================================================================
--- trunk/beacon/src/parser.py (original)
+++ trunk/beacon/src/parser.py Fri Apr 14 13:27:59 2006
@@ -67,6 +67,17 @@
if item._beacon_data['mtime'] == mtime:
log.debug('up-to-date %s' % item)
return
+
+ if not item._beacon_id:
+ # New file, maybe already added? Do a small check to be sure we don't
+ # add the same item to the db again.
+ data = db.get_object(item._beacon_data['name'], parent._beacon_id)
+ if data:
+ item._beacon_database_update(data)
+ if item._beacon_data['mtime'] == mtime:
+ log.info('up-to-date %s' % item)
+ return
+
log.info('scan %s' % item)
attributes = { 'mtime': mtime }
metadata = kaa.metadata.parse(item.filename)
-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog