Author: dmeyer
Date: Sat Sep 15 13:43:38 2007
New Revision: 2809

Log:
improve channel stuff, more or less working

Added:
   trunk/WIP/netsearch/src/feed/lib/__init__.py
   trunk/WIP/netsearch/src/feed/manager.py
Modified:
   trunk/WIP/netsearch/src/feed/channel.py
   trunk/WIP/netsearch/src/feed/plugins/rss.py
   trunk/WIP/netsearch/test/feed.py

Modified: trunk/WIP/netsearch/src/feed/channel.py
==============================================================================
--- trunk/WIP/netsearch/src/feed/channel.py     (original)
+++ trunk/WIP/netsearch/src/feed/channel.py     Sat Sep 15 13:43:38 2007
@@ -13,14 +13,17 @@
 import kaa.beacon
 from kaa.strutils import str_to_unicode, unicode_to_str
 
+# get manager module
+import manager
+
 # get logging object
-log = logging.getLogger('beacon.feed')
+log = logging.getLogger('beacon.channel')
 
 # ##################################################################
 # some generic entry/channel stuff
 # ##################################################################
 
-IMAGEDIR = os.path.expanduser("~/.beacon/feedinfo/images")
+IMAGEDIR = os.path.expanduser("~/.beacon/images")
 
 if not os.path.isdir(IMAGEDIR):
     os.makedirs(IMAGEDIR)
@@ -31,7 +34,11 @@
 
         def __getattr__(self, attr):
             if attr == 'basename' and not 'basename' in self.keys():
-                self['basename'] = self['title'].replace('/', '') + '.' + self['ext']
+                basename = os.path.basename(self['url'])
+                if self.url.endswith('/'):
+                    ext = os.path.splitext(self['url'])[1]
+                    basename = self['title'].replace('/', '') + ext
+                self['basename'] = unicode_to_str(basename)
             return self.get(attr)
 
         def fetch(self, filename):
@@ -41,10 +48,10 @@
             return kaa.notifier.url.fetch(self.url, filename, tmpname)
 
 
-    def __init__(self, url, destdir, cachefile):
+    def __init__(self, url, destdir):
         self.url = url
         self.dirname = destdir
-        self._cache = cachefile
+        self._manager = manager
         self._entries = []
         self._download = True
         self._num = 0
@@ -55,28 +62,40 @@
 
     def configure(self, download=True, num=0, keep=True):
         """
-        Configure feed
-        num:      number of items from the feed (0 == all, default)
-        keep:     keep old entries not in feed anymore (download only)
+        Configure channel
+        num:      number of items from the channel (0 == all, default)
+        keep:     keep old entries not in channel anymore (download only)
         verbose:  print status on stdout
         """
         self._download = download
         self._num = num
         self._keep = keep
-        self._writexml()
+        manager.save()
 
 
-    def _readxml(self, nodes):
-        for node in nodes:
-            if node.nodeName == 'entry':
+    def _readxml(self, node):
+        """
+        Read XML node with channel configuration and cache.
+        """
+        for d in node.childNodes:
+            if not d.nodeName == 'directory':
+                continue
+            self._download = d.getAttribute('download').lower() == 'true'
+            self._num = int(d.getAttribute('num'))
+            self._keep = d.getAttribute('keep').lower() == 'true'
+            for node in d.childNodes:
+                if not node.nodeName == 'entry':
+                    continue
                 fname = unicode_to_str(node.getAttribute('filename')) or None
                 self._entries.append((node.getAttribute('url'), fname))
 
 
-    def _writexml(self):
-        doc = minidom.getDOMImplementation().createDocument(None, "feed", None)
-        top = doc.documentElement
-        top.setAttribute('url', self.url)
+    def _writexml(self, node):
+        """
+        Write XML node with channel configuration and cache.
+        """
+        node.setAttribute('url', self.url)
+        doc = node.ownerDocument
         d = doc.createElement('directory')
         for attr in ('download', 'keep'):
             if getattr(self, '_' + attr):
@@ -85,20 +104,21 @@
                 d.setAttribute(attr, 'false')
             d.setAttribute('num', str(self._num))
         d.appendChild(doc.createTextNode(self.dirname))
-        top.appendChild(d)
+        node.appendChild(d)
         for url, fname in self._entries:
-            e = doc.createElement('entry')
+            e = node.createElement('entry')
             e.setAttribute('url', url)
             if fname:
                 e.setAttribute('filename', str_to_unicode(fname))
-            top.appendChild(e)
-        f = open(self._cache, 'w')
-        f.write(doc.toprettyxml())
-        f.close()
+            node.appendChild(e)
 
 
     @kaa.notifier.yield_execution()
     def _get_image(self, url):
+        """
+        Download image and store it to the image dir. Returns image
+        filename.
+        """
         url = unicode_to_str(url)
         fname = md5.md5(url).hexdigest() + os.path.splitext(url)[1]
         fname = os.path.join(IMAGEDIR, fname)
@@ -111,10 +131,10 @@
     @kaa.notifier.yield_execution()
     def update(self, verbose=False):
         """
-        Update feed.
+        Update channel.
         """
         def print_status(s):
-            sys.stdout.write("%s\r" % str(s))
+            sys.stdout.write("%s\r" % s.get_progressbar())
             sys.stdout.flush()
 
         # get directory information
@@ -132,11 +152,6 @@
                 yield entry
                 continue
 
-            # create additional information
-            info = {}
-            for key in ('title', 'description', 'image'):
-                if entry.get(key):
-                    info[key] = entry[key]
             filename = None
 
             if not self._download and entry.url in allurls:
@@ -144,10 +159,7 @@
                 pass
             elif not self._download:
                 # add to beacon
-                info['url'] = entry['url']
-                i = kaa.beacon.add_item(
-                    type='video', parent=beacondir,
-                    mediafeed_channel=self.url, **info)
+                i = kaa.beacon.add_item(parent=beacondir, **entry)
             else:
                 # download
                 filename = os.path.join(self.dirname, entry.basename)
@@ -163,15 +175,28 @@
                     async = entry.fetch(filename)
                     if verbose:
                     async.get_status().connect(print_status, async.get_status())
-                    # FIXME: add additional information to beacon
                     yield async
-
+                    if not os.path.isfile(filename):
+                        log.error('error fetching', entry.url)
+                        continue
+                    
+                if os.path.isfile(filename):
+                    item = kaa.beacon.get(filename)
+                    if not item.scanned():
+                        # BEACON_FIXME
+                        item._beacon_request()
+                        while not item.scanned():
+                            yield kaa.notifier.YieldContinue
+                    for key, value in entry.items():
+                        if not key in ('type', 'url', 'basename'):
+                            item[key] = value
+                        
             self._entries.append((entry['url'], filename))
             num -= 1
             if num == 0:
                 break
 
-        self._writexml()
+        manager.save()
 
         # delete old files or remove old entries from beacon
         for url, filename in entries:

Added: trunk/WIP/netsearch/src/feed/lib/__init__.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/lib/__init__.py        Sat Sep 15 13:43:38 2007
@@ -0,0 +1,10 @@
+import feedparser as _feedparser
+import urllib2
+
+import kaa.notifier
+
+@kaa.notifier.execute_in_thread()
+def feedparser(url):
+    print url
+    print _feedparser.parse
+    return _feedparser.parse(urllib2.urlopen(url))

Added: trunk/WIP/netsearch/src/feed/manager.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/manager.py     Sat Sep 15 13:43:38 2007
@@ -0,0 +1,140 @@
+import os
+import logging
+from xml.dom import minidom
+
+import kaa.notifier
+from kaa.strutils import unicode_to_str
+
+# get logging object
+log = logging.getLogger('beacon.channel')
+
+CACHE = os.path.expanduser("~/.beacon/channels.xml")
+
+_initialized = False
+
+# list of all channel objects
+_channels = []
+
+# list of all Channel classes
+_generators = []
+
+def register(regexp, generator):
+    """
+    Register a Channel class.
+    """
+    _generators.append((regexp, generator))
+
+
+def _get_channel(url, destdir):
+    """
+    Get channel class from generators and create the channel object.
+    """
+    for regexp, generator in _generators:
+        if regexp.match(url):
+            return generator(url, destdir)
+    raise RuntimeError
+
+
+def add_channel(url, destdir, download=True, num=0, keep=True):
+    """
+    Add a new channel.
+    """
+    if not _initialized:
+        _init()
+    for c in _channels:
+        if c.dirname == destdir and c.url == url:
+            raise RuntimeError('channel already exists')
+    channel = _get_channel(url, destdir)
+    _channels.append(channel)
+    channel.configure(download, num, keep)
+
+
+def list_channels():
+    """
+    Return a list of all channels.
+    """
+    if not _initialized:
+        _init()
+    return _channels
+
+
+def remove_channel(channel):
+    """
+    Remove a channel.
+    """
+    _channels.remove(channel)
+    save()
+    
+
+def save():
+    """
+    Save all channel information
+    """
+    if not _initialized:
+        _init()
+    doc = minidom.getDOMImplementation().createDocument(None, "channels", None)
+    top = doc.documentElement
+    for c in _channels:
+        node = doc.createElement('channel')
+        c._writexml(node)
+        top.appendChild(node)
+    f = open(CACHE, 'w')
+    f.write(doc.toprettyxml())
+    f.close()
+
+    
+def _init():
+    """
+    Load cached channels from disc.
+    """
+
+    def parse_channel(c):
+        for d in c.childNodes:
+            if not d.nodeName == 'directory':
+                continue
+            dirname = unicode_to_str(d.childNodes[0].data.strip())
+            url = unicode_to_str(c.getAttribute('url'))
+            channel = _get_channel(url, dirname)
+            channel._readxml(c)
+            _channels.append(channel)
+            return
+        
+    global _initialized
+    _initialized = True
+    if not os.path.isfile(CACHE):
+        return
+
+    try:
+        cache = minidom.parse(CACHE)
+    except:
+        log.exception('bad cache file: %s' % CACHE)
+        return
+    if not len(cache.childNodes) == 1 or \
+           not cache.childNodes[0].nodeName == 'channels':
+        log.error('bad cache file: %s' % CACHE)
+        return
+
+    for c in cache.childNodes[0].childNodes:
+        try:
+            parse_channel(c)
+        except:
+            log.exception('bad cache file: %s' % CACHE)
+
+
+_updating = False
+
+@kaa.notifier.yield_execution()
+def update(verbose=False):
+    """
+    Update all channels
+    """
+    global _updating
+    if _updating:
+        yield False
+    if not _initialized:
+        _init()
+    _updating = True
+    for channel in _channels:
+        yield channel.update(verbose=verbose)
+    _updating = False
+    yield True

Modified: trunk/WIP/netsearch/src/feed/plugins/rss.py
==============================================================================
--- trunk/WIP/netsearch/src/feed/plugins/rss.py (original)
+++ trunk/WIP/netsearch/src/feed/plugins/rss.py Sat Sep 15 13:43:38 2007
@@ -1,4 +1,5 @@
 import re
+import time
 import logging
 
 import kaa.notifier
@@ -9,6 +10,7 @@
 
 # get logging object
 log = logging.getLogger('beacon.feed')
+isotime = '%a, %d %b %Y %H:%M:%S'
 
 class RSS(Channel):
 
@@ -37,24 +39,53 @@
 
         # real iterate
         for f in feed.entries:
-            if 'link' in f.keys():
-                link = f.link
+
+            metadata = {}
+
+            if feedimage:
+                metadata['image'] = feedimage
+            if 'updated' in f.keys():
+                date = f.updated
+                if date.find('+') > 0:
+                    date = date[:date.find('+')].strip()
+                if date.rfind(' ') > date.rfind(':'):
+                    date = date[:date.rfind(' ')]
+                try:
+                    metadata['date'] = int(time.mktime(time.strptime(date, isotime)))
+                except ValueError:
+                    log.error('bad date format: %s', date)
+                    
+            if 'itunes_duration' in f.keys():
+                duration = 0
+                for p in f.itunes_duration.split(':'):
+                    duration = duration * 60 + int(p)
+                metadata['length'] = duration
+            if 'summary' in f.keys():
+                metadata['description']=f.summary
+            if 'title' in f.keys():
+                metadata['title'] = f.title
+                
             if 'enclosures' in f.keys():
                 # FIXME: more details than expected
                 if len(f.enclosures) > 1:
                     log.warning('more than one enclosure in %s' % self.url)
-                link = f.enclosures[0].href
+                metadata['url'] = f.enclosures[0].href
+                for ptype in ('video', 'audio', 'image'):
+                    if f.enclosures[0].type.startswith(ptype):
+                        metadata['type'] = ptype
+                        break
+            elif 'link' in f.keys():
+                # bad RSS style
+                metadata['url'] = f.link
+            else:
+                log.error('no link in entry for %s' % self.url)
+                continue
+
             # FIXME: add much better logic here, including
             # getting a good basename for urls ending with /
             # based on type.
-            if not link:
-                log.error('no link in entry for %s' % self.url)
-                continue
             # create entry
-            entry = Channel.Entry(basename=link[link.rfind('/')+1:], url=link,
-                                  description=f.get('summary', ''), image=feedimage)
-            if 'title' in f:
-                entry['title'] = f['title']
+            entry = Channel.Entry(**metadata)
             yield entry
 
-register(re.compile('^https?://.*'), RSS)
+register(re.compile('^(http|https|file)://.*'), RSS)

Modified: trunk/WIP/netsearch/test/feed.py
==============================================================================
--- trunk/WIP/netsearch/test/feed.py    (original)
+++ trunk/WIP/netsearch/test/feed.py    Sat Sep 15 13:43:38 2007
@@ -9,10 +9,16 @@
             print 'no channels defined'
             sys.exit(0)
         kaa.beacon.connect()
-        kaa.netsearch.feed.update(verbose=all).connect(sys.exit)
+        kaa.netsearch.feed.update(verbose=True).connect(sys.exit)
         kaa.notifier.loop()
         sys.exit(0)
 
+
+kaa.beacon.connect()
+kaa.netsearch.feed.add_channel('http://foo', 'bar')
+
+sys.exit(0)
+
 # import gtk for gui
 import pygtk
 pygtk.require('2.0')

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to