Author: dmeyer
Date: Sat Sep 15 13:43:38 2007
New Revision: 2809
Log:
improve channel stuff, more or less working
Added:
trunk/WIP/netsearch/src/feed/lib/__init__.py
trunk/WIP/netsearch/src/feed/manager.py
Modified:
trunk/WIP/netsearch/src/feed/channel.py
trunk/WIP/netsearch/src/feed/plugins/rss.py
trunk/WIP/netsearch/test/feed.py
Modified: trunk/WIP/netsearch/src/feed/channel.py
==============================================================================
--- trunk/WIP/netsearch/src/feed/channel.py (original)
+++ trunk/WIP/netsearch/src/feed/channel.py Sat Sep 15 13:43:38 2007
@@ -13,14 +13,17 @@
import kaa.beacon
from kaa.strutils import str_to_unicode, unicode_to_str
+# get manager module
+import manager
+
# get logging object
-log = logging.getLogger('beacon.feed')
+log = logging.getLogger('beacon.channel')
# ##################################################################
# some generic entry/channel stuff
# ##################################################################
-IMAGEDIR = os.path.expanduser("~/.beacon/feedinfo/images")
+IMAGEDIR = os.path.expanduser("~/.beacon/images")
if not os.path.isdir(IMAGEDIR):
os.makedirs(IMAGEDIR)
@@ -31,7 +34,11 @@
def __getattr__(self, attr):
if attr == 'basename' and not 'basename' in self.keys():
- self['basename'] = self['title'].replace('/', '') + '.' + self['ext']
+ basename = os.path.basename(self['url'])
+ if self.url.endswith('/'):
+ ext = os.path.splitext(self['url'])[1]
+ basename = self['title'].replace('/', '') + ext
+ self['basename'] = unicode_to_str(basename)
return self.get(attr)
def fetch(self, filename):
@@ -41,10 +48,10 @@
return kaa.notifier.url.fetch(self.url, filename, tmpname)
- def __init__(self, url, destdir, cachefile):
+ def __init__(self, url, destdir):
self.url = url
self.dirname = destdir
- self._cache = cachefile
+ self._manager = manager
self._entries = []
self._download = True
self._num = 0
@@ -55,28 +62,40 @@
def configure(self, download=True, num=0, keep=True):
"""
- Configure feed
- num: number of items from the feed (0 == all, default)
- keep: keep old entries not in feed anymore (download only)
+ Configure channel
+ num: number of items from the channel (0 == all, default)
+ keep: keep old entries not in channel anymore (download only)
verbose: print status on stdout
"""
self._download = download
self._num = num
self._keep = keep
- self._writexml()
+ manager.save()
- def _readxml(self, nodes):
- for node in nodes:
- if node.nodeName == 'entry':
+ def _readxml(self, node):
+ """
+ Read XML node with channel configuration and cache.
+ """
+ for d in node.childNodes:
+ if not d.nodeName == 'directory':
+ continue
+ self._download = d.getAttribute('download').lower() == 'true'
+ self._num = int(d.getAttribute('num'))
+ self._keep = d.getAttribute('keep').lower() == 'true'
+ for node in d.childNodes:
+ if not node.nodeName == 'entry':
+ continue
fname = unicode_to_str(node.getAttribute('filename')) or None
self._entries.append((node.getAttribute('url'), fname))
- def _writexml(self):
- doc = minidom.getDOMImplementation().createDocument(None, "feed", None)
- top = doc.documentElement
- top.setAttribute('url', self.url)
+ def _writexml(self, node):
+ """
+ Write XML node with channel configuration and cache.
+ """
+ node.setAttribute('url', self.url)
+ doc = node.ownerDocument
d = doc.createElement('directory')
for attr in ('download', 'keep'):
if getattr(self, '_' + attr):
@@ -85,20 +104,21 @@
d.setAttribute(attr, 'false')
d.setAttribute('num', str(self._num))
d.appendChild(doc.createTextNode(self.dirname))
- top.appendChild(d)
+ node.appendChild(d)
for url, fname in self._entries:
- e = doc.createElement('entry')
+ e = node.createElement('entry')
e.setAttribute('url', url)
if fname:
e.setAttribute('filename', str_to_unicode(fname))
- top.appendChild(e)
- f = open(self._cache, 'w')
- f.write(doc.toprettyxml())
- f.close()
+ node.appendChild(e)
@kaa.notifier.yield_execution()
def _get_image(self, url):
+ """
+ Download image and store it to the image dir. Returns image
+ filename.
+ """
url = unicode_to_str(url)
fname = md5.md5(url).hexdigest() + os.path.splitext(url)[1]
fname = os.path.join(IMAGEDIR, fname)
@@ -111,10 +131,10 @@
@kaa.notifier.yield_execution()
def update(self, verbose=False):
"""
- Update feed.
+ Update channel.
"""
def print_status(s):
- sys.stdout.write("%s\r" % str(s))
+ sys.stdout.write("%s\r" % s.get_progressbar())
sys.stdout.flush()
# get directory information
@@ -132,11 +152,6 @@
yield entry
continue
- # create additional information
- info = {}
- for key in ('title', 'description', 'image'):
- if entry.get(key):
- info[key] = entry[key]
filename = None
if not self._download and entry.url in allurls:
@@ -144,10 +159,7 @@
pass
elif not self._download:
# add to beacon
- info['url'] = entry['url']
- i = kaa.beacon.add_item(
- type='video', parent=beacondir,
- mediafeed_channel=self.url, **info)
+ i = kaa.beacon.add_item(parent=beacondir, **entry)
else:
# download
filename = os.path.join(self.dirname, entry.basename)
@@ -163,15 +175,28 @@
async = entry.fetch(filename)
if verbose:
async.get_status().connect(print_status, async.get_status())
- # FIXME: add additional information to beacon
yield async
-
+ if not os.path.isfile(filename):
+ log.error('error fetching', entry.url)
+ continue
+
+ if os.path.isfile(filename):
+ item = kaa.beacon.get(filename)
+ if not item.scanned():
+ # BEACON_FIXME
+ item._beacon_request()
+ while not item.scanned():
+ yield kaa.notifier.YieldContinue
+ for key, value in entry.items():
+ if not key in ('type', 'url', 'basename'):
+ item[key] = value
+
self._entries.append((entry['url'], filename))
num -= 1
if num == 0:
break
- self._writexml()
+ manager.save()
# delete old files or remove old entries from beacon
for url, filename in entries:
Added: trunk/WIP/netsearch/src/feed/lib/__init__.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/lib/__init__.py Sat Sep 15 13:43:38 2007
@@ -0,0 +1,10 @@
+import feedparser as _feedparser
+import urllib2
+
+import kaa.notifier
+
+@kaa.notifier.execute_in_thread()
+def feedparser(url):
+ print url
+ print _feedparser.parse
+ return _feedparser.parse(urllib2.urlopen(url))
Added: trunk/WIP/netsearch/src/feed/manager.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/manager.py Sat Sep 15 13:43:38 2007
@@ -0,0 +1,140 @@
+import os
+import logging
+from xml.dom import minidom
+
+import kaa.notifier
+from kaa.strutils import unicode_to_str
+
+# get logging object
+log = logging.getLogger('beacon.channel')
+
+CACHE = os.path.expanduser("~/.beacon/channels.xml")
+
+_initialized = False
+
+# list of all channel objects
+_channels = []
+
+# list of all Channel classes
+_generators = []
+
+def register(regexp, generator):
+ """
+ Register a Channel class.
+ """
+ _generators.append((regexp, generator))
+
+
+def _get_channel(url, destdir):
+ """
+ Get channel class from generators and create the channel object.
+ """
+ for regexp, generator in _generators:
+ if regexp.match(url):
+ return generator(url, destdir)
+ raise RuntimeError
+
+
+def add_channel(url, destdir, download=True, num=0, keep=True):
+ """
+ Add a new channel.
+ """
+ if not _initialized:
+ _init()
+ for c in _channels:
+ if c.dirname == destdir and c.url == url:
+ raise RuntimeError('channel already exists')
+ channel = _get_channel(url, destdir)
+ _channels.append(channel)
+ channel.configure(download, num, keep)
+
+
+def list_channels():
+ """
+ Return a list of all channels.
+ """
+ if not _initialized:
+ _init()
+ return _channels
+
+
+def remove_channel(channel):
+ """
+ Remove a channel.
+ """
+ _channels.remove(channel)
+ save()
+
+
+def save():
+ """
+ Save all channel information
+ """
+ if not _initialized:
+ _init()
+ doc = minidom.getDOMImplementation().createDocument(None, "channels", None)
+ top = doc.documentElement
+ for c in _channels:
+ node = doc.createElement('channel')
+ c._writexml(node)
+ top.appendChild(node)
+ f = open(CACHE, 'w')
+ f.write(doc.toprettyxml())
+ f.close()
+
+
+def _init():
+ """
+ Load cached channels from disc.
+ """
+
+ def parse_channel(c):
+ for d in c.childNodes:
+ if not d.nodeName == 'directory':
+ continue
+ dirname = unicode_to_str(d.childNodes[0].data.strip())
+ url = unicode_to_str(c.getAttribute('url'))
+ channel = _get_channel(url, dirname)
+ channel._readxml(c)
+ _channels.append(channel)
+ return
+
+ global _initialized
+ _initialized = True
+ if not os.path.isfile(CACHE):
+ return
+
+ try:
+ cache = minidom.parse(CACHE)
+ except:
+ log.exception('bad cache file: %s' % CACHE)
+ return
+ if not len(cache.childNodes) == 1 or \
+ not cache.childNodes[0].nodeName == 'channels':
+ log.error('bad cache file: %s' % CACHE)
+ return
+
+ for c in cache.childNodes[0].childNodes:
+ try:
+ parse_channel(c)
+ except:
+ log.exception('bad cache file: %s' % CACHE)
+
+
+_updating = False
+
+@kaa.notifier.yield_execution()
+def update(verbose=False):
+ """
+ Update all channels
+ """
+ global _updating
+ if _updating:
+ yield False
+ if not _initialized:
+ _init()
+ _updating = True
+ for channel in _channels:
+ yield channel.update(verbose=verbose)
+ _updating = False
+ yield True
Modified: trunk/WIP/netsearch/src/feed/plugins/rss.py
==============================================================================
--- trunk/WIP/netsearch/src/feed/plugins/rss.py (original)
+++ trunk/WIP/netsearch/src/feed/plugins/rss.py Sat Sep 15 13:43:38 2007
@@ -1,4 +1,5 @@
import re
+import time
import logging
import kaa.notifier
@@ -9,6 +10,7 @@
# get logging object
log = logging.getLogger('beacon.feed')
+isotime = '%a, %d %b %Y %H:%M:%S'
class RSS(Channel):
@@ -37,24 +39,53 @@
# real iterate
for f in feed.entries:
- if 'link' in f.keys():
- link = f.link
+
+ metadata = {}
+
+ if feedimage:
+ metadata['image'] = feedimage
+ if 'updated' in f.keys():
+ date = f.updated
+ if date.find('+') > 0:
+ date = date[:date.find('+')].strip()
+ if date.rfind(' ') > date.rfind(':'):
+ date = date[:date.rfind(' ')]
+ try:
+ metadata['date'] = int(time.mktime(time.strptime(date, isotime)))
+ except ValueError:
+ log.error('bad date format: %s', date)
+
+ if 'itunes_duration' in f.keys():
+ duration = 0
+ for p in f.itunes_duration.split(':'):
+ duration = duration * 60 + int(p)
+ metadata['length'] = duration
+ if 'summary' in f.keys():
+ metadata['description']=f.summary
+ if 'title' in f.keys():
+ metadata['title'] = f.title
+
if 'enclosures' in f.keys():
# FIXME: more details than expected
if len(f.enclosures) > 1:
log.warning('more than one enclosure in %s' % self.url)
- link = f.enclosures[0].href
+ metadata['url'] = f.enclosures[0].href
+ for ptype in ('video', 'audio', 'image'):
+ if f.enclosures[0].type.startswith(ptype):
+ metadata['type'] = ptype
+ break
+ elif 'link' in f.keys():
+ # bad RSS style
+ metadata['url'] = f.link
+ else:
+ log.error('no link in entry for %s' % self.url)
+ continue
+
# FIXME: add much better logic here, including
# getting a good basename for urls ending with /
# based on type.
- if not link:
- log.error('no link in entry for %s' % self.url)
- continue
# create entry
- entry = Channel.Entry(basename=link[link.rfind('/')+1:], url=link,
- description=f.get('summary', ''), image=feedimage)
- if 'title' in f:
- entry['title'] = f['title']
+ entry = Channel.Entry(**metadata)
yield entry
-register(re.compile('^https?://.*'), RSS)
+register(re.compile('^(http|https|file)://.*'), RSS)
Modified: trunk/WIP/netsearch/test/feed.py
==============================================================================
--- trunk/WIP/netsearch/test/feed.py (original)
+++ trunk/WIP/netsearch/test/feed.py Sat Sep 15 13:43:38 2007
@@ -9,10 +9,16 @@
print 'no channels defined'
sys.exit(0)
kaa.beacon.connect()
- kaa.netsearch.feed.update(verbose=all).connect(sys.exit)
+ kaa.netsearch.feed.update(verbose=True).connect(sys.exit)
kaa.notifier.loop()
sys.exit(0)
+
+kaa.beacon.connect()
+kaa.netsearch.feed.add_channel('http://foo', 'bar')
+
+sys.exit(0)
+
# import gtk for gui
import pygtk
pygtk.require('2.0')
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog