Author: dmeyer
Date: Fri Jul 13 16:01:22 2007
New Revision: 2762

Log:
Some random ideas about media feeds and how to integrate
such feeds into kaa or freevo. Open for discussion.



Added:
   trunk/WIP/mediafeed.py

Added: trunk/WIP/mediafeed.py
==============================================================================
--- (empty file)
+++ trunk/WIP/mediafeed.py      Fri Jul 13 16:01:22 2007
@@ -0,0 +1,284 @@
+import sys
+import os
+import re
+import urllib
+import urllib2
+
+# external deps
+from BeautifulSoup import BeautifulSoup
+import feedparser
+
+import kaa.notifier
+
+# ##################################################################
+# Brain Dump
+#
+# - Improve RSS channel for better video and audio feed support
+#   https://channelguide.participatoryculture.org/front
+# - Flickr image channel
+# - Torrent downloader (needed for some democracy channels)
+# - Add more item metadata (e.g. download thumbnail/image)
+# - Channel configuration:
+#   o always download / download on demand / play from stream
+#   o how many entries should be shown
+#   o keep entries on hd (while in feed / while not watched / up to x)
+# - Add parallel download function
+# - Add channel as 'file' to kaa.beacon making it possible to merge
+#   feed entries and real files.
+#   o does it belong in beacon?
+#   o is it an extra kaa module with beacon plugin?
+#   o daemon to keep feeds in beacon up-to-date
+#
+# ##################################################################
+
+
+# ##################################################################
+# generic status object for InProgress
+# ##################################################################
+
+class Status(kaa.notifier.Signal):
+    def __init__(self):
+        super(Status,self).__init__()
+        self.percent = 0
+        self.pos = 0
+        self.max = 0
+
+    def set(self, pos, max=None):
+        if max is not None:
+            self.max = max
+        self.pos = pos
+        if pos > self.max:
+            self.max = pos
+        if self.max:
+            self.percent = (self.pos * 100) / self.max
+        else:
+            self.percent = 0
+        self.emit()
+
+    def update(self, diff):
+        self.set(self.pos + diff)
+
+
+    def __str__(self):
+        n = 0
+        if self.max:
+            n = int((self.pos / float(self.max)) * 50)
+        return "|%51s| %d / %d" % (("="*n + ">").ljust(51), self.pos, self.max)
+
+
+# ##################################################################
+# function to download to a file with status information
+# ##################################################################
+
+def fetch_HTTP(url, filename):
+    def download(url, filename, status):
+        src = urllib2.urlopen(url)
+        dst = open(filename, 'w')
+        status.set(0, int(src.info().get('Content-Length', 0)))
+        while True:
+            data = src.read(1024)
+            if len(data) == 0:
+                src.close()
+                dst.close()
+                return True
+            status.update(len(data))
+            dst.write(data)
+
+    s = Status()
+    t = kaa.notifier.Thread(download, url, filename, s)
+    t.wait_on_exit(False)
+    async = t.start()
+    async.set_status(s)
+    return async
+
+
+# ##################################################################
+# some generic entry/channel stuff
+# ##################################################################
+
+class Entry(dict):
+
+    def __getattr__(self, attr):
+        if attr == 'basename' and not 'basename' in self.keys():
+            self['basename'] = self['title'].replace('/', '') + '.' + 
self['ext']
+        return self.get(attr)
+
+    def fetch(self, filename):
+        print '%s -> %s' % (self.url, filename)
+        return fetch_HTTP(self.url, filename)
+
+
+# Base class for all channels. Subclasses provide __iter__, which
+# yields Entry objects mixed with kaa.notifier.InProgress objects;
+# the latter mean "wait for this before asking for the next item".
+class Channel(object):
+
+    def __init__(self, url):
+        # url: channel address; its exact meaning is up to the subclass
+        self.url = url
+
+    # Some internal helper functions
+
+    # Run a callable (args[0]) with the remaining arguments in a
+    # kaa.notifier.Thread. The object returned by start() is kept in
+    # self._async, replacing the one from any earlier call, and is
+    # also returned.
+    def _thread(self, *args, **kwargs):
+        t = kaa.notifier.Thread(*args, **kwargs)
+        # NOTE(review): presumably tells kaa not to wait for this
+        # thread on shutdown -- confirm against kaa.notifier docs.
+        t.wait_on_exit(False)
+        self._async = t.start()
+        return self._async
+
+    # Parse the feed at url with feedparser in a thread.
+    def _feedparser(self, url):
+        return self._thread(feedparser.parse, url)
+
+    # Open url and parse the response with BeautifulSoup in a thread.
+    def _beautifulsoup(self, url):
+        def __beautifulsoup(url):
+            return BeautifulSoup(urllib2.urlopen(url))
+        return self._thread(__beautifulsoup, url)
+
+    # Read the complete body of url in a thread.
+    def _readurl(self, url):
+        def __readurl(url):
+            return urllib2.urlopen(url).read()
+        return self._thread(__readurl, url)
+
+    # Result of the most recent _thread() call. Callers in this file
+    # use it only after having yielded the object _thread() returned.
+    def _get_result(self):
+        return self._async.get_result()
+
+
+    # update (download) feed
+
+    # Walk over the channel and download every entry whose target file
+    # does not exist yet in destdir.
+    #   destdir: directory the files are stored in
+    #   num:     stop after this many entries (existing files that are
+    #            skipped count as well). With the default 0 the
+    #            counter goes negative and never reaches 0 again, so
+    #            all entries are processed.
+    # NOTE(review): presumably yield_execution() resumes this
+    # generator once a yielded InProgress has finished -- confirm
+    # against the kaa.notifier documentation.
+    @kaa.notifier.yield_execution()
+    def update(self, destdir, num=0):
+        # Rewrite the progress bar in place ('\r' without newline).
+        def print_status(s):
+            sys.stdout.write("%s\r" % str(s))
+            sys.stdout.flush()
+
+        for entry in self:
+            if isinstance(entry, kaa.notifier.InProgress):
+                # dummy entry to signal waiting
+                yield entry
+                continue
+            num -= 1
+            filename = os.path.join(destdir, entry.basename)
+            if os.path.isfile(filename):
+                # Any existing file counts as already downloaded.
+                print 'skip', filename
+            else:
+                async = entry.fetch(filename)
+                # The status object is passed as callback argument so
+                # print_status can render it on every emit.
+                async.get_status().connect(print_status, async.get_status())
+                # Wait for this download before starting the next one.
+                yield async
+            if num == 0:
+                return
+
+
+# ##################################################################
+# specific channels
+# ##################################################################
+
+class RSS(Channel):
+
+    def __iter__(self):
+        # get feed in a thread
+        yield self._feedparser(self.url)
+
+        if not self._get_result().entries:
+            print 'oops'
+            raise StopIteration
+
+        # real iterate
+        for f in self._get_result().entries:
+            if 'link' in f.keys():
+                link = f.link
+            if 'enclosures' in f.keys():
+                # FIXME: more details than expected
+                if len(f.enclosures) > 1:
+                    print 'WARNING:', f.enclosures
+                link = f.enclosures[0].href
+            # FIXME: add much better logic here, including
+            # getting a good basename for urls ending with /
+            # based on type.
+            if not link:
+                print 'WARNING', f
+            entry = Entry(basename=link[link.rfind('/')+1:], url=link)
+            # FIXME: description, etc missing
+            if 'title' in f:
+                entry['title'] = f['title']
+            yield entry
+
+
+class Stage6(Channel):
+
+    match_video = re.compile('.*/video/([0-9]+)/').match
+
+    def __iter__(self):
+        baseurl = 'http://stage6.divx.com/%s/videos/order:date' % self.url
+        counter = 0
+        while True:
+            counter += 1
+            url = baseurl
+            if counter > 1:
+                url = baseurl + '?page=%s' % counter
+
+            # get page in a thread
+            yield self._beautifulsoup(url)
+            hits = self._get_result().findAll(
+                'a', href=lambda(v): Stage6.match_video(unicode(v)))
+            if not len(hits):
+                raise StopIteration
+
+            # iterate over the hits on the page
+            for url in hits:
+                title = url.get('title')
+                if not title:
+                    continue
+                # We could grab the side of the video to get the tags of this
+                # clip but maybe it costs too much time.
+                vid = Stage6.match_video(url.get('href')).groups()[0]
+                vurl = url='http://video.stage6.com/%s/.divx' % vid
+                yield Entry(id=vid, title=title, ext='divx', url=vurl)
+
+
+class YouTube(Channel):
+
+    def __init__(self, tags):
+        url = 'http://www.youtube.com/rss/tag/%s.rss' % urllib.quote(tags)
+        super(YouTube, self).__init__(url)
+
+    def __iter__(self):
+        # get feed in a thread
+        yield self._feedparser(self.url)
+
+        # real iterate
+        for f in self._get_result().entries:
+            yield self._readurl(f.id)
+            m = re.search('"/player2.swf[^"]*youtube.com/&([^"]*)', 
self._get_result())
+            url = 'http://youtube.com/get_video?' + m.groups()[0]
+            yield Entry(url=url, title=f.title, ext='flv')
+
+
+# ##################################################################
+# test code
+# ##################################################################
+
+class Filter(Channel):
+
+    def __init__(self, channel, filter):
+        Channel.__init__(self, None)
+        self._channel = channel
+        self._filter = filter
+
+    def __iter__(self):
+        for f in self._channel:
+            if isinstance(f, kaa.notifier.InProgress):
+                # dummy entry to signal waiting
+                yield f
+                continue
+            if self._filter(f):
+                yield f
+
+# Manual test driver, executed at module level. Exactly one of the
+# examples below is active; the others are kept commented out.
+
+# Example: Stage6 channel restricted by a title filter.
+# c = Stage6('Diva-Channel')
+# f = Filter(c, lambda(e): 'Lisa Loeb' in e.title)
+# f.update('/local/video')
+
+# Example: first two entries of a Stage6 channel.
+# f = Stage6('Diva-Channel')
+# f.update('/local/video/MusicVideo', 2)
+
+# Active example: first two entries of an RSS video podcast.
+# NOTE(review): sys.exit is connected to the object update() returns,
+# presumably so the process ends when the update is done -- confirm
+# against the kaa.notifier documentation.
+f = RSS('http://www.tagesschau.de/export/video-podcast')
+f.update('/local/video', 2).connect(sys.exit)
+
+# Example: first two entries of a YouTube tag search.
+# f = YouTube(tags='robot chicken')
+# f.update('/local/video', 2)
+
+# Start the kaa.notifier loop.
+kaa.notifier.loop()

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to