Author: dmeyer
Date: Sat Aug 18 15:53:42 2007
New Revision: 2780

Log:
refactor code, fix small stuff

Added:
   trunk/WIP/netsearch/src/feed/__init__.py
   trunk/WIP/netsearch/src/feed/download.py
   trunk/WIP/netsearch/src/feed/plugins/
   trunk/WIP/netsearch/src/feed/plugins/__init__.py
   trunk/WIP/netsearch/src/feed/plugins/rss.py
   trunk/WIP/netsearch/src/feed/plugins/stage6.py
   trunk/WIP/netsearch/src/feed/plugins/youtube.py
   trunk/WIP/netsearch/test/feed.py
Modified:
   trunk/WIP/netsearch/src/feed/channel.py

Added: trunk/WIP/netsearch/src/feed/__init__.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/__init__.py    Sat Aug 18 15:53:42 2007
@@ -0,0 +1,27 @@
+# ##################################################################
+# Brain Dump
+#
+# - Improve RSS channel for better video and audio feed support
+#   https://channelguide.participatoryculture.org/front
+# - Flickr image channel
+# - Torrent downloader (needed for some democracy channels)
+# - Add more item metadata (e.g. download thumbnail/image)
+# - Channel configuration:
+#   o always download / download on demand / play from stream
+#   o how many entries should be shown
+#   o keep entries on hd (while in feed / while not watched / up to x)
+# - Add parallel download function
+# - Add channel as 'file' to kaa.beacon making it possible to merge
+#   feed entries and real files.
+#   o does it belong into beacon?
+#   o is it an extra kaa module with beacon plugin?
+#   o daemon to keep feeds in beacon up-to-date
+#
+# ##################################################################
+
+
+import channel
+import plugins
+
+# Public API aliases: add_password() feeds the shared urllib2 password
+# manager in channel.py, Channel(url) creates a channel object via the
+# plugin registry (see channel.get_channel).
+add_password = channel.pm.add_password
+Channel = channel.get_channel

Modified: trunk/WIP/netsearch/src/feed/channel.py
==============================================================================
--- trunk/WIP/netsearch/src/feed/channel.py     (original)
+++ trunk/WIP/netsearch/src/feed/channel.py     Sat Aug 18 15:53:42 2007
@@ -1,6 +1,7 @@
 import sys
 import os
 import re
+import md5
 import urllib
 import urllib2
 
@@ -12,97 +13,26 @@
 import kaa.beacon
 import kaa.strutils
 
+from download import fetch
+
 for t in ('video', 'audio', 'image'):
     kaa.beacon.register_file_type_attrs(
         t, mediafeed_channel = (int, kaa.beacon.ATTR_SIMPLE))
 
-# ##################################################################
-# Brain Dump
-#
-# - Improve RSS channel for better video and audio feed support
-#   https://channelguide.participatoryculture.org/front
-# - Flickr image channel
-# - Torrent downloader (needed for some democracy channels)
-# - Add more item metadata (e.g. download thumbnail/image)
-# - Channel configuration:
-#   o always download / download on demand / play from stream
-#   o how much entries should be show
-#   o keep entries on hd (while in feed / while not watched / up to x)
-# - Add parallel download function
-# - Add channel as 'file' to kaa.beacon making it possible to merge
-#   feed entries and real files.
-#   o does it belong into beacon?
-#   o is it an extra kaa module with beacon plugin?
-#   o daemon to keep feeds in beacon up-to-date
-#
-# ##################################################################
-
-
-# ##################################################################
-# generic status object for InProgress
-# ##################################################################
-
-class Status(kaa.notifier.Signal):
-    def __init__(self):
-        super(Status,self).__init__()
-        self.percent = 0
-        self.pos = 0
-        self.max = 0
-
-    def set(self, pos, max=None):
-        if max is not None:
-            self.max = max
-        self.pos = pos
-        if pos > self.max:
-            self.max = pos
-        if self.max:
-            self.percent = (self.pos * 100) / self.max
-        else:
-            self.percent = 0
-        self.emit()
-
-    def update(self, diff):
-        self.set(self.pos + diff)
-
-
-    def __str__(self):
-        n = 0
-        if self.max:
-            n = int((self.pos / float(self.max)) * 50)
-        return "|%51s| %d / %d" % (("="*n + ">").ljust(51), self.pos, self.max)
-
+pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
+auth_handler = urllib2.HTTPBasicAuthHandler(pm)
+opener = urllib2.build_opener(auth_handler)
+urllib2.install_opener(opener)
 
 # ##################################################################
-# function to download to a file with status information
+# some generic entry/channel stuff
 # ##################################################################
 
-def fetch_HTTP(url, filename):
-    def download(url, filename, status):
-        src = urllib2.urlopen(url)
-        dst = open(filename, 'w')
-        status.set(0, int(src.info().get('Content-Length', 0)))
-        while True:
-            data = src.read(1024)
-            if len(data) == 0:
-                src.close()
-                dst.close()
-                return True
-            status.update(len(data))
-            dst.write(data)
-
-    s = Status()
-    t = kaa.notifier.Thread(download, url, filename, s)
-    t.wait_on_exit(False)
-    async = t.start()
-    async.set_status(s)
-    return async
-
+IMAGEDIR = os.path.expanduser("~/.beacon/feedinfo/images")
 
-# ##################################################################
-# some generic entry/channel stuff
-# ##################################################################
+if not os.path.isdir(IMAGEDIR):
+    os.makedirs(IMAGEDIR)
 
-IMAGEDIR = '/tmp'
 
 class Entry(dict):
 
@@ -113,7 +43,7 @@
 
     def fetch(self, filename):
         print '%s -> %s' % (self.url, filename)
-        return fetch_HTTP(self.url, filename)
+        return fetch(self.url, filename)
 
 
 class Channel(object):
@@ -130,7 +60,7 @@
         return self._async
 
     def _feedparser(self, url):
-        return self._thread(feedparser.parse, url)
+        return self._thread(feedparser.parse, urllib2.urlopen(url))
 
     def _beautifulsoup(self, url):
         def __beautifulsoup(url):
@@ -148,7 +78,8 @@
     @kaa.notifier.yield_execution()
     def _get_image(self, url):
         url = kaa.strutils.unicode_to_str(url)
-        fname = os.path.join(IMAGEDIR, url.replace('/', '.'))
+        fname = md5.md5(url).hexdigest() + os.path.splitext(url)[1]
+        fname = os.path.join(IMAGEDIR, fname)
         if os.path.isfile(fname):
             yield fname
         img = open(fname, 'w')
@@ -171,13 +102,10 @@
                 continue
             num -= 1
             filename = os.path.join(destdir, entry.basename)
-            if os.path.isfile(filename):
-                print 'skip', filename
-            else:
-                # FIXME: download to tmp dir first
-                async = entry.fetch(filename)
-                async.get_status().connect(print_status, async.get_status())
-                yield async
+            # FIXME: download to tmp dir first
+            async = entry.fetch(filename)
+            async.get_status().connect(print_status, async.get_status())
+            yield async
             # FIXME: add additional information to beacon
             if num == 0:
                 return
@@ -202,7 +130,7 @@
             else:
                 data = {}
                 for key in ('url', 'title', 'description', 'image'):
-                    if entry.get('key'):
+                    if entry.get(key):
                         data[key] = entry[key]
                 i = kaa.beacon.add_item(type='video', parent=d,
                                         mediafeed_channel=self.url, **data)
@@ -213,142 +141,15 @@
         for i in items.values():
             i.delete()
 
+# registry of (compiled regexp, channel class) pairs filled by register()
+_generators = []
 
-# ##################################################################
-# specific channels
-# ##################################################################
-
-class RSS(Channel):
-
-    def __iter__(self):
-        # get feed in a thread
-        yield self._feedparser(self.url)
-
-        if not self._get_result().entries:
-            print 'oops'
-            raise StopIteration
-
-        # basic information
-        feedimage = None
-        if self._get_result().feed.get('image'):
-            feedimage = self._get_result().feed.get('image').get('href')
-
-        if feedimage:
-            feedimage = self._get_image(feedimage)
-            if isinstance(feedimage, kaa.notifier.InProgress):
-                yield feedimage
-                feedimage = feedimage.get_result()
-
-        # real iterate
-        for f in self._get_result().entries:
-            if 'link' in f.keys():
-                link = f.link
-            if 'enclosures' in f.keys():
-                # FIXME: more details than expected
-                if len(f.enclosures) > 1:
-                    print 'WARNING:', f.enclosures
-                link = f.enclosures[0].href
-            # FIXME: add much better logic here, including
-            # getting a good basename for urls ending with /
-            # based on type.
-            if not link:
-                print 'WARNING', f
-
-            # FIXME: beacon does not thumbnail the image without
-            # a rescan of the directory!
-            entry = Entry(basename=link[link.rfind('/')+1:], url=link,
-                          description=f.get('summary', ''), image=feedimage)
-            if 'title' in f:
-                entry['title'] = f['title']
-            yield entry
-
-
-class Stage6(Channel):
-
-    match_video = re.compile('.*/video/([0-9]+)/').match
-
-    def __iter__(self):
-        baseurl = 'http://stage6.divx.com/%s/videos/order:date' % self.url
-        counter = 0
-        while True:
-            counter += 1
-            url = baseurl
-            if counter > 1:
-                url = baseurl + '?page=%s' % counter
-
-            # get page in a thread
-            yield self._beautifulsoup(url)
-            hits = self._get_result().findAll(
-                'a', href=lambda(v): Stage6.match_video(unicode(v)))
-            if not len(hits):
-                raise StopIteration
-
-            # iterate over the hits on the page
-            for url in hits:
-                title = url.get('title')
-                if not title:
-                    continue
-                # FIXME: grab the side of the video to get the tags of this
-                # clip and an image
-                vid = Stage6.match_video(url.get('href')).groups()[0]
-                vurl = url='http://video.stage6.com/%s/.divx' % vid
-                yield Entry(id=vid, title=title, ext='divx', url=vurl)
-
-
-class YouTube(Channel):
-
-    def __init__(self, tags):
-        url = 'http://www.youtube.com/rss/tag/%s.rss' % urllib.quote(tags)
-        super(YouTube, self).__init__(url)
-
-    def __iter__(self):
-        # get feed in a thread
-        yield self._feedparser(self.url)
-
-        # real iterate
-        for f in self._get_result().entries:
-            yield self._readurl(f.id)
-            m = re.search('"/player2.swf[^"]*youtube.com/&([^"]*)', 
self._get_result())
-            url = 'http://youtube.com/get_video?' + m.groups()[0]
-            yield Entry(url=url, title=f.title, ext='flv')
-
-
-# ##################################################################
-# test code
-# ##################################################################
-
-class Filter(Channel):
-
-    def __init__(self, channel, filter):
-        Channel.__init__(self, None)
-        self._channel = channel
-        self._filter = filter
-
-    def __iter__(self):
-        for f in self._channel:
-            if isinstance(f, kaa.notifier.InProgress):
-                # dummy entry to signal waiting
-                yield f
-                continue
-            if self._filter(f):
-                yield f
+def register(regexp, generator):
+    """
+    Register a channel plugin: regexp is matched against the url given
+    to get_channel() and generator is called with the url to create
+    the channel object.
+    """
+    _generators.append((regexp, generator))
 
[email protected]_execution()
-def update_feeds(*feeds):
-    for feed, destdir, num, download in feeds:
-        if download:
-            yield feed.update(destdir, num)
-        else:
-            yield feed.store_in_beacon(destdir, num)
-            
-kaa.beacon.connect()
-d = '/local/video/feedtest'
-update_feeds((RSS('http://podcast.wdr.de/blaubaer.xml'), d, 5, False),
-             (RSS('http://podcast.nationalgeographic.com/wild-chronicles/'),
-              d, 5, False),
-             (RSS('http://www.tagesschau.de/export/video-podcast'), d, 1, 
False),
-             (YouTube(tags='robot chicken'), d, 2, True),
-             (Stage6('Diva-Channel'), d, 5, False)).\
-             connect(sys.exit)
+def get_channel(url):
+    for regexp, generator in _generators:
+        if regexp.match(url):
+            return generator(url)
+    raise RuntimeError
 
-kaa.notifier.loop()
+    

Added: trunk/WIP/netsearch/src/feed/download.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/download.py    Sat Aug 18 15:53:42 2007
@@ -0,0 +1,82 @@
+import os
+import stat
+import urllib
+import urllib2
+import kaa.notifier
+
+class Status(kaa.notifier.Signal):
+    """
+    Generic status object for InProgress
+    """
+    def __init__(self):
+        super(Status,self).__init__()
+        self.percent = 0
+        self.pos = 0
+        self.max = 0
+
+    def set(self, pos, max=None):
+        if max is not None:
+            self.max = max
+        self.pos = pos
+        if pos > self.max:
+            self.max = pos
+        if self.max:
+            self.percent = (self.pos * 100) / self.max
+        else:
+            self.percent = 0
+        self.emit()
+
+    def update(self, diff):
+        self.set(self.pos + diff)
+
+
+    def __str__(self):
+        n = 0
+        if self.max:
+            n = int((self.pos / float(self.max)) * 50)
+        return "|%51s| %d / %d" % (("="*n + ">").ljust(51), self.pos, self.max)
+
+
+def fetch_HTTP(url, filename):
+    """
+    Fetch HTTP URL.
+    """
+    def download(url, filename, status):
+        src = urllib2.urlopen(url)
+        length = int(src.info().get('Content-Length', 0))
+        print length
+        if os.path.isfile(filename) and os.stat(filename)[stat.ST_SIZE] == 
length:
+            return True
+        tmpname = os.path.join(os.path.dirname(filename),
+                               '.' + os.path.basename(filename))
+        dst = open(tmpname, 'w')
+        status.set(0, length)
+        while True:
+            data = src.read(1024)
+            if len(data) == 0:
+                src.close()
+                dst.close()
+                os.rename(tmpname, filename)
+                return True
+            status.update(len(data))
+            dst.write(data)
+
+    if url.find(' ') > 0:
+        # stupid url encoding in url
+        url = url[:8+url[8:].find('/')] + \
+              urllib.quote(url[8+url[8:].find('/'):])
+    s = Status()
+    t = kaa.notifier.Thread(download, url, filename, s)
+    t.wait_on_exit(False)
+    async = t.start()
+    async.set_status(s)
+    return async
+
+
+def fetch(url, filename):
+    """
+    Generic fetch function.
+    """
+    if url.startswith('http://') or url.startswith('https://'):
+        return fetch_HTTP(url, filename)
+    raise RuntimeError('unable to fetch %s' % url)

Added: trunk/WIP/netsearch/src/feed/plugins/__init__.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/plugins/__init__.py    Sat Aug 18 15:53:42 2007
@@ -0,0 +1,6 @@
+import os
+
+for plugin in os.listdir(os.path.dirname(__file__)):
+    if plugin.endswith('.py') and not plugin == '__init__.py':
+        exec('import %s' % os.path.splitext(plugin)[0])
+    

Added: trunk/WIP/netsearch/src/feed/plugins/rss.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/plugins/rss.py Sat Aug 18 15:53:42 2007
@@ -0,0 +1,48 @@
+import re
+import kaa.notifier
+from kaa.netsearch.feed.channel import Channel, Entry, register
+
+
+class RSS(Channel):
+
+    def __iter__(self):
+        # get feed in a thread
+        yield self._feedparser(self.url)
+        if not self._get_result().entries:
+            print 'oops'
+            raise StopIteration
+
+        # basic information
+        feedimage = None
+        if self._get_result().feed.get('image'):
+            feedimage = self._get_result().feed.get('image').get('href')
+
+        if feedimage:
+            feedimage = self._get_image(feedimage)
+            if isinstance(feedimage, kaa.notifier.InProgress):
+                yield feedimage
+                feedimage = feedimage.get_result()
+
+        # real iterate
+        for f in self._get_result().entries:
+            if 'link' in f.keys():
+                link = f.link
+            if 'enclosures' in f.keys():
+                # FIXME: more details than expected
+                if len(f.enclosures) > 1:
+                    print 'WARNING:', f.enclosures
+                link = f.enclosures[0].href
+            # FIXME: add much better logic here, including
+            # getting a good basename for urls ending with /
+            # based on type.
+            if not link:
+                print 'WARNING', f
+            # FIXME: beacon does not thumbnail the image without
+            # a rescan of the directory!
+            entry = Entry(basename=link[link.rfind('/')+1:], url=link,
+                          description=f.get('summary', ''), image=feedimage)
+            if 'title' in f:
+                entry['title'] = f['title']
+            yield entry
+
+register(re.compile('^https?://.*'), RSS)

Added: trunk/WIP/netsearch/src/feed/plugins/stage6.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/plugins/stage6.py      Sat Aug 18 15:53:42 2007
@@ -0,0 +1,33 @@
+import re
+from kaa.netsearch.feed.channel import Channel, Entry, register
+
+class Stage6(Channel):
+
+    match_video = re.compile('.*/video/([0-9]+)/').match
+
+    def __iter__(self):
+        baseurl = 'http://stage6.divx.com/%s/videos/order:date' % self.url
+        counter = 0
+        while True:
+            counter += 1
+            url = baseurl
+            if counter > 1:
+                url = baseurl + '?page=%s' % counter
+
+            # get page in a thread
+            yield self._beautifulsoup(url)
+            hits = self._get_result().findAll(
+                'a', href=lambda(v): Stage6.match_video(unicode(v)))
+            if not len(hits):
+                raise StopIteration
+
+            # iterate over the hits on the page
+            for url in hits:
+                title = url.get('title')
+                if not title:
+                    continue
+                # FIXME: grab the side of the video to get the tags of this
+                # clip and an image
+                vid = Stage6.match_video(url.get('href')).groups()[0]
+                vurl = url='http://video.stage6.com/%s/.divx' % vid
+                yield Entry(id=vid, title=title, ext='divx', url=vurl)

Added: trunk/WIP/netsearch/src/feed/plugins/youtube.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/src/feed/plugins/youtube.py     Sat Aug 18 15:53:42 2007
@@ -0,0 +1,19 @@
+import re
+from kaa.netsearch.feed.channel import Channel, Entry, register
+
+class YouTube(Channel):
+
+    def __init__(self, tags):
+        url = 'http://www.youtube.com/rss/tag/%s.rss' % urllib.quote(tags)
+        super(YouTube, self).__init__(url)
+
+    def __iter__(self):
+        # get feed in a thread
+        yield self._feedparser(self.url)
+
+        # real iterate
+        for f in self._get_result().entries:
+            yield self._readurl(f.id)
+            m = re.search('"/player2.swf[^"]*youtube.com/&([^"]*)', 
self._get_result())
+            url = 'http://youtube.com/get_video?' + m.groups()[0]
+            yield Entry(url=url, title=f.title, ext='flv')

Added: trunk/WIP/netsearch/test/feed.py
==============================================================================
--- (empty file)
+++ trunk/WIP/netsearch/test/feed.py    Sat Aug 18 15:53:42 2007
@@ -0,0 +1,44 @@
+import sys
+import kaa.notifier
+from kaa.netsearch.feed import Channel
+
+# ##################################################################
+# test code
+# ##################################################################
+
+# class Filter(Channel):
+
+#     def __init__(self, channel, filter):
+#         Channel.__init__(self, None)
+#         self._channel = channel
+#         self._filter = filter
+
+#     def __iter__(self):
+#         for f in self._channel:
+#             if isinstance(f, kaa.notifier.InProgress):
+#                 # dummy entry to signal waiting
+#                 yield f
+#                 continue
+#             if self._filter(f):
+#                 yield f
+
[EMAIL PROTECTED]()
+def update_feeds(*feeds):
+    for feed, destdir, num, download in feeds:
+        if download:
+            yield feed.update(destdir, num)
+        else:
+            yield feed.store_in_beacon(destdir, num)
+
+kaa.beacon.connect()
+d = '/local/video/feedtest'
+update_feeds((Channel('http://podcast.wdr.de/blaubaer.xml'), d, 5, False),
+             
(Channel('http://podcast.nationalgeographic.com/wild-chronicles/'), \
+              d, 5, False)).\
+             connect(sys.exit)
+#              (Channel('http://www.tagesschau.de/export/video-podcast'), d, 
1, False),
+#              (YouTube(tags='robot chicken'), d, 2, True),
+#              (Stage6('stage6://Diva-Channel'), d, 5, False)).\
+
+kaa.notifier.loop()
+

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to