Author: dmeyer Date: Sat Aug 18 15:53:42 2007 New Revision: 2780 Log: refactor code, fix small stuff
Added: trunk/WIP/netsearch/src/feed/__init__.py trunk/WIP/netsearch/src/feed/download.py trunk/WIP/netsearch/src/feed/plugins/ trunk/WIP/netsearch/src/feed/plugins/__init__.py trunk/WIP/netsearch/src/feed/plugins/rss.py trunk/WIP/netsearch/src/feed/plugins/stage6.py trunk/WIP/netsearch/src/feed/plugins/youtube.py trunk/WIP/netsearch/test/feed.py Modified: trunk/WIP/netsearch/src/feed/channel.py Added: trunk/WIP/netsearch/src/feed/__init__.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/__init__.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,27 @@ +# ################################################################## +# Brain Dump +# +# - Improve RSS channel for better video and audio feed support +# https://channelguide.participatoryculture.org/front +# - Flickr image channel +# - Torrent downloader (needed for some democracy channels) +# - Add more item metadata (e.g. download thumbnail/image) +# - Channel configuration: +# o always download / download on demand / play from stream +# o how much entries should be show +# o keep entries on hd (while in feed / while not watched / up to x) +# - Add parallel download function +# - Add channel as 'file' to kaa.beacon making it possible to merge +# feed entries and real files. +# o does it belong into beacon? +# o is it an extra kaa module with beacon plugin? 
+# o daemon to keep feeds in beacon up-to-date +# +# ################################################################## + + +import channel +import plugins + +add_password = channel.pm.add_password +Channel = channel.get_channel Modified: trunk/WIP/netsearch/src/feed/channel.py ============================================================================== --- trunk/WIP/netsearch/src/feed/channel.py (original) +++ trunk/WIP/netsearch/src/feed/channel.py Sat Aug 18 15:53:42 2007 @@ -1,6 +1,7 @@ import sys import os import re +import md5 import urllib import urllib2 @@ -12,97 +13,26 @@ import kaa.beacon import kaa.strutils +from download import fetch + for t in ('video', 'audio', 'image'): kaa.beacon.register_file_type_attrs( t, mediafeed_channel = (int, kaa.beacon.ATTR_SIMPLE)) -# ################################################################## -# Brain Dump -# -# - Improve RSS channel for better video and audio feed support -# https://channelguide.participatoryculture.org/front -# - Flickr image channel -# - Torrent downloader (needed for some democracy channels) -# - Add more item metadata (e.g. download thumbnail/image) -# - Channel configuration: -# o always download / download on demand / play from stream -# o how much entries should be show -# o keep entries on hd (while in feed / while not watched / up to x) -# - Add parallel download function -# - Add channel as 'file' to kaa.beacon making it possible to merge -# feed entries and real files. -# o does it belong into beacon? -# o is it an extra kaa module with beacon plugin? 
-# o daemon to keep feeds in beacon up-to-date -# -# ################################################################## - - -# ################################################################## -# generic status object for InProgress -# ################################################################## - -class Status(kaa.notifier.Signal): - def __init__(self): - super(Status,self).__init__() - self.percent = 0 - self.pos = 0 - self.max = 0 - - def set(self, pos, max=None): - if max is not None: - self.max = max - self.pos = pos - if pos > self.max: - self.max = pos - if self.max: - self.percent = (self.pos * 100) / self.max - else: - self.percent = 0 - self.emit() - - def update(self, diff): - self.set(self.pos + diff) - - - def __str__(self): - n = 0 - if self.max: - n = int((self.pos / float(self.max)) * 50) - return "|%51s| %d / %d" % (("="*n + ">").ljust(51), self.pos, self.max) - +pm = urllib2.HTTPPasswordMgrWithDefaultRealm() +auth_handler = urllib2.HTTPBasicAuthHandler(pm) +opener = urllib2.build_opener(auth_handler) +urllib2.install_opener(opener) # ################################################################## -# function to download to a file with status information +# some generic entry/channel stuff # ################################################################## -def fetch_HTTP(url, filename): - def download(url, filename, status): - src = urllib2.urlopen(url) - dst = open(filename, 'w') - status.set(0, int(src.info().get('Content-Length', 0))) - while True: - data = src.read(1024) - if len(data) == 0: - src.close() - dst.close() - return True - status.update(len(data)) - dst.write(data) - - s = Status() - t = kaa.notifier.Thread(download, url, filename, s) - t.wait_on_exit(False) - async = t.start() - async.set_status(s) - return async - +IMAGEDIR = os.path.expanduser("~/.beacon/feedinfo/images") -# ################################################################## -# some generic entry/channel stuff -# 
################################################################## +if not os.path.isdir(IMAGEDIR): + os.makedirs(IMAGEDIR) -IMAGEDIR = '/tmp' class Entry(dict): @@ -113,7 +43,7 @@ def fetch(self, filename): print '%s -> %s' % (self.url, filename) - return fetch_HTTP(self.url, filename) + return fetch(self.url, filename) class Channel(object): @@ -130,7 +60,7 @@ return self._async def _feedparser(self, url): - return self._thread(feedparser.parse, url) + return self._thread(feedparser.parse, urllib2.urlopen(url)) def _beautifulsoup(self, url): def __beautifulsoup(url): @@ -148,7 +78,8 @@ @kaa.notifier.yield_execution() def _get_image(self, url): url = kaa.strutils.unicode_to_str(url) - fname = os.path.join(IMAGEDIR, url.replace('/', '.')) + fname = md5.md5(url).hexdigest() + os.path.splitext(url)[1] + fname = os.path.join(IMAGEDIR, fname) if os.path.isfile(fname): yield fname img = open(fname, 'w') @@ -171,13 +102,10 @@ continue num -= 1 filename = os.path.join(destdir, entry.basename) - if os.path.isfile(filename): - print 'skip', filename - else: - # FIXME: download to tmp dir first - async = entry.fetch(filename) - async.get_status().connect(print_status, async.get_status()) - yield async + # FIXME: download to tmp dir first + async = entry.fetch(filename) + async.get_status().connect(print_status, async.get_status()) + yield async # FIXME: add additional information to beacon if num == 0: return @@ -202,7 +130,7 @@ else: data = {} for key in ('url', 'title', 'description', 'image'): - if entry.get('key'): + if entry.get(key): data[key] = entry[key] i = kaa.beacon.add_item(type='video', parent=d, mediafeed_channel=self.url, **data) @@ -213,142 +141,15 @@ for i in items.values(): i.delete() +_generators = [] -# ################################################################## -# specific channels -# ################################################################## - -class RSS(Channel): - - def __iter__(self): - # get feed in a thread - yield 
self._feedparser(self.url) - - if not self._get_result().entries: - print 'oops' - raise StopIteration - - # basic information - feedimage = None - if self._get_result().feed.get('image'): - feedimage = self._get_result().feed.get('image').get('href') - - if feedimage: - feedimage = self._get_image(feedimage) - if isinstance(feedimage, kaa.notifier.InProgress): - yield feedimage - feedimage = feedimage.get_result() - - # real iterate - for f in self._get_result().entries: - if 'link' in f.keys(): - link = f.link - if 'enclosures' in f.keys(): - # FIXME: more details than expected - if len(f.enclosures) > 1: - print 'WARNING:', f.enclosures - link = f.enclosures[0].href - # FIXME: add much better logic here, including - # getting a good basename for urls ending with / - # based on type. - if not link: - print 'WARNING', f - - # FIXME: beacon does not thumbnail the image without - # a rescan of the directory! - entry = Entry(basename=link[link.rfind('/')+1:], url=link, - description=f.get('summary', ''), image=feedimage) - if 'title' in f: - entry['title'] = f['title'] - yield entry - - -class Stage6(Channel): - - match_video = re.compile('.*/video/([0-9]+)/').match - - def __iter__(self): - baseurl = 'http://stage6.divx.com/%s/videos/order:date' % self.url - counter = 0 - while True: - counter += 1 - url = baseurl - if counter > 1: - url = baseurl + '?page=%s' % counter - - # get page in a thread - yield self._beautifulsoup(url) - hits = self._get_result().findAll( - 'a', href=lambda(v): Stage6.match_video(unicode(v))) - if not len(hits): - raise StopIteration - - # iterate over the hits on the page - for url in hits: - title = url.get('title') - if not title: - continue - # FIXME: grab the side of the video to get the tags of this - # clip and an image - vid = Stage6.match_video(url.get('href')).groups()[0] - vurl = url='http://video.stage6.com/%s/.divx' % vid - yield Entry(id=vid, title=title, ext='divx', url=vurl) - - -class YouTube(Channel): - - def 
__init__(self, tags): - url = 'http://www.youtube.com/rss/tag/%s.rss' % urllib.quote(tags) - super(YouTube, self).__init__(url) - - def __iter__(self): - # get feed in a thread - yield self._feedparser(self.url) - - # real iterate - for f in self._get_result().entries: - yield self._readurl(f.id) - m = re.search('"/player2.swf[^"]*youtube.com/&([^"]*)', self._get_result()) - url = 'http://youtube.com/get_video?' + m.groups()[0] - yield Entry(url=url, title=f.title, ext='flv') - - -# ################################################################## -# test code -# ################################################################## - -class Filter(Channel): - - def __init__(self, channel, filter): - Channel.__init__(self, None) - self._channel = channel - self._filter = filter - - def __iter__(self): - for f in self._channel: - if isinstance(f, kaa.notifier.InProgress): - # dummy entry to signal waiting - yield f - continue - if self._filter(f): - yield f +def register(regexp, generator): + _generators.append((regexp, generator)) [email protected]_execution() -def update_feeds(*feeds): - for feed, destdir, num, download in feeds: - if download: - yield feed.update(destdir, num) - else: - yield feed.store_in_beacon(destdir, num) - -kaa.beacon.connect() -d = '/local/video/feedtest' -update_feeds((RSS('http://podcast.wdr.de/blaubaer.xml'), d, 5, False), - (RSS('http://podcast.nationalgeographic.com/wild-chronicles/'), - d, 5, False), - (RSS('http://www.tagesschau.de/export/video-podcast'), d, 1, False), - (YouTube(tags='robot chicken'), d, 2, True), - (Stage6('Diva-Channel'), d, 5, False)).\ - connect(sys.exit) +def get_channel(url): + for regexp, generator in _generators: + if regexp.match(url): + return generator(url) + raise RuntimeError -kaa.notifier.loop() + Added: trunk/WIP/netsearch/src/feed/download.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/download.py Sat Aug 18 15:53:42 2007 @@
-0,0 +1,82 @@ +import os +import stat +import urllib +import urllib2 +import kaa.notifier + +class Status(kaa.notifier.Signal): + """ + Generic status object for InProgress + """ + def __init__(self): + super(Status,self).__init__() + self.percent = 0 + self.pos = 0 + self.max = 0 + + def set(self, pos, max=None): + if max is not None: + self.max = max + self.pos = pos + if pos > self.max: + self.max = pos + if self.max: + self.percent = (self.pos * 100) / self.max + else: + self.percent = 0 + self.emit() + + def update(self, diff): + self.set(self.pos + diff) + + + def __str__(self): + n = 0 + if self.max: + n = int((self.pos / float(self.max)) * 50) + return "|%51s| %d / %d" % (("="*n + ">").ljust(51), self.pos, self.max) + + +def fetch_HTTP(url, filename): + """ + Fetch HTTP URL. + """ + def download(url, filename, status): + src = urllib2.urlopen(url) + length = int(src.info().get('Content-Length', 0)) + print length + if os.path.isfile(filename) and os.stat(filename)[stat.ST_SIZE] == length: + return True + tmpname = os.path.join(os.path.dirname(filename), + '.' + os.path.basename(filename)) + dst = open(tmpname, 'w') + status.set(0, length) + while True: + data = src.read(1024) + if len(data) == 0: + src.close() + dst.close() + os.rename(tmpname, filename) + return True + status.update(len(data)) + dst.write(data) + + if url.find(' ') > 0: + # stupid url encoding in url + url = url[:8+url[8:].find('/')] + \ + urllib.quote(url[8+url[8:].find('/'):]) + s = Status() + t = kaa.notifier.Thread(download, url, filename, s) + t.wait_on_exit(False) + async = t.start() + async.set_status(s) + return async + + +def fetch(url, filename): + """ + Generic fetch function. 
+ """ + if url.startswith('http://') or url.startswith('https://'): + return fetch_HTTP(url, filename) + raise RuntimeError('unable to fetch %s' % url) Added: trunk/WIP/netsearch/src/feed/plugins/__init__.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/plugins/__init__.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,6 @@ +import os + +for plugin in os.listdir(os.path.dirname(__file__)): + if plugin.endswith('.py') and not plugin == '__init__.py': + exec('import %s' % os.path.splitext(plugin)[0]) + Added: trunk/WIP/netsearch/src/feed/plugins/rss.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/plugins/rss.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,48 @@ +import re +import kaa.notifier +from kaa.netsearch.feed.channel import Channel, Entry, register + + +class RSS(Channel): + + def __iter__(self): + # get feed in a thread + yield self._feedparser(self.url) + if not self._get_result().entries: + print 'oops' + raise StopIteration + + # basic information + feedimage = None + if self._get_result().feed.get('image'): + feedimage = self._get_result().feed.get('image').get('href') + + if feedimage: + feedimage = self._get_image(feedimage) + if isinstance(feedimage, kaa.notifier.InProgress): + yield feedimage + feedimage = feedimage.get_result() + + # real iterate + for f in self._get_result().entries: + if 'link' in f.keys(): + link = f.link + if 'enclosures' in f.keys(): + # FIXME: more details than expected + if len(f.enclosures) > 1: + print 'WARNING:', f.enclosures + link = f.enclosures[0].href + # FIXME: add much better logic here, including + # getting a good basename for urls ending with / + # based on type. + if not link: + print 'WARNING', f + # FIXME: beacon does not thumbnail the image without + # a rescan of the directory! 
+ entry = Entry(basename=link[link.rfind('/')+1:], url=link, + description=f.get('summary', ''), image=feedimage) + if 'title' in f: + entry['title'] = f['title'] + yield entry + +register(re.compile('^https?://.*'), RSS) Added: trunk/WIP/netsearch/src/feed/plugins/stage6.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/plugins/stage6.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,33 @@ +import re +from kaa.netsearch.feed.channel import Channel, Entry, register + +class Stage6(Channel): + + match_video = re.compile('.*/video/([0-9]+)/').match + + def __iter__(self): + baseurl = 'http://stage6.divx.com/%s/videos/order:date' % self.url + counter = 0 + while True: + counter += 1 + url = baseurl + if counter > 1: + url = baseurl + '?page=%s' % counter + + # get page in a thread + yield self._beautifulsoup(url) + hits = self._get_result().findAll( + 'a', href=lambda(v): Stage6.match_video(unicode(v))) + if not len(hits): + raise StopIteration + + # iterate over the hits on the page + for url in hits: + title = url.get('title') + if not title: + continue + # FIXME: grab the side of the video to get the tags of this + # clip and an image + vid = Stage6.match_video(url.get('href')).groups()[0] + vurl = url='http://video.stage6.com/%s/.divx' % vid + yield Entry(id=vid, title=title, ext='divx', url=vurl) Added: trunk/WIP/netsearch/src/feed/plugins/youtube.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/src/feed/plugins/youtube.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,19 @@ +import re +from kaa.netsearch.feed.channel import Channel, Entry, register + +class YouTube(Channel): + + def __init__(self, tags): + url = 'http://www.youtube.com/rss/tag/%s.rss' % urllib.quote(tags) + super(YouTube, self).__init__(url) + + def __iter__(self): + # get feed in a thread + yield self._feedparser(self.url) + + # real iterate + for f in 
self._get_result().entries: + yield self._readurl(f.id) + m = re.search('"/player2.swf[^"]*youtube.com/&([^"]*)', self._get_result()) + url = 'http://youtube.com/get_video?' + m.groups()[0] + yield Entry(url=url, title=f.title, ext='flv') Added: trunk/WIP/netsearch/test/feed.py ============================================================================== --- (empty file) +++ trunk/WIP/netsearch/test/feed.py Sat Aug 18 15:53:42 2007 @@ -0,0 +1,44 @@ +import sys +import kaa.notifier +from kaa.netsearch.feed import Channel + +# ################################################################## +# test code +# ################################################################## + +# class Filter(Channel): + +# def __init__(self, channel, filter): +# Channel.__init__(self, None) +# self._channel = channel +# self._filter = filter + +# def __iter__(self): +# for f in self._channel: +# if isinstance(f, kaa.notifier.InProgress): +# # dummy entry to signal waiting +# yield f +# continue +# if self._filter(f): +# yield f + [email protected]_execution() +def update_feeds(*feeds): + for feed, destdir, num, download in feeds: + if download: + yield feed.update(destdir, num) + else: + yield feed.store_in_beacon(destdir, num) + +kaa.beacon.connect() +d = '/local/video/feedtest' +update_feeds((Channel('http://podcast.wdr.de/blaubaer.xml'), d, 5, False), + (Channel('http://podcast.nationalgeographic.com/wild-chronicles/'), \ + d, 5, False)).\ + connect(sys.exit) +# (Channel('http://www.tagesschau.de/export/video-podcast'), d, 1, False), +# (YouTube(tags='robot chicken'), d, 2, True), +# (Stage6('stage6://Diva-Channel'), d, 5, False)).\ + +kaa.notifier.loop() + ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/ _______________________________________________ Freevo-cvslog mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/freevo-cvslog
