Author: duncan Date: Tue Oct 17 18:49:16 2006 New Revision: 8407 Added: branches/rel-1/freevo/src/plugins/applelib.py branches/rel-1/freevo/src/plugins/appletrailers.py
Log: Added apple trailers Added: branches/rel-1/freevo/src/plugins/applelib.py ============================================================================== --- (empty file) +++ branches/rel-1/freevo/src/plugins/applelib.py Tue Oct 17 18:49:16 2006 @@ -0,0 +1,473 @@ +#!/usr/bin/python +# -*- coding: iso-8859-1 -*- +# ----------------------------------------------------------------------- +# applelib.py - Module for parsing apple's trailer site +# ----------------------------------------------------------------------- +# +# +# ----------------------------------------------------------------------- +# Copyright (C) 2006 Pierre Ossman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MER- +# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# ----------------------------------------------------------------------- + +import sys +import os +import re +import urllib +import urlparse +import pickle + +_DEFAULT_URL = 'http://www.apple.com/trailers/' + +# Date of trailer addition. Comes on a separate line. 
_date_re = re.compile(r'''<dt>(?P<month>[0-9]+)\.(?P<day>[0-9]+)</dt>''', re.IGNORECASE)

# Trailer link
_trailer_link_re = re.compile(r'''<dd><a[^>]*href="(?P<url>[^"]+)"[^>]*>(?P<title>[^<]+).*</a>.*</dd>''', re.IGNORECASE)

# Start of a studio section
_studio_name_re = re.compile(r'''<h4><a[^>]*>(?P<name>[^<]*).*</a>.*</h4>''', re.IGNORECASE)

# Start of a genre section
_genre_name_re = re.compile(r'''<h4>(?P<name>[^<]*).*</h4>''', re.IGNORECASE)

# Trailer link when in studio/genre list
_trailer_list_link_re = re.compile(r'''<li><a[^>]*href="(?P<url>[^"]+)"[^>]*>(?P<title>[^<]+).*</a>.*</li>''', re.IGNORECASE)

# Trailer subpages
_subpage_link_re = re.compile(r'''href="(?P<url>[^"]*(?P<size>sm|small|low|240|mid|medium|320|lg|large|high|480|fullscreen)[^"]*\.html[^"]*)"''', re.IGNORECASE)

# Extra step before trailer page
_frontpage_link_re = re.compile(r'''<a[^>]+href="(?P<url>/trailers/[^"]*(?P<type>trailer|teaser)[^"]*)"[^>]*>''', re.IGNORECASE)

# Stream link regexps
_stream_link_res = (
    re.compile(r'''<param[^>]+name="href"[^>]+value="(?P<url>[^"]+)"[^>]*>''', re.IGNORECASE),
    re.compile(r'''<param[^>]*name="src"[^>]*value="(?P<url>[^"]*)"[^>]*>''', re.IGNORECASE),
    re.compile(r'''XHTML[(]([^)]*\'href\',)?\'(?P<url>[^\']*)\'''', re.IGNORECASE)
    )

# Old regexps
# re.compile(r'''XHTML[(]\'(?P<url>[^\']*)\'''', re.IGNORECASE)

# New script based pages
_scriptpage_link_re = re.compile(r'''href="(?P<url>[^"]*video.html\?[^"]*)"''', re.IGNORECASE)

# Stream URLs in script based pages
_scriptpage_stream_link_re = re.compile(r'''movieAddress[^=]*=[^"]*"(?P<url>[^"]*(?P<size>240|320|480|640)[^"]*\.mov[^"]*)"''', re.IGNORECASE)

# Mapping between size code and name.  Each entry is
# ((codes, ...), (display name, sort key)).
# NOTE(review): _scriptpage_stream_link_re can capture the size "640", which
# has no entry here -- confirm whether it should map to 'Extra Large'.
_sizemap = [
    (('sm', 'small', 'low', '240'), ('Small', 0)),
    (('mid', 'medium', '320'), ('Medium', 1)),
    (('lg', 'large', 'high', '480'), ('Large', 2)),
    (('extralarge',), ('Extra Large', 3)),
    (('fullscreen',), ('Fullscreen', 4)),
    (('teaser',), ('Teaser', -1)),
    (('trailer',), ('Trailer', -1)),
    (('480p',), ('Small [HD 480p]', 10)),
    (('720p',), ('Medium [HD 720p]', 11)),
    (('1080i',), ('Small [HD 1080i]', 12)),
    (('1080p',), ('Large [HD 1080p]', 13))
    ]

# Parser state carried from one line to the next.  The section parsers below
# are called once per line, so the most recent date / studio / genre heading
# has to be remembered at module level.
_last_date = None
_last_studio = None
_last_genre = None


def _parse_dated_section(line, t, category):
    """
    Shared line parser for the sections that list "date, then trailer link".

    A date line only updates _last_date (stored as "day/month").  A trailer
    link line is reported through t.add_trailer() with the remembered date
    and the given category; the date is then cleared so it is used for one
    trailer only.  Any other line is ignored.
    """
    global _last_date

    m = _date_re.search(line)
    if m:
        _last_date = "%s/%s" % (m.group("day"), m.group("month"))
        return

    m = _trailer_link_re.search(line)
    if m:
        t.add_trailer(m.group("title"), url = m.group("url"),
                      date = _last_date, category = category)
        _last_date = None


def _parse_hidef(line, t):
    """Line parser for the high definition section (category "HD")."""
    _parse_dated_section(line, t, "HD")


def _parse_exclusive(line, t):
    """Line parser for the exclusives section (category "Exclusive")."""
    _parse_dated_section(line, t, "Exclusive")


def _parse_newest(line, t):
    """Line parser for the newest trailers section (category "Newest")."""
    _parse_dated_section(line, t, "Newest")


def _parse_studios(line, t):
    """
    Line parser for the studio list.

    A studio heading is remembered in _last_studio; every trailer link that
    follows is reported through t.add_trailer() with that studio.
    """
    global _last_studio

    m = _studio_name_re.search(line)
    if m:
        _last_studio = m.group("name")
        return

    m = _trailer_list_link_re.search(line)
    if m:
        t.add_trailer(m.group("title"), url = m.group("url"),
                      studio = _last_studio)


def _parse_genres(line, t):
    """
    Line parser for the genre list.

    A genre heading is remembered in _last_genre; every trailer link that
    follows is reported through t.add_trailer() with that genre.
    """
    global _last_genre

    m = _genre_name_re.search(line)
    if m:
        _last_genre = m.group("name")
        return

    m = _trailer_list_link_re.search(line)
    if m:
        t.add_trailer(m.group("title"), url = m.group("url"),
                      genre = _last_genre)

# Stages of parsing the page. Each stage consists of a start regexp, a stop
# regexp and a line parser.
+_stages = [ + ('<h3>Featured High Definition Trailers</h3>', '<!-- .* High Definition Trailers -->', _parse_hidef), + ('<h3>Trailers Exclusive</h3>', '<!-- .* Trailer Exclusives-->', _parse_exclusive), + ('<h3>Newest Trailers</h3>', '<!-- .* Newest Trailers -->', _parse_newest), + ('<div id="trailers-studio">', '</div>', _parse_studios), + ('<div id="trailers-genre">', '</div>', _parse_genres) ] + +class Trailers: + def __init__(self): + self.titles = {} + + def parse(self, callback = None, url = _DEFAULT_URL): + self._mark_old() + + self._url = url + + lines = self._dl(url).split("\n") + + count = 0 + in_stage = False + stage = 0 + start = re.compile(_stages[0][0], re.IGNORECASE) + for line in lines: + count += 1 + if callback is not None: + callback(100 * count / len(lines)) + + if in_stage: + _stages[stage][2](line, self) + if stop.search(line): + in_stage = False + stage += 1 + if stage >= len(_stages): + break + start = re.compile(_stages[stage][0], re.IGNORECASE) + + if not in_stage and start.search(line): + in_stage = True + stop = re.compile(_stages[stage][1], re.IGNORECASE) + _stages[stage][2](line, self) + + if callback is not None: + callback(100) + + self._prune() + + self.categories = [] + self.genres = [] + self.studios = [] + + for title in self.titles.keys(): + if self.titles[title]["studio"] not in self.studios: + self.studios.append(self.titles[title]["studio"]) + for g in self.titles[title]["genres"]: + if g not in self.genres: + self.genres.append(g) + for t in self.titles[title]["trailers"]: + for c in t["categories"]: + if c not in self.categories: + self.categories.append(c) + + return self.titles + + def add_trailer(self, title, date = None, url = None, category = None, studio = None, genre = None): + title = title.strip() + if not self.titles.has_key(title): + self.titles[title] = {"studio":None, "genres":[], "trailers":[]} + + t = self.titles[title] + + if t.has_key("_old"): + del t["_old"] + + if url is not None: + url = 
urlparse.urljoin(self._url, url) + self._parse_trailer_page(t, url, date, category) + + if studio is not None: + t["studio"] = studio + + if genre is not None and genre not in t["genres"]: + t["genres"].append(genre) + + def _parse_trailer_page(self, title, url, date, category): + for t in title["trailers"]: + if t["url"] == url: + if t.has_key("_old"): + del t["_old"] + if date is not None: + t["date"] = date + if category is not None and category not in t["categories"]: + t["categories"].append(category) + return + + if category is None: + categories = [] + else: + categories = [category] + + t = {"url":url, "date":date, "categories":categories, "streams":[]} + title["trailers"].append(t) + + lines = self._dl(url).split("\n") + + streams = [] + for line in lines: + iterator = _subpage_link_re.finditer(line) + for m in iterator: + page_size, page_key = self._map_size(m.group("size")) + + suburl = urlparse.urljoin(url, m.group("url")) + substreams = self._parse_stream_page(suburl) + + for ss in substreams: + for s in streams: + if s["url"] == ss["url"]: + break + else: + if ss["size"] is None: + size = page_size + key = page_key + else: + size, key = self._map_size(ss["size"]) + self._add_stream(streams, + {"url":ss["url"], + "size":size, + "sort_key":key}) + + iterator = _scriptpage_link_re.finditer(line) + for m in iterator: + suburl = urlparse.urljoin(url, m.group("url")) + substreams = self._parse_stream_page(suburl) + + for ss in substreams: + for s in streams: + if s["url"] == ss["url"]: + break + else: + size, key = self._map_size(ss["size"]) + self._add_stream(streams, + {"url":ss["url"], + "size":size, + "sort_key":key}) + + iterator = _frontpage_link_re.finditer(line) + for m in iterator: + page_size, page_key = self._map_size(m.group("type")) + + suburl = urlparse.urljoin(url, m.group("url")) + substreams = self._parse_stream_page(suburl) + if substreams: + for ss in substreams: + for s in streams: + if s["url"] == ss["url"]: + break + else: + if 
ss["size"] is None: + size = page_size + key = page_key + else: + size, key = self._map_size(ss["size"]) + self._add_stream(streams, + {"url":ss["url"], + "size":size, + "sort_key":key}) + else: + self._parse_trailer_page(title, suburl, date, category) + + substreams = self._extract_streams(url, line) + if substreams: + for ss in substreams: + for s in streams: + if s["url"] == ss["url"]: + break + else: + size, key = self._map_size(ss["size"]) + self._add_stream(streams, + {"url":ss["url"], + "size":size, + "sort_key":key}) + + t["streams"] = streams + + def _parse_stream_page(self, url): + lines = self._dl(url).split("\n") + + streams = [] + for line in lines: + streams = streams + self._extract_streams(url, line) + + return streams + + def _extract_streams(self, baseurl, line): + streams = [] + + for expr in _stream_link_res: + m = expr.search(line) + if m: + stream_url = urlparse.urljoin(baseurl, m.group("url")) + + size = None + if stream_url.find("480p") != -1: + size = "480p" + elif stream_url.find("720p") != -1: + size = "720p" + elif stream_url.find("1080p") != -1: + size = "1080p" + elif stream_url.find("1080i") != -1: + size = "1080i" + + streams.append({"url":stream_url, "size":size}) + + m = _scriptpage_stream_link_re.search(line) + if m: + stream_url = urlparse.urljoin(baseurl, m.group("url")) + streams.append({"url":stream_url, "size":m.group("size")}) + + return streams + + def _map_size(self, size): + for sm in _sizemap: + if size in sm[0]: + return sm[1] + return ("Unknown (%s)" % str(size), -10) + + def _dl(self, url): + f = urllib.urlopen(url) + return f.read() + + def _add_stream(self, list, stream): + for s in list: + if s['sort_key'] < stream['sort_key']: + list.insert(list.index(s), stream) + break + else: + list.append(stream) + + def _mark_old(self): + for title in self.titles.keys(): + self.titles[title]["_old"] = True + for trailer in self.titles[title]["trailers"]: + trailer["_old"] = True + + def _prune(self): + keys = 
self.titles.keys() + for title in keys: + if self.titles[title].has_key("_old"): + del self.titles[title] + else: + trailers = [] + for trailer in self.titles[title]["trailers"]: + if not trailer.has_key("_old"): + trailers.append(trailer) + if trailers: + self.titles[title]["trailers"] = trailers + else: + del self.titles[title] + + def only_studio(self, studio): + keys = self.titles.keys() + for title in keys: + if studio != self.titles[title]["studio"]: + del self.titles[title] + + def only_genre(self, genre): + keys = self.titles.keys() + for title in keys: + if genre not in self.titles[title]["genres"]: + del self.titles[title] + + def only_category(self, category): + keys = self.titles.keys() + for title in keys: + trailers = [] + for trailer in self.titles[title]["trailers"]: + if category in trailer["categories"]: + trailers.append(trailer) + if trailers: + self.titles[title]["trailers"] = trailers + else: + del self.titles[title] + + def sort_by_title(self): + keys = self.titles.keys() + keys.sort() + return keys + +if __name__ == '__main__': + # Use this to test loading subpages + t = Trailers() + title = {"trailers":[]} + t._parse_trailer_page(title, "http://www.apple.com/trailers/wb/blooddiamond/hd/", None, None) + print title + print "" + title = {"trailers":[]} + t._parse_trailer_page(title, "http://www.apple.com/trailers/touchstone/apocalypto/", None, None) + print title + sys.exit(0) + + try: + t = pickle.load(file("trailers.dump")) + except: + t = Trailers() + l = t.parse() + pickle.dump(t, file("trailers.dump", "w")) + keys = l.keys() + keys.sort() + for title in keys: + print title + if l[title]["studio"] is not None: + print "\tStudio:\t", l[title]["studio"] + if l[title]["genres"]: + print "\tGenres:\t", l[title]["genres"] + print "" + for t in l[title]["trailers"]: + print "\t", t + print "" Added: branches/rel-1/freevo/src/plugins/appletrailers.py ============================================================================== --- (empty file) 
+++ branches/rel-1/freevo/src/plugins/appletrailers.py Tue Oct 17 18:49:16 2006 @@ -0,0 +1,259 @@ +# -*- coding: iso-8859-1 -*- +# ----------------------------------------------------------------------- +# appletrailers.py - Plugin for streaming trailers from apple.com +# ----------------------------------------------------------------------- +# +# Add "plugin.activate('video.appletrailers')" in local_conf.py +# to activate +# +# ----------------------------------------------------------------------- +# Copyright (C) 2006 Pierre Ossman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MER- +# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# ----------------------------------------------------------------------- + +import os + +import config +import plugin +import menu +import stat +import time +import string +import util.fileops +import util.misc + +from item import Item +from video.videoitem import VideoItem +from gui.ProgressBox import ProgressBox + +import applelib + +MAX_CACHE_AGE = (60 * 60) * 8 # 8 hours + +cachedir = os.path.join(config.FREEVO_CACHEDIR, 'appletrailers') +if not os.path.isdir(cachedir): + os.mkdir(cachedir, + stat.S_IMODE(os.stat(config.FREEVO_CACHEDIR)[stat.ST_MODE])) + +class PluginInterface(plugin.MainMenuPlugin): + """ + A freevo interface to http://www.apple.com/trailers + + plugin.activate('video.appletrailers') + """ + def __init__(self): + plugin.MainMenuPlugin.__init__(self) + + def items(self, parent): + return [ BrowseBy(parent) ] + +class AppleItem(Item): + def __init__(self, parent): + Item.__init__(self, parent) + self.type = 'trailers' + self.__load() + + def __progress(self, percent): + if percent > self.__last_perc: + for i in xrange(percent - self.__last_perc): + self.__pop.tick() + self.__last_perc = percent + + def __load(self): + pfile = os.path.join(cachedir, 'data') + if (os.path.isfile(pfile) == 0): + self.trailers = applelib.Trailers() + self.__pop = ProgressBox(text=_('Scanning Apple for trailers...'), full=100) + self.__pop.show() + self.__last_perc = -1 + self.trailers.parse(self.__progress) + self.__pop.destroy() + util.fileops.save_pickle(self.trailers, pfile) + else: + self.trailers = util.fileops.read_pickle(pfile) + if abs(time.time() - os.path.getmtime(pfile)) > MAX_CACHE_AGE: + self.__pop = ProgressBox(text=_('Scanning Apple for trailers...'), full=100) + self.__pop.show() + self.__last_perc = -1 + 
self.trailers.parse(self.__progress) + self.__pop.destroy() + util.fileops.save_pickle(self.trailers, pfile) + +class TrailerVideoItem(VideoItem): + def __init__(self, name, url, parent): + VideoItem.__init__(self, url, parent) + self.name = name + self.type = 'trailers' + +class Trailer(Item): + def __init__(self, name, title, trailer, parent): + Item.__init__(self, parent) + self.name = name + self.type = 'trailers' + self.title = title + self._trailer = trailer + + def actions(self): + return [ (self.make_menu, 'Streams') ] + + def make_menu(self, arg=None, menuw=None): + entries = [] + for s in self._trailer["streams"]: + if s["size"] is None: + name = "Unknown" + else: + name = s["size"] + entries.append(TrailerVideoItem(name, s["url"], self)) + menuw.pushmenu(menu.Menu(self.title, entries)) + +class TrailerMenu(Item): + def __init__(self, name, title, trailer, parent): + Item.__init__(self, parent) + self.name = name + self.type = 'trailers' + self.title = title + self._trailer = trailer + + def actions(self): + return [ (self.make_menu, 'Trailers') ] + + def make_menu(self, arg=None, menuw=None): + entries = [] + i = 1 + for t in self._trailer["trailers"]: + name = "Trailer %d" % i + if t["categories"] is not None: + name += " [" + ",".join(t["categories"]) + "]" + if t["date"] is not None: + name += " (" + t["date"] + ")" + entries.append(Trailer(name, self.title, t, self)) + i += 1 + menuw.pushmenu(menu.Menu(self.title, entries)) + +class BrowseByTitle(AppleItem): + def __init__(self, parent): + AppleItem.__init__(self, parent) + self.name = _('Browse by Title') + self.title = _('Trailers') + + def actions(self): + return [ (self.make_menu, 'Titles') ] + + def _gen_name(self, title, trailer): + name = title + dates = [] + categories = [] + for t in trailer["trailers"]: + if t["date"] is not None and t["date"] not in dates: + dates.append(t["date"]) + if t["categories"] is not None and t["categories"] not in categories: + categories += t["categories"] + if 
categories: + name += " [" + ",".join(categories) + "]" + if dates: + name += " (" + ",".join(dates) + ")" + return name + + def make_menu(self, arg=None, menuw=None): + entries = [] + for t in self.trailers.sort_by_title(): + title = self.trailers.titles[t] + name = self._gen_name(t, title) + if len(title["trailers"]) == 1: + entries.append(Trailer(name, t, title["trailers"][0], self)) + else: + entries.append(TrailerMenu(name, t, title, self)) + menuw.pushmenu(menu.Menu(self.title, entries)) + +class Genre(BrowseByTitle): + def __init__(self, genre, parent): + BrowseByTitle.__init__(self, parent) + self.name = genre + self.title = genre + self.trailers.only_genre(genre) + +class Category(BrowseByTitle): + def __init__(self, category, parent): + BrowseByTitle.__init__(self, parent) + self.name = category + self.title = category + self.trailers.only_category(category) + +class Studio(BrowseByTitle): + def __init__(self, studio, parent): + BrowseByTitle.__init__(self, parent) + self.name = studio + self.title = studio + self.trailers.only_category(studio) + +class BrowseByGenre(AppleItem): + def __init__(self, parent): + AppleItem.__init__(self, parent) + self.name = _('Browse by Genre') + + def actions(self): + return [ (self.make_menu, 'Genres') ] + + def make_menu(self, arg=None, menuw=None): + genres = [] + for g in self.trailers.genres: + genres.append(Genre(g, self)) + menuw.pushmenu(menu.Menu(_('Choose a genre'), genres)) + +class BrowseByCategory(AppleItem): + def __init__(self, parent): + AppleItem.__init__(self, parent) + self.name = _('Browse by Category') + + def actions(self): + return [ (self.make_menu, 'Categories') ] + + def make_menu(self, arg=None, menuw=None): + categories = [] + for c in self.trailers.categories: + categories.append(Category(c, self)) + menuw.pushmenu(menu.Menu(_('Choose a category'), categories)) + +class BrowseByStudio(AppleItem): + def __init__(self, parent): + AppleItem.__init__(self, parent) + self.name = _('Browse by 
Studio') + + def actions(self): + return [ (self.make_menu, 'Studios') ] + + def make_menu(self, arg=None, menuw=None): + studios = [] + for s in self.trailers.studios: + studios.append(Studio(s, self)) + menuw.pushmenu(menu.Menu(_('Choose a studio'), studios)) + +class BrowseBy(Item): + def __init__(self, parent): + Item.__init__(self, parent) + self.name = 'Apple Trailers' + self.type = 'trailers' + + def actions(self): + return [ (self.make_menu, 'Browse by') ] + + def make_menu(self, arg=None, menuw=None): + menuw.pushmenu(menu.Menu('Apple Trailers', + [ BrowseByGenre(self), + BrowseByCategory(self), + BrowseByStudio(self), + BrowseByTitle(self) ])) ------------------------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 _______________________________________________ Freevo-cvslog mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/freevo-cvslog
