Re: [Freevo-devel] [NEW] fxdimdb.py class for fxd and imdb handling...

Rob Vandermeulen Thu, 10 Jul 2003 05:52:37 -0700

There were still some pretty big bugs in the class, which are fixed now. I also 
adapted the imdb.py plugin to use the new class, and updated some parts of the 
doc...


have fun
den_RDC

#if 0 /*
# -----------------------------------------------------------------------
# helpers/fxdimdb.py - class and helpers for fxd/imdb generation
# -----------------------------------------------------------------------
# $Id: fileheader,v 0.1 2003/07/08 22:31:00 den_RDC Exp $
#
# Notes: see http://pintje.servebeer.com/fxdimdb.html for documentation
# Todo: 
# - add support making fxds without imdb (or documenting it)
# - webradio support?
#
# -----------------------------------------------------------------------
# $Log: fxdimdb.py,v $
#
# Revision 0.1  2003/07/08 22:31:00  den_RDC
# Initial release.
#
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2003 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# ----------------------------------------------------------------------- */
#endif


# python has no data hiding, but this is the intended use...
# subroutines written completely in lowercase are regarded as more "private" functions
# subRoutines (mixed case) are regarded as public

#some data
__author__ = "den_RDC ([EMAIL PROTECTED])"
__version__ = "Revision 0.1"
__copyright__ = "Copyright (C) 2003 den_RDC"
__license__ = "GPL"

#Module Imports
import re
import urllib, urllib2, urlparse
import sys
import string
import codecs
import os


# Bring in the Freevo modules this file depends on.  A plain import is tried
# first; if that fails, FREEVO_STARTDIR and sys.path are pointed at the
# current directory (and its src/ and src/video/ subdirectories) and the
# import is retried.
try:
    import config
    # The rest of this module calls save_parseMovieFile, so it has to be
    # bound on this path as well (previously only parseMovieFile was, which
    # made every fxd validation raise NameError when this branch succeeded).
    from xml_parser import parseMovieFile, save_parseMovieFile
except:
    # NOTE(review): kept deliberately broad - the fallback sets
    # FREEVO_STARTDIR before re-importing config, which suggests the first
    # import can fail with more than ImportError; confirm before narrowing.
    STARTDIR = os.path.abspath('./')
    os.environ['FREEVO_STARTDIR'] = STARTDIR

    sys.path.append(os.path.abspath('./'))
    sys.path.append(os.path.abspath('./src'))
    sys.path.append(os.path.abspath('./src/video'))
    import config
    from xml_parser import save_parseMovieFile

#Constants

# Version string reported in the User-Agent header below.
freevo_version = '1.3.2'

# Integer booleans used throughout this module.
FALSE = 0
TRUE = 1

# IMDb title-list cache path / download URL / loaded titles.
# NOTE(review): not referenced by the code visible in this module.
imdb_title_list = '/tmp/imdb-movies.list'
imdb_title_list_url = 'ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/movies.list.gz'
imdb_titles = None

# Keys of FxdImdb.info; each one becomes a child tag of <info> in the fxd.
imdb_info_tags = ('year', 'genre', 'tagline', 'plot', 'rating', 'runtime')


# headers for urllib2
# txdata is the request body handed to urllib2.Request (None: no body),
# txheaders the extra HTTP headers sent with every request.
txdata = None
txheaders = {
    'User-Agent': 'freevo %s (%s)' % (freevo_version, sys.platform),
    'Accept-Language': 'en-us',
}

#Begin class

class FxdImdb:
    """Class for creating fxd files and fetching imdb information"""
    
    def __init__(self):
        """Initialise class instance.

        Creates every attribute the other methods read, with an empty or
        neutral value, and registers the poster-site url handlers.
        """

        #these are considered as private variables - don't mess with them unless no other choice is given
        #fyi, the other choice always exists : add a subroutine or ask :)

        self.imdb_id_list = []   # search results, (id, name, year, type) tuples
        self.imdb_id = None
        self.isdiscset = FALSE   # set to TRUE by setDiscset()
        self.title = ''
        self.info = {}

        self.image = None # full path image filename
        self.image_urls = [] # possible image url list
        self.image_url = None # final image url

        self.fxdfile = None # filename, full path, WITHOUT extension

        self.append = FALSE      # decided by setFxdFile(): update existing file?
        self.device = None
        self.regexp = None
        self.mpl_global_opt = None
        self.media_id = None
        self.file_opts = []
        self.video = []
        self.variant = []
        self.parts = []
        self.var_mplopt = []
        self.var_names = []
        # mplayer options for the variants: written by setVariants(), read
        # by print_variant().  It used to be created only by setVariants(),
        # so printing variants without options raised AttributeError.
        self.varmpl_opt = None

        #initialize self.info
        for t in imdb_info_tags:
            self.info[t] = ""

        #image_url_handler stuff
        # maps a host name to the method that turns a link on that host
        # into a list of poster image urls (used by parsedata)
        self.image_url_handler = {}
        self.image_url_handler['www.impawards.com'] = self.impawards
        
    
    
    def searchImdb(self, name):
        """name (string), returns id list
        Search for name and returns an id list with tuples:
            (id , name, year, type)

        If IMDB redirects straight to a title page, the list is replaced by
        that single hit.  Otherwise the hits of the result page are merged
        into self.imdb_id_list: an id that is already present is overwritten,
        new ids are appended.  Video game hits are skipped.

        Raises FxdImdb_Net_Error when the request fails with an HTTP error.
        """

        url = 'http://us.imdb.com/Tsearch?title=%s&restrict=Movies+and+TV' % urllib.quote(name)
        req = urllib2.Request(url, txdata, txheaders)

        try:
            response = urllib2.urlopen(req)
        except urllib2.HTTPError, error:
            # str(): concatenating the exception object itself raised TypeError
            raise FxdImdb_Net_Error("IMDB unreachable : " + str(error))

        regexp_title = re.compile('.*<LI><A HREF="/Title\?([0-9]*)">(.*) '+
                                   '\(([12][0-9][0-9][0-9].*)\)</A>')
        regexp_type  = re.compile('<H2><A NAME=.*>(.*)</A></H2>')

        # try/finally so the connection is closed on every exit path,
        # including the direct-hit return and parse errors
        try:
            # a redirect to a title page means there was exactly one match
            m = re.match('http://.*imdb.com/Title\?([0-9]*)', response.geturl())
            if m:
                data = self.parsedata(response)
                self.imdb_id_list = [ ( m.group(1), data[0], data[1]['year'], '' ) ]
                return self.imdb_id_list

            category = ''
            for line in response.read().split("\n"):
                # section headers tell which kind of title the next hits are
                m = regexp_type.match(line)
                if m:
                    category = m.group(1)
                    if category == 'Movies':
                        category = 'Movie'
                    elif category == 'TV-Movies':
                        category = 'TV-Movie'

                m = regexp_title.match(line)
                if not m or category == 'Video Games':
                    continue

                imdb_id = m.group(1)
                title   = m.group(2)
                year    = m.group(3)

                # slices instead of indexing: an empty title must not raise
                if title[:1] == '"' and title[-1:] == '"':
                    title = title[1:-1]
                if year.find(')') > 0:
                    year = year[:year.find(')')]

                entry = ( imdb_id, title, year, category )
                # replace an existing entry with the same id, else append
                for i in range(len(self.imdb_id_list)):
                    if self.imdb_id_list[i][0] == imdb_id:
                        self.imdb_id_list[i] = entry
                        break
                else:
                    self.imdb_id_list.append(entry)
        finally:
            response.close()

        return self.imdb_id_list
    
    
    
    def setImdbId(self, id):
        """id (number)
        Set an imdb_id number for object, and fetch data

        Downloads the IMDB title page for id and hands it to parsedata(),
        which fills in title, info and image urls.

        Raises FxdImdb_Net_Error when the request fails with an HTTP error.
        """
        self.imdb_id = id

        url = 'http://us.imdb.com/Title?%s' % id
        req = urllib2.Request(url, txdata, txheaders)

        try:
            idpage = urllib2.urlopen(req)
        except urllib2.HTTPError, error:
            # str(): concatenating the exception object itself raised TypeError
            raise FxdImdb_Net_Error("IMDB unreachable : " + str(error))

        # close the page even when parsing fails
        try:
            self.parsedata(idpage, id)
        finally:
            idpage.close()
    
    def setFxdFile(self, fxdfilename = None, overwrite = FALSE):
        """fxdfilename (string, full path)
        Set fxd file to write to, may be omitted, may be an existing file (data will be added)
            unless overwrite = TRUE

        Without fxdfilename the name is derived from the media: for a
        disc-set, or a video that is not a plain file, it is the media id
        inside config.MOVIE_DATA_DIR; for a plain file it is the file name
        without extension.

        Raises FxdImdb_XML_Error when no name can be derived because no video
        has been set, or when the existing file to append to does not parse.
        """

        if fxdfilename:
            base, ext = os.path.splitext(fxdfilename)
            if ext == '.fxd':
                self.fxdfile = base
            else:
                self.fxdfile = fxdfilename

        elif self.isdiscset == TRUE:
            self.fxdfile = os.path.join(config.MOVIE_DATA_DIR , self.getmedia_id(self.device))

        else:
            if not self.video:
                # used to surface as a bare IndexError on self.video[0]
                raise FxdImdb_XML_Error("No video set, can't determine fxd filename")
            #unwrap first video tuple
            vtype, idref, device, mpl_opts, fname = self.video[0]
            if vtype == 'file' and device == None:
                self.fxdfile = os.path.splitext(fname)[0]
            else:
                self.fxdfile = os.path.join(config.MOVIE_DATA_DIR , self.getmedia_id(device))

        if overwrite == FALSE:
            # append only when the file can actually be opened; the probe
            # handle is closed again and the flag is reset when it can't, so
            # a flag left over from an earlier call no longer sticks
            try:
                probe = open(self.fxdfile + '.fxd')
            except IOError:
                self.append = FALSE
            else:
                probe.close()
                self.append = TRUE
        else:
            self.append = FALSE

        if (self.append == TRUE) and (save_parseMovieFile(self.fxdfile + '.fxd', None, []) == []):
            raise FxdImdb_XML_Error("FXD file to be updated is invalid, please correct it.")
            
    
    def setVideo(self, *videos, **mplayer_opt):
        """videos (tuple (type, id-ref, device, mplayer-opts, file/param) (multiple allowed), 
                global_mplayer_opts
        Set media file(s) for fxd"""
        if self.isdiscset == TRUE:
            raise FxdImdb_XML_Error("<disc-set> already used, can't use both <movie> and <disc-set>")
        
        if videos:
            for video in videos:
                self.video += [ video ]
        if mplayer_opt and 'mplayer_opt' in mpl_global_opt: self.mpl_global_opt = mplayer_opt['mplayer_opt']
        
    def setVariants(self, *parts, **mplayer_opt):
        """ variants/parts (tuple (name, ref, mpl_opts, sub, s_dev, audio, a_dev)),
                    var_mplayer_opts
        Set Variants & parts"""
        if self.isdiscset == TRUE:
            raise FxdImdb_XML_Error("<disc-set> already used, can't use both <movie> and <disc-set>")
 
        if mplayer_opt and 'mplayer_opt' in mpl_global_opt: self.varmpl_opt = (mplayer_opt['mplayer_opt'])
        for part in parts:
            self.variant += [ part ]
        
    
    def writeFxd(self):
        """Write fxd file

        Picks a file name if none was set, then either updates the existing
        file or fetches the cover image and writes a fresh file, as a
        <disc-set> or a <movie>.  The result is parsed back as a check.

        Raises FxdImdb_XML_Error when the written file does not parse.
        """
        # no target chosen yet: let setFxdFile derive one from the media
        if not self.fxdfile:
            self.setFxdFile()

        as_discset = (self.isdiscset == TRUE)

        if self.append == TRUE:
            # add to the existing file
            if as_discset:
                self.update_discset()
            else:
                self.update_movie()
        else:
            # new file: the cover image is downloaded first
            self.fetch_image()
            if as_discset:
                self.write_discset()
            else:
                self.write_movie()

        # sanity check: the file we just produced has to be parseable
        parsed = save_parseMovieFile(self.fxdfile + '.fxd', None, [])
        if parsed == []:
            raise FxdImdb_XML_Error("""FXD file generated is invalid, please post bugreport, tracebacks and fxd file.""")

        
        
    
    def setDiscset(self, device, regexp, *file_opts, **mpl_global_opt):
        """ device (string), regexp (string), file_opts (tuple (mplayer-opts,file)), mpl_global_opt (string)
        Set media is dvd/vcd, 

        Exactly one of device and regexp must be given.  file_opts tuples
        (see makeFile_opt) are appended to self.file_opts; the optional
        keyword mplayer_opt sets the mplayer options written on the <disc>
        tag.

        Raises FxdImdb_XML_Error when videos/variants were already set or
        when device and regexp are both given or both missing.
        """
        if len(self.video) != 0 or len(self.variant) != 0:
            raise FxdImdb_XML_Error("<movie> already used, can't use both <movie> and <disc-set>")

        # validate before touching any state, so a rejected call does not
        # leave the object flagged as a disc-set
        if device and regexp:
            raise FxdImdb_XML_Error("Can't use both media-id and regexp")
        if not device and not regexp:
            raise FxdImdb_XML_Error("Either media-id or regexp is required")

        self.isdiscset = TRUE
        self.device = device
        self.regexp = regexp

        for opts in file_opts:
            self.file_opts.append(opts)

        if 'mplayer_opt' in mpl_global_opt:
            self.mpl_global_opt = mpl_global_opt['mplayer_opt']
            
    
    def isDiscset(self):
        """Check if fxd file describes a disc-set, returns 1 for true, 0 for false
        None for invalid file"""
        try:
            file = open(self.fxdfile + '.fxd')
        except IOError:
            return None
            
        content = file.read()
        file.close()
        if content.find('</disc-set>') != -1: return 1
        return 0

        
    def guessImdb(self, filename, label=FALSE):
        """Guess possible imdb movies from filename. Same return as searchImdb

        The file name (without directory and extension) is broken into words,
        the words listed in config.IMDB_REMOVE_FROM_SEARCHSTRING are dropped
        and the rest is used as the search string.  With label = TRUE the
        patterns in config.IMDB_REMOVE_FROM_LABEL are removed first.
        """

        stem = os.path.basename(os.path.splitext(filename)[0])

        # put a '.' on every lower/upper and letter/digit boundary, then
        # lowercase and do the same for digit/letter boundaries
        for boundary in ('([a-z])([A-Z])', '([a-zA-Z])([0-9])'):
            stem = re.sub(boundary, point_maker, stem)
        stem = re.sub('([0-9])([a-zA-Z])', point_maker, stem.lower())

        if label == TRUE:
            for junk in config.IMDB_REMOVE_FROM_LABEL:
                stem = re.sub(junk, '', stem)

        # keep the words that are not on the ignore list, each one followed
        # by a blank
        words = [ '%s ' % w for w in re.split('[\._ -]', stem)
                  if w.lower() not in config.IMDB_REMOVE_FROM_SEARCHSTRING ]

        return self.searchImdb(''.join(words))

        
#------ private functions below .....

    def write_discset(self):
        """Write a <disc-set> to a fresh file

        Creates <fxdfile>.fxd (utf-8) with the copyright header, one <disc>
        identified by media id or label regexp, optional <file-opt> entries,
        the cover image and the info block, then touches
        /tmp/freevo-rebuild-database.

        Raises FxdImdb_IO_Error when the file cannot be opened for writing.
        """

        try:
            i = codecs.open( (self.fxdfile + '.fxd') , 'w', encoding='utf-8')
        except IOError, error:
            raise FxdImdb_IO_Error("Writing FXD file failed : " + str(error))

        # The file used to be left open; close it explicitly so everything is
        # flushed to disk before writeFxd() parses the file back.
        try:
            #header
            i.write("<?xml version=\"1.0\" ?>\n<freevo>\n")
            i.write("  <copyright>\n" +
                    "    The information in this file are from the Internet " +
                    "Movie Database (IMDb).\n" +
                    "    Please visit http://www.imdb.com for more informations.\n")
            i.write("    <source url=\"http://www.imdb.com/Title?%s\"/>\n"  % self.imdb_id +
                    "  </copyright>\n")
            #disc-set
            i.write("  <disc-set title=\"%s\">\n" % self.str2XML(self.title))
            #disc
            i.write("    <disc")
            if self.device:
                i.write(" media-id=\"%s\"" % self.str2XML(self.getmedia_id(self.device)))
            elif self.regexp:
                i.write(" label-regexp=\"%s\"" % self.str2XML(self.regexp))
            if self.mpl_global_opt:
                i.write(" mplayer-options=\"%s\">" % self.str2XML(self.mpl_global_opt))
            else:
                i.write(">")
            #file-opts
            if self.file_opts:
                i.write("\n")
                for mplopts, fname in self.file_opts:
                    i.write("      <file-opt mplayer-options=\"%s\">" % self.str2XML(mplopts))
                    i.write("%s</file-opt>\n" % self.str2XML(fname))
            i.write("    </disc>\n")

            #image
            if self.image:
                i.write("    <cover-img source=\"%s\">" % self.str2XML(self.image_url))
                i.write("%s</cover-img>\n" % self.str2XML(self.image))
            #print info
            i.write(self.print_info())

            #close tags
            i.write("  </disc-set>\n")
            i.write("</freevo>\n")
        finally:
            i.close()

        # NOTE(review): presumably a marker telling Freevo to rebuild its
        # movie database - confirm against the consumer of this file
        os.system('touch /tmp/freevo-rebuild-database')
        
    def write_movie(self):
        """Write <movie> to fxd file

        Creates <fxdfile>.fxd (utf-8) with the copyright header, the cover
        image, the <video> block, an optional <variants> block and the info
        block, then touches /tmp/freevo-rebuild-database.

        Raises FxdImdb_IO_Error when the file cannot be opened for writing.
        """

        try:
            i = codecs.open( (self.fxdfile + '.fxd') , 'w', encoding='utf-8')
        except IOError, error:
            raise FxdImdb_IO_Error("Writing FXD file failed : " + str(error))

        # The file used to be left open; close it explicitly so everything is
        # flushed to disk before writeFxd() parses the file back.
        try:
            #header
            i.write("<?xml version=\"1.0\" ?>\n<freevo>\n")
            i.write("  <copyright>\n" +
                    "    The information in this file are from the Internet " +
                    "Movie Database (IMDb).\n" +
                    "    Please visit http://www.imdb.com for more informations.\n")
            i.write("    <source url=\"http://www.imdb.com/Title?%s\"/>\n"  % self.imdb_id +
                    "  </copyright>\n")
            # write movie
            i.write("  <movie title=\"%s\">\n" % self.str2XML(self.title))
            #image
            if self.image:
                i.write("    <cover-img source=\"%s\">" % self.str2XML(self.image_url))
                i.write("%s</cover-img>\n" % self.str2XML(self.image))
            #video
            if self.mpl_global_opt:
                i.write("    <video mplayer-options=\"%s\">\n" % self.str2XML(self.mpl_global_opt))
            else:
                i.write("    <video>\n")
            # videos
            i.write(self.print_video())
            i.write('    </video>\n')
            #variants
            if len(self.variant) != 0:
                i.write('    <variants>\n')
                i.write(self.print_variant())
                i.write('    </variants>\n')

            #info
            i.write(self.print_info())
            #close tags
            i.write('  </movie>\n')
            i.write('</freevo>\n')
        finally:
            i.close()

        # NOTE(review): presumably a marker telling Freevo to rebuild its
        # movie database - confirm against the consumer of this file
        os.system('touch /tmp/freevo-rebuild-database')
        
    def update_movie(self):
        """Updates an existing file, adds extra dvd|vcd|file and variant tags

        The file is read into memory and rewritten line by line:
          - the new videos are inserted in front of the </video> line
          - the new variants are inserted in front of the </variants> line,
            or, when the file has no <variants> block, wrapped in a new one
            right after the </video> line

        Raises FxdImdb_IO_Error when the file cannot be read and
        FxdImdb_XML_Error when it contains no </video> tag.
        """
        #read existing file in memory
        try:
            file = open(self.fxdfile + '.fxd')
        except IOError, error:
            raise FxdImdb_IO_Error("Updating FXD file failed : " + str(error))
        try:
            content = file.read()
        finally:
            file.close()

        if content.find('</video>') == -1:
            raise FxdImdb_XML_Error("FXD cannot be updated, doesn't contain <video> tag")

        regexp_variant_end = re.compile(' *</variants>', re.I)
        regexp_video_end  = re.compile(' *</video>', re.I)
        # does not change while rewriting, so look it up only once
        has_variants = content.find('<variants>') != -1

        lines = content.split('\n')
        # a file ending in a newline gives a trailing '' entry; writing it
        # back added one more blank line to the file on every update
        if lines[-1] == '':
            del lines[-1]

        # NOTE(review): written with a plain open() while fresh files are
        # written as utf-8 through codecs - non-ascii insertions may fail here
        file = open(self.fxdfile + '.fxd', 'w')
        try:
            passedvid = FALSE
            for line in lines:
                if passedvid == TRUE and not has_variants:
                    #there is no variants tag: add one after </video>
                    if len(self.variant) != 0:
                        file.write('    <variants>\n')
                        file.write(self.print_variant())
                        file.write('    </variants>\n')
                    file.write(line + '\n')
                    passedvid = FALSE

                elif regexp_video_end.match(line):
                    if len(self.video) != 0:
                        file.write(self.print_video())
                    file.write(line + '\n')
                    passedvid = TRUE

                elif regexp_variant_end.match(line):
                    if len(self.variant) != 0:
                        file.write(self.print_variant())
                    file.write(line + '\n')

                else:
                    file.write(line + '\n')
        finally:
            file.close()

        os.system('touch /tmp/freevo-rebuild-database')
        
    def update_discset(self):
        """Updates an existing file, adds extra disc in discset

        The file is read into memory and rewritten line by line; a new <disc>
        element (with its <file-opt> entries) is inserted in front of the
        </disc-set> line.

        Raises FxdImdb_IO_Error when the file cannot be read and
        FxdImdb_XML_Error when it contains no </disc-set> tag.
        """

        #read existing file in memory
        try:
            file = open(self.fxdfile + '.fxd')
        except IOError, error:
            raise FxdImdb_IO_Error("Updating FXD file failed : " + str(error))
        try:
            content = file.read()
        finally:
            file.close()

        if content.find('</disc-set>') == -1:
            raise FxdImdb_XML_Error("FXD file cannot be updated, doesn't contain <disc-set>")

        regexp_discset_end  = re.compile(' *</disc-set>', re.I)

        lines = content.split('\n')
        # a file ending in a newline gives a trailing '' entry; writing it
        # back added one more blank line to the file on every update
        if lines[-1] == '':
            del lines[-1]

        # NOTE(review): written with a plain open() while fresh files are
        # written as utf-8 through codecs - non-ascii insertions may fail here
        file = open(self.fxdfile + '.fxd', 'w')
        try:
            for line in lines:
                if regexp_discset_end.match(line):
                    # new disc goes right before the closing tag
                    file.write("    <disc")
                    if self.device:
                        file.write(" media-id=\"%s\"" % self.str2XML(self.getmedia_id(self.device)))
                    elif self.regexp:
                        file.write(" label-regexp=\"%s\"" % self.str2XML(self.regexp))
                    if self.mpl_global_opt:
                        file.write(" mplayer-options=\"%s\">" % self.str2XML(self.mpl_global_opt))
                    else:
                        file.write(">")
                    #file-opts
                    if self.file_opts:
                        file.write("\n")
                        for mplopts, fname in self.file_opts:
                            file.write("      <file-opt mplayer-options=\"%s\">" % self.str2XML(mplopts))
                            file.write("%s</file-opt>\n" % self.str2XML(fname))
                    file.write("    </disc>\n")

                file.write(line + '\n')
        finally:
            file.close()

        os.system('touch /tmp/freevo-rebuild-database')
        
    
    def parsedata(self, results, id=0):
        """results (imdb html page), imdb_id
        Returns tuple of (title, info(dict), image_urls)

        Scans the page line by line and stores what it finds in self.title,
        self.info and self.image_urls (the returned tuple holds these same
        objects).  results only needs a read() method.

        Without id only the page itself is parsed.  With id two more pages
        are fetched: the DVD page (only if the title page links to one) for
        amazon cover images, and the Posters page, whose links are passed to
        the handlers registered in self.image_url_handler.
        """

        dvd = 0

        # one pattern per item scraped from the title page
        regexp_title   = re.compile('.*STRONG CLASS="title">(.*?)<', re.I)
        regexp_year    = re.compile('.*<A HREF="/Sections/Years/.*?([0-9]*)<', re.I)
        regexp_genre   = re.compile('.*href="/Sections/Genres(.*)$', re.I)
        regexp_tagline = re.compile('.*<B CLASS="ch">Tagline.*?</B>(.*?)<', re.I)
        regexp_plot1   = re.compile('.*<B CLASS="ch">Plot Outline.*?</B>(.*?)<', re.I)
        regexp_plot2   = re.compile('.*<B CLASS="ch">Plot Summary.*?</B>(.*?)<', re.I)
        regexp_rating  = re.compile('.*<B>([0-9\.]*)/10</B> (.[0-9,]* votes.?)', re.I)
        regexp_image   = re.compile('.*ALT="cover".*src="(http://.*?)"', re.I)
        regexp_runtime = re.compile('.*<b class="ch">Runtime', re.I)
        regexp_dvd     = re.compile('.*<a href="/DVD\?', re.I)

        # patterns for the DVD page and the Posters page
        regexp_dvd_image = re.compile('.*(http://images.amazon.com.*?ZZZZZ.*?)"')
        regexp_url   = re.compile('.*href="(http.*?)"', re.I)

        # set when the value of a field is on the line after its header
        next_line_is = None

        for line in results.read().split("\n"):
            # the runtime value is the line following the "Runtime" header
            if next_line_is == 'runtime':
                next_line_is = None
                self.info['runtime'] = self.str2XML(line)

            if regexp_runtime.match(line):
                next_line_is = 'runtime'
                continue

            m = regexp_title.match(line)
            if m: self.title = self.str2XML(m.group(1))

            m = regexp_year.match(line)
            if m: self.info['year'] = m.group(1)

            m = regexp_genre.match(line)
            if m:
                # cut the line at every "</A> ... >" so only the link texts
                # remain; they are joined with " / ", skipping empty pieces
                # and the "(more)" link
                for g in re.compile(' *</A>.*?> *', re.I).split(' </a>'+line+' > '):
                    if self.info['genre'] == "": self.info['genre'] = g
                    elif g != "" and g != "(more)": self.info['genre'] += " / "+ g


            # a '<' is appended so the patterns also match when the text
            # runs to the end of the line; runs of blanks/tabs are collapsed
            m = regexp_tagline.match('%s<' % line)
            if m:
                self.info['tagline'] = self.str2XML(re.compile('[\t ]+').sub(" ", ' ' + m.group(1))[1:])

            m = regexp_plot1.match('%s<' % line)
            if m: self.info['plot'] = self.str2XML(re.compile('[\t ]+').sub(" ", ' ' + m.group(1))[1:])

            # a "Plot Summary" replaces a "Plot Outline" found earlier
            m = regexp_plot2.match('%s<' % line)
            if m: self.info['plot'] = self.str2XML(re.compile('[\t ]+').sub(" ", ' ' + m.group(1))[1:])

            m = regexp_rating.match(line)
            if m: self.info['rating'] = m.group(1) + '/10 ' + m.group(2)

            # remember that the title has a DVD page
            m = regexp_dvd.match(line)
            if m: dvd = 1

            m = regexp_image.match(line)
            if m: self.image_urls += [ m.group(1) ]


        # no id: the caller only wanted the page parsed, fetch nothing else
        if not id:
            return (self.title, self.info, self.image_urls)


        if dvd:
            url = 'http://us.imdb.com/DVD?%s' % id
            req = urllib2.Request(url, txdata, txheaders)

            # best effort: an HTTP error here only means no DVD covers
            try:
                r = urllib2.urlopen(req)
                for line in r.read().split("\n"):
                    m = regexp_dvd_image.match(line)
                    if m: self.image_urls += [ m.group(1) ]
                r.close()
            except urllib2.HTTPError, error:
                pass

        #oldcode
        #if not self.image_url_handler:
        #    return #(title, info, image_urls)

        url = 'http://us.imdb.com/Posters?%s' % id
        req = urllib2.Request(url, txdata, txheaders)
        try:
            r = urllib2.urlopen(req)
        except urllib2.HTTPError, error:
            # no poster page: report it and return what was collected so far
            print error
            return (self.title, self.info, self.image_urls)

        for line in r.read().split("\n"):
            m = regexp_url.match(line)
            if m:
                # urlsplit result: [0] scheme, [1] host, [2] path
                url = urlparse.urlsplit(m.group(1))
                if url[0] == 'http' and self.image_url_handler.has_key(url[1]):
                    # the handler registered for this host returns a list of
                    # image urls
                    self.image_urls += self.image_url_handler[url[1]](url[1], url[2])

        r.close()
        return (self.title, self.info, self.image_urls)
    
    
    def impawards(self, host, path):
        """Url handler for www.impawards.com: turn the link to a poster page
        into the url of the poster image.  TODO: check for licences of each
        poster and add all posters

        host, path: host name and path of the poster page
        Returns a list holding the single image url
        <dir>/posters/<page name without extension>.jpg on the same host.
        """
        last_slash = path.rfind('/')
        last_dot = path.rfind('.')

        directory = path[:last_slash]
        page = path[last_slash + 1:last_dot]

        poster = '%s/posters/%s.jpg' % (directory, page)
        return [ 'http://%s%s' % (host, poster) ]
    
    
    def fetch_image(self):
        """Fetch the best image"""
        image_len = 0
        for image in self.image_urls:
            try:
                # get sizes of images
                req = urllib2.Request(image, txdata, txheaders)
                r = urllib2.urlopen(req)
                length = r.info()['Content-Length']
                r.close()
                if length > image_len:
                    image_len = length
                    self.image_url = image
            except:
                pass
        if not self.image_url:
            print "Image dowloading failed"
            return
        
        self.image = (self.fxdfile + '.jpg')
        
        req = urllib2.Request(self.image_url, txdata, txheaders)
        r = urllib2.urlopen(req)
        i = open(self.image, 'w')
        i.write(r.read())
        i.close()
        r.close()
        
        # try to crop the image to avoid borders by imdb 
        try:
            import Image
            image = Image.open(filename)
            width, height = image.size
            image.crop((2,2,width-4, height-4)).save(filename)
        except:
            pass
        
        print "Downloaded cover image from %s" % self.image_url
        print "Freevo knows nothing about the copyright of this image, please"
        print "go to %s to check for more informations about private." % self.image_url
        print "use of this image"
            
    def str2XML(self, line):
        """return a valid XML string"""
        
        try:
            # s = unicode(string.replace(line, "&", "&amp;"), 'latin-1')
            s = unicode(line, 'latin-1')
            while s[-1] == ' ':
                s = s[:-1]
            if s[:4] == '&#34':
                s = s[5:]
            if s[-4:] == '#34;':
                s = s[:-5]
            return s
        except:
            return line
    
    def getmedia_id(self, drive):
        """drive (device string)
        return a unique identifier for the disc

        The identifier is 16 bytes read at offset 0x832d followed by the
        32 byte label read at offset 32808, the label without trailing
        blanks.  A drive that is not an existing path is returned unchanged.

        Raises FxdImdb_IO_Error when the drive cannot be opened or read.
        """

        if not os.path.exists(drive): return drive

        try:
            img = open(drive)
        except IOError:
            raise FxdImdb_IO_Error('No disc in drive %s' % drive)

        # close the device on every path; it stayed open when a seek or read
        # failed
        try:
            try:
                img.seek(0x0000832d)
                disc_id = img.read(16)
                img.seek(32808, 0)
                label = img.read(32)
            except IOError:
                raise FxdImdb_IO_Error('No disc in drive %s' % drive)
        finally:
            img.close()

        # drop the blanks the label is padded with (a label made of blanks
        # only does not match and is kept as it is)
        m = re.match("^(.*[^ ]) *$", label)
        if m:
            label = m.group(1)

        return disc_id+label
        
    def print_info(self):
        """return info part for FXD writing""" 
        ret = ''
        if self.info:
            ret = '    <info>\n'
            for k in self.info.keys():
                ret += '      <%s>%s</%s>\n' % (k, self.info[k], k)
            ret += '    </info>\n'
        return ret
        
    def print_video(self):
        """return info part for FXD writing""" 
        ret = ''
        for vid in self.video:
            type, idref, device, mpl_opts, fname = vid
            ret += '      <%s' % self.str2XML(type)
            ret += ' id=\"%s\"' % self.str2XML(idref)
            if device: ret += ' media-id=\"%s\"' % self.str2XML(self.getmedia_id(device))
            if mpl_opts: ret += ' mplayer-options=\"%s\">' % self.str2XML(mpl_opts)
            else: ret += '>'
            ret += '%s' % self.str2XML(fname)
            ret += '</%s>\n' % self.str2XML(type)
        return ret
        
    def print_variant(self):
        """return info part for FXD writing""" 
        ret = ''
        for x in range(len(self.variant)):
            name, idref, mpl_opts, sub, s_dev, audio, a_dev = self.variant[x]
            
            ret += '      <variant name=\"%s\"' % self.str2XML(name)
            if self.varmpl_opt: ret += ' mplayer-options=\"%s\">\n' % self.str2XML(self.varmpl_opt)
            else: ret += '>\n'
            ret += '         <part ref=\"%s\"' % self.str2XML(idref)
            if mpl_opts: ret += ' mplayer-options=\"%s\">\n' % self.str2XML(mpl_opts)
            else: ret += ">\n"
            if sub:
                ret += '          <subtitle'
                if s_dev: ret += ' media-id=\"%s\">' % self.str2XML(self.getmedia_id(s_dev))
                else: ret += '>'
                ret += '%s</subtitle>\n' % self.str2XML(sub)
            if audio:
                ret += '          <audio'
                if a_dev: ret += ' media-id=\"%s\">' % self.str2XML(self.getmedia_id(a_dev))
                else: ret += '>'
                ret += '%s</audio>\n' % self.str2XML(audio)
            ret += '        </part>\n'
            ret += '      </variant>\n'
        
        return ret
        

#--------- Exception class

class Error(Exception):
    """Base class for exceptions in Imdb_Fxd

    message: text describing the problem; str() of the exception returns it.
    """
    def __init__(self, message):
        # let Exception record the message in args too (it stayed empty)
        Exception.__init__(self, message)
        self.message = message
    def __str__(self):
        # __str__ has to return a string whatever was passed as message
        return str(self.message)
        
class FxdImdb_Error(Error):
    """General FxdImdb failure (not raised by the code in this module)"""
        
class FxdImdb_XML_Error(Error):
    """Raised for fxd content problems: an fxd file that is invalid or
    cannot be updated, mixing <movie> with <disc-set>, or bad/missing values
    for the tuple helpers"""
        
class FxdImdb_IO_Error(Error):
    """Raised when an fxd file cannot be opened for writing or updating, or
    when a disc cannot be read from a drive"""
    
class FxdImdb_Net_Error(Error):
    """Raised when a request to IMDB fails"""
            
#------- Helper functions for creating tuples - these functions are classless

def makeVideo(type, id_ref, file, **values):
    """Create a video tuple

    type:   'dvd', 'file' or 'vcd'
    id_ref: id of the video inside the fxd
    file:   file name / parameter of the video
    values: optional keywords device and mplayer_opt
    Returns (type, id_ref, device, mplayer_opt, file).  When a device is
    given, file is made relative to its mount point.

    Raises FxdImdb_XML_Error for a missing value or an unknown type.
    """
    if type is None or id_ref is None or file is None:
        raise FxdImdb_XML_Error("Required values missing for tuple creation")

    if type not in ('dvd', 'file', 'vcd'):
        raise FxdImdb_XML_Error("Invalid type passed to makeVideo")

    # (a leftover debug print of the keyword dict was removed here)
    device = values.get('device')
    mplayer_opt = values.get('mplayer_opt')

    if device:
        file = relative_path(file)

    return (type, id_ref, device, mplayer_opt, file)
    
def makePart(name, id_ref, **values):
    """Create a part tuple

    name, id_ref: variant name and id of the video it refers to
    values: optional keywords mplayer_opt, sub, s_dev, audio and a_dev
    Returns (name, id_ref, mplayer_opt, sub, s_dev, audio, a_dev).  A
    subtitle / audio file with a device is made relative to its mount point.

    Raises FxdImdb_XML_Error when name or id_ref is missing.
    """
    if id_ref == None or name == None:
        raise FxdImdb_XML_Error("Required values missing for tuple creation")

    # keywords that were not passed default to None
    mplayer_opt = values.get('mplayer_opt')
    sub = values.get('sub')
    s_dev = values.get('s_dev')
    audio = values.get('audio')
    a_dev = values.get('a_dev')

    if a_dev:
        audio = relative_path(audio)
    if s_dev:
        sub = relative_path(sub)

    return (name, id_ref, mplayer_opt, sub, s_dev, audio, a_dev)
    
def makeFile_opt(mplayer_opt, file):
    """Create a file_opt tuple

    Returns (mplayer_opt, file) with file made relative to the mount point
    of its removable medium.

    Raises FxdImdb_XML_Error when one of the two values is None.
    """
    if file == None or mplayer_opt == None:
        raise FxdImdb_XML_Error("Required values missing for tuple creation")

    return (mplayer_opt, relative_path(file))

#--------- classless private functions
    
def relative_path(filename):
    """return the relative path to a mount point for a file on a removable disc

    filename is walked upwards one path component at a time; when one of the
    mount directories in config.REMOVABLE_MEDIA is reached, the part below
    it is returned.  A filename that is neither absolute nor a mount point,
    or that lies under none of the mount directories, is returned unchanged.
    """
    from os.path import isabs, ismount, split, join

    if not isabs(filename) and not ismount(filename): return filename

    for item in config.REMOVABLE_MEDIA:
        mountdir = item.mountdir
        # cheap pre-check: the mount dir has to occur in the name at all
        if filename.find(mountdir) == -1:
            continue

        head = filename
        tail = ''
        while head != mountdir:
            parent, name = split(head)
            if parent == head:
                # reached the top without meeting this mount dir.  The old
                # code returned filename here and so never tried the other
                # mount dirs (e.g. /mnt/cd tested before /mnt/cdrom); it
                # could also loop forever once head became ''
                break
            head = parent
            if name:
                if tail: tail = join(name, tail)
                else: tail = name
        else:
            # loop ended because head == mountdir
            return tail

    return filename
    
def point_maker(matching):
    """
    small help function for re.sub: join the first two groups of the
    match with a dot
    """
    first, second = matching.group(1, 2)
    return '%s.%s' % (first, second)

diff -Naur freevo-cleancvs/src/video/plugins/imdb.py freevo-clean/src/video/plugins/imdb.py
--- freevo-cleancvs/src/video/plugins/imdb.py	2003-07-10 05:30:23.000000000 +0200
+++ freevo-clean/src/video/plugins/imdb.py	2003-07-10 14:43:42.000000000 +0200
@@ -15,6 +15,9 @@
 #
 # -----------------------------------------------------------------------
 # $Log: imdb.py,v $
+# Revision 1.16 2003/07/10 15:00:00 den_RDC
+# rewrite to use fxdimdb
+#
 # Revision 1.15  2003/07/08 20:02:27  dischi
 # small bugfix
 #
@@ -74,28 +77,23 @@
 import config
 import plugin
 import re
+import time
+from helpers.fxdimdb import FxdImdb, makeVideo, point_maker
 
 from gui.PopupBox import PopupBox
 
-
-def point_maker(matching):
-    """
-    small help function to split a movie name into parts
-    """
-    return '%s.%s' % (matching.groups()[0], matching.groups()[1])
-
-
+FALSE = 0
+TRUE = 1
 
 class PluginInterface(plugin.ItemPlugin):
+    
     def imdb_get_disc_searchstring(self, item):
         name  = item.media.label
         name  = re.sub('([a-z])([A-Z])', point_maker, name)
         name  = re.sub('([a-zA-Z])([0-9])', point_maker, name)
         name  = re.sub('([0-9])([a-zA-Z])', point_maker, name.lower())
-
         for r in config.IMDB_REMOVE_FROM_LABEL:
             name  = re.sub(r, '', name)
-
         parts = re.split('[\._ -]', name)
         
         name = ''
@@ -106,109 +104,42 @@
             return name[:-1]
         else:
             return ''
-
         
     def actions(self, item):
         self.item = item
+        print item.type
+        print item.mode
         if item.type == 'video'  and not hasattr(item, 'fxd_file'):
-            print item.mode
             if item.mode == 'file':
-                return [ ( self.imdb_search_file, 'Search IMDB for this file',
+                return [ ( self.imdb_search , 'Search IMDB for this file',
                            'imdb_search_or_cover_search') ]
             if item.mode in ('dvd', 'vcd'):
                 s = self.imdb_get_disc_searchstring(self.item)
-                print s
+                print 'Almost'
                 if s:
-                    return [ ( self.imdb_search_disc, 'Search IMDB for [%s]' % s,
+                    print 'YES'
+                    return [ ( self.imdb_search , 'Search IMDB for [%s]' % s,
                                'imdb_search_or_cover_search') ]
         return []
-
-
-    def imdb_search_disc(self, arg=None, menuw=None):
-        """
-        search imdb for this disc item
-        """
-        import helpers.imdb
-
-        box = PopupBox(text='searching IMDB...')
-        box.show()
-        
-        name = self.imdb_get_disc_searchstring(self.item)
-        items = []
-        try:
-            for id,name,year,type in helpers.imdb.search(name):
-                items += [ menu.MenuItem('%s (%s, %s)' % (name, year, type),
-                                         self.imdb_create_fxd_disc, (id, year)) ]
-            moviemenu = menu.Menu('IMDB QUERY', items)
-        except:
-            box.destroy()
-            box = PopupBox(text='Unknown error while connecting to IMDB')
-            box.show()
-            time.sleep(2)
-            box.destroy()
-            return
-
-        box.destroy()
-        menuw.pushmenu(moviemenu)
-
-
-    def imdb_create_fxd_disc(self, arg=None, menuw=None):
-        """
-        create fxd file for the disc item
-        """
-        import helpers.imdb
-
-        box = PopupBox(text='getting data...')
-        box.show()
-        
-        filename = os.path.join(config.MOVIE_DATA_DIR, self.item.media.id)
-
-        # bad hack to set the drive, helpers/imdb.py really needs
-        # a bigger update
-        helpers.imdb.drive = self.item.media.devicename
-        helpers.imdb.get_data_and_write_fxd(arg[0], filename,
-                                            self.item.media.devicename,
-                                            None, (self.item.mode, ), None)
-
-        # check if we have to go one menu back (called directly) or
-        # two (called from the item menu)
-        back = 1
-        if menuw.menustack[-2].selected != self.item:
-            back = 2
-            
-        # go back in menustack
-        for i in range(back):
-            menuw.delete_menu()
-        
-        box.destroy()
-
             
-    def imdb_search_file(self, arg=None, menuw=None):
+    def imdb_search(self, arg=None, menuw=None):
         """
         search imdb for this item
         """
-        import helpers.imdb
-
+        fxd = FxdImdb()
+        
         box = PopupBox(text='searching IMDB...')
         box.show()
-        
-        name = self.item.name
-        
-        name  = os.path.basename(os.path.splitext(name)[0])
-        name  = re.sub('([a-z])([A-Z])', point_maker, name)
-        name  = re.sub('([a-zA-Z])([0-9])', point_maker, name)
-        name  = re.sub('([0-9])([a-zA-Z])', point_maker, name.lower())
-        parts = re.split('[\._ -]', name)
-        
-        name = ''
-        for p in parts:
-            if not p.lower() in config.IMDB_REMOVE_FROM_SEARCHSTRING:
-                name += '%s ' % p
 
         items = []
+        
+        if self.item.mode in ('dvd', 'vcd'):
+            disc = TRUE
+        else:
+            disc = FALSE
 
         try:
-            for id,name,year,type in helpers.imdb.search(name):
+            for id,name,year,type in fxd.guessImdb(self.item.name, disc):
                 items += [ menu.MenuItem('%s (%s, %s)' % (name, year, type),
                                          self.imdb_create_fxd, (id, year)) ]
         except:
@@ -240,24 +171,25 @@
         """
         create fxd file for the item
         """
-        import helpers.imdb
         import directory
+        fxd = FxdImdb()
         
         box = PopupBox(text='getting data...')
         box.show()
         
-        if self.item.media and self.item.media.id: #if this exists we got a cdrom/dvdrom
-            filename = os.path.join(config.MOVIE_DATA_DIR, self.item.media.id)
-            device   = self.item.media.devicename
-            # bad hack to set the drive, helpers/imdb.py really needs
-            # a bigger update
-            helpers.imdb.drive = self.item.media.devicename
+        if self.item.media and self.item.media.devicename: #if this exists we got a cdrom/dvdrom
+            devicename = self.item.media.devicename
+        else: devicename = None
+        
+        fxd.setImdbId(arg[0])
+        
+        if self.item.mode in ('dvd', 'vcd'):
+            fxd.setDiscset(devicename, None)
         else:
-            filename = os.path.splitext(self.item.filename)[0]
-            device   = None
+            video = makeVideo('file', 'f1', self.item.filename, device=devicename)
+            fxd.setVideo(video)
 
-        helpers.imdb.get_data_and_write_fxd(arg[0], filename, device, None,
-                                            (os.path.basename(self.item.filename), ), None)
+        fxd.writeFxd()
 
         # check if we have to go one menu back (called directly) or
         # two (called from the item menu)

Title: fxdimdb documentation

fxdimdb.py documentation

by den_RDC - doc v0.11 - fxdimdb.py v0.1

Welcome.

This document explains, in a short and easy way, how you can use fxdimdb.py's functions to create FXD files conforming to the Freevo XML DTD. This module is a rewrite of the old helpers.imdb module. It contains some functions (mainly imdb page parsing and searching) from the old module; the rest is all written from scratch. If you encounter a bug in this document or in the module, please be so kind as to report it on freevo-devel or #freevo (irc.freenode.net).

All functions which are considered "private" have been left out of this documentation. Nothing stops you from looking at the source, of course, or running pydoc on the module. However, be aware that those functions could mess up the class variables.

Some knowledge about the FXD structure is required to use this module; I'll try to give a brief description. FXD files can be divided into 2 main categories: <disc-set> (used in setDiscset) and <movie> (used in setVideo and setVariants). Both can contain files and discs on different (or removable) media. <disc-set> is mainly targeted at discs and disc collections, and has no options to include files on HD. <movie> is mainly targeted at files, but can also be used for files on removable media and DVDs/VCDs. For single DVDs, VCDs or a set of DVDs/VCDs, the use of <disc-set> is preferred. For all other combinations, a <movie> tag is preferred. <movie> and <disc-set> are mutually exclusive, so trying to use them both in the same FXD file will raise an FxdImdb_XML_Error with an explanation of what violated the FXD DTD. If you want to know more about the Freevo FXD layout, look inside Docs/freevo_XML_data.DTD .

Notes. 1) Adding imdb information to existing files is not yet implemented, nor is changing existing attributes or values. Stuff can only be added (which should be enough for most people). You are free to add such functionality. 2) Image searching/downloading is done automatically. As I cannot determine which file is the biggest/best quality, all headers are downloaded and the Content-Length is extracted from each. The largest file gets downloaded. (tell me if you know another way!) 3) Everything is fully proxy server (transparent and regular) compliant. 4) If necessary, media_ids can be passed instead of devicenames and they will make it into the fxd. However, no checking is possible to see if the media_id is valid, so be careful. 5) If an existing fxd file is to be "appended", it is first passed through xml_parser to check whether it's valid. It doesn't make much sense to update invalid files. 6) All output generated by fxdimdb.py is also (although temporarily) passed through xml_parser to make bug searching faster and easier.

Thx - den_RDC ps. my spell checker doesn't work, so blame all spelling mistakes on me :) Most recent version of this document is always found here.

Modules

codecs
os

re
string

sys
urllib

urllib2
urlparse

Classes

exceptions.Exception

Error

FxdImdb_Error
FxdImdb_IO_Error
FxdImdb_Net_Error
FxdImdb_XML_Error

FxdImdb

class FxdImdb

Class for creating fxd files and fetching imdb information

Methods defined here:

guessImdb(self, filename, LABEL=FALSE): Guess possible imdb movies from filename. Same return as searchImdb. Path and extension are stripped if present. If label is set to true, IMDB_REMOVE_FROM_LABEL is used instead of IMDB_REMOVE_FROM_SEARCHSTRING

isDiscset(self): Check if fxd file describes a disc-set; returns 1 for true, 0 for false, None for an invalid file. This is useful when adding extra files/discs/etc to an XML file. This will always return None if you didn't call setFxdFile.

searchImdb(self, name): name (string), returns id list Search for name and returns an id list with tuples: (imdb_id , name, year, type)

setDiscset(self, device, regexp, *file_opts, **mpl_global_opt): device (string), regexp (string), file_opts (tuple (mplayer-opts,file)), mpl_global_opt (string) Set media is dvd/vcd. Use this function to create a <disc-set>. Either device OR regexp needs to be set; setting them both violates the DTD and results in an FxdImdb_XML_Error. file_opts are optional, and can be made with makeFile_opt. Multiple file_opts tuples can be passed separated by commas. Optionally, you can pass a keyword (mplayer_opt=value) at the end to set the default mplayer options for all files on that disc. This function is also used when updating FXD files. file_opts of an existing disc cannot be updated at the moment.

setFxdFile(self, fxdfilename=None, overwrite=FALSE): fxdfilename (string, full path) Set fxd file to write to; may be omitted, may be an existing file (data will be added). If overwrite is true, the file will be overwritten if it exists. This method isn't mandatory. When you don't call it, the FXD file gets written to the same path as the first file in <video>. If that happens to be on a removable device, or if <disc-set> is used, the fxd file is written to MOVIE_DATA_DIR, with the media_id + label as filename. It doesn't matter if the file is passed with or without the fxd extension.

setImdbId(self, id): id (number) Set an imdb_id number for object, and fetch data. Set an imdb_id number for this FXD. WARNING: Due to code sharing between functions, using searchImdb after setImdbId can and probably will invalidate all imdb-related settings.

setVariants(self, *parts, **mplayer_opt): variants/parts (tuple (name, ref, mpl_opts, sub, s_dev, audio, a_dev)), mplayer_opt Set Variants & parts. Optional. Defines a number of parts (which include the <variant> tag). These define variations of a file (like different versions, audio tracks and subtitles). The keyword mplayer_opt=value can be passed last to set default mplayer options for all variants/parts. Parts can be made by makePart. Also usable for updating fxd files.

setVideo(self, *videos, **mplayer_opt): videos (tuple (type, id-ref, device, mplayer-opts, file/param)) (multiple allowed), global_mplayer_opts Set media file(s) for fxd (<movie>). At least 1 video tuple is required. makeVideo is used to create them, and multiple video tuples can be passed separated by commas. The mplayer_opt keyword works as usual (and is optional, of course). Also usable for updating fxd files.

writeFxd(self): Write fxd file

Data and non-method functions defined here:

__doc__ = 'Class for creating fxd files and fetching imdb information'

__module__ = 'fxdimdb'

class Error(Error)
	`used to raise exceptions`
		Import this class to catch all FxdImdb errors in one go. All errors belong to one of the subclasses and have an error message that explains what went wrong :) There are 4 subclasses: - FxdImdb_IO_Error - IOErrors, the usual stuff (no disc in drive, file errors) - FxdImdb_XML_Error - You or the class is trying to write non-valid FXD files/elements. - FxdImdb_Error - general error, generally not used at the moment. - FxdImdb_Net_Error - Something wrong with the Net connection. 99% of the cases this is an unplugged lan cable, a disconnected modem or just Imdb.com that's down

Functions

makeFile_opt(mplayer_opt, file): Create a file_opt tuple. Both mplayer_opt and file are required. If file has an absolute path it is made relative to the mountdir.

makePart(name, id_ref, **values): Create a part tuple. Name and id_ref (which points to a video file) are required. The optional values (it makes sense to pass at least 1) are passed with keywords: mplayer_opt=value - mplayer options for this part; sub=value - a subtitle filename (if s_dev is defined, relative path will be calculated); s_dev=value - subtitle media device, media-id will be calculated; audio=value - an audio file, analogous to subtitle; a_dev=value - audio media device, analogous to s_dev

makeVideo(type, id_ref, file, **values): Create a video tuple. Required: type - can be 'dvd', 'vcd' or 'file' - anything else generates a FxdImdb_XML_Error; id_ref - used to link files with variants and vice versa; file - the filename (if a device is optionally given, relative path is calculated). This can also be a dvd pointer like dvd://1 or vcd://4. Optional keywords: mplayer_opt=value - this should be familiar; device=value - removable device. If file has an absolute path, this is made relative to the mountdir.

Data
		FALSE = 0 TRUE = 1 __file__ = './helpers/fxdimdb.py' __name__ = 'fxdimdb' freevo_version = '1.3.2' imdb_info_tags = ('year', 'genre', 'tagline', 'plot', 'rating', 'runtime') imdb_title_list = '/tmp/imdb-movies.list' imdb_title_list_url = 'ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/movies.list.gz' imdb_titles = None txdata = None txheaders = {'Accept-Language': 'en-us', 'User-Agent': 'freevo 1.3.2 (linux2)'}

Examples
		#example 1 - <disc-set>
		from fxdimdb import FxdImdb, makeFile_opt
		fxd = FxdImdb()
		imdblist = fxd.searchImdb('the nightmare before christmas')
		fxd.setImdbId(imdblist[0][0])
		opt1 = makeFile_opt('-alang=nl', '/mnt/cdrom/test.avi')
		opt2 = makeFile_opt('-ni -nocache', '/mnt/cdrom/halfbroken.avi')
		fxd.setDiscset('/dev/dvd', None, opt1, opt2, mplayer_opt="-autoq 0")
		fxd.writeFxd()

		#example 2 - <movie>
		from fxdimdb import FxdImdb, makeVideo, makePart
		fxd = FxdImdb()
		imdblist = fxd.guessImdb('/storage/vid/scifi/matrix.avi')
		fxd.setImdbId(imdblist[0][0])
		video = makeVideo('file', 'f1', '/storage/vid/scifi/matrix.avi')
		part1 = makePart('Matrix in english', 'f1', audio='en.mp3')
		part2 = makePart('Matrix en français', 'f1', audio='fr.mp3')
		part3 = makePart('Matrix in het Nederlands', 'f1', audio='en.mp3', sub='nl.srt')
		fxd.setVideo(video)
		fxd.setVariants(part1, part2, part3)
		fxd.writeFxd()

Re: [Freevo-devel] [NEW] fxdimdb.py class for fxd and imdb handling...

Reply via email to