util

Duncan Webb Tue, 19 Dec 2006 05:32:22 -0800

Author: duncan
Date: Tue Dec 19 13:31:26 2006
New Revision: 8746

Modified:
   branches/rel-1/freevo/src/util/fxdimdb.py


Log:
[ 1618747 ] PATCH: Fix some bugs in IMDB lookup
Patch from Temar applied


Modified: branches/rel-1/freevo/src/util/fxdimdb.py
==============================================================================
--- branches/rel-1/freevo/src/util/fxdimdb.py   (original)
+++ branches/rel-1/freevo/src/util/fxdimdb.py   Tue Dec 19 13:31:26 2006
@@ -590,17 +590,43 @@
         dvd = 0
 
         soup = BeautifulSoup()
-        soup.feed(results.read())
+
+        try:
+            soup.feed(results.read())
+        except UnicodeDecodeError:
+            print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding"
+            pass
 
         title = soup.find('strong', {'class':'title'})
         image = soup.find('img', { 'title':title.next.strip() })
 
         self.title = title.next.strip()
         self.info['title'] = self.title
-        self.info['image'] = image['src']
+
+        if image:
+            self.info['image'] = image['src']
+
         self.info['year'] = title.find('a').string.strip()
-        self.info['plot'] = soup.find(text='Plot Outline:').next.strip()
-        self.info['tagline'] = soup.find(text='Tagline:').next.strip()
+
+        # Find Plot Outline/Summary:
+        # Normally the tag is named "Plot Outline:" - however sometimes
+        # the tag is "Plot Summary:". Search for both strings.
+        imdb_result = soup.find(text='Plot Outline:')
+        if not imdb_result:
+            imdb_result = soup.find(text='Plot Summary:')
+        if imdb_result:
+            self.info['plot'] = imdb_result.next.strip()
+        else:
+            self.info['plot'] = u''
+
+        # Find tagline - sometimes the tagline is missing.
+        # Use an empty string if no tagline could be found.
+        imdb_result = soup.find(text='Tagline:')
+        if imdb_result:
+            self.info['tagline'] = imdb_result.next.strip()
+        else:
+            self.info['tagline'] = u''
+
         self.info['genre'] = ''
         genre=soup.find(text='Genre:').parent
         genres = []
@@ -619,6 +645,11 @@
                 print 'items:', k, ':', v
             print 'id:', id, 'dvd:', dvd
 
+        # Add impawards.com poster URLs.
+        self.impawardsimages(self.info['title'], self.info['year'])
+
+        # Add images from IMDB database. These images are much smaller than
+        # the impawards ones.
         if not id:
             return (self.title, self.info, self.image_urls)
 
@@ -639,11 +670,72 @@
                     self.image_urls += [ image['src'] ]
             except urllib2.HTTPError, error:
                 pass
+            except UnicodeDecodeError:
+                # FIXME:
+                # This is a bad hack. Some character could not be converted to ascii.
+                # We ignore these errors as it does not really affect the FXD output.
+                pass
         print 'image_urls:', self.image_urls
 
         return (self.title, self.info, self.image_urls)
 
 
+    def impawardsimages(self, title, year):
+        """Generate URLs to the impawards movie posters and add them to the
+        global image_urls array."""
+
+        # Format of an impawards.com image URL:
+        #     http://www.impawards.com/<year>/posters/<title>.jpg
+        # 
+        # Some special characters like: blanks, ticks, ':', ','... have to be replaced
+        imp_image_name = title.lower()
+        imp_image_name = imp_image_name.replace(u' ', u'_')
+        imp_image_name = imp_image_name.replace(u"'", u'')
+        imp_image_name = imp_image_name.replace(u':', u'')
+        imp_image_name = imp_image_name.replace(u',', u'')
+        imp_image_name = imp_image_name.replace(u';', u'')
+        imp_image_name = imp_image_name.replace(u'.', u'')
+
+        # build up an array with all kind of image urls
+        imp_image_urls = [ ]
+        imp_base_url   = 'http://www.impawards.com/%s/posters' % year
+    
+        # add the normal poster URL to image_urls
+        imp_image_url   = '%s/%s.jpg' % (imp_base_url, imp_image_name)
+        imp_image_urls += [ imp_image_url ]
+        
+        # add the xxl poster URL to image_urls
+        imp_image_url   = '%s/%s_xlg.jpg' % (imp_base_url, imp_image_name)
+        imp_image_urls += [ imp_image_url ]
+
+        # add the ver1 poster URL in case no normal version exists
+        imp_image_url   = '%s/%s_ver1.jpg' % (imp_base_url, imp_image_name)
+        imp_image_urls += [ imp_image_url ]
+
+        # add the xxl ver1 poster URL
+        imp_image_url   = '%s/%s_ver1_xlg.jpg' % (imp_base_url, imp_image_name)
+        imp_image_urls += [ imp_image_url ]
+
+        # check for valid URLs and add them to self.image_urls
+        for imp_image_url in imp_image_urls:
+        
+            #print "IMPAWARDS: Checking image URL %s" % imp_image_url
+            try:
+                imp_req = urllib2.Request(imp_image_url, txdata, txheaders)
+
+                # an url is valid if the returned content-type is 'image/jpeg'
+                imp_r     = urllib2.urlopen(imp_req)
+                imp_ctype = imp_r.info()['Content-Type']
+                imp_r.close()
+
+                #print "IMPAWARDS: Found content-type %s for url %s" % (imp_ctype, imp_image_url)
+                if (imp_ctype == 'image/jpeg'):
+                    self.image_urls += [ imp_image_url ]
+
+            except:
+                pass
+
+
     def impawards(self, host, path):
         """parser for posters from www.impawards.com. TODO: check for licences
         of each poster and add all posters"""

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

[Freevo-cvslog] r8746 - branches/rel-1/freevo/src/util

Reply via email to