Hello,
I added a new function to imdbpy that provides the URL of the poster
of a movie.

For example, you can get the poster of Avatar like this:

from imdb.parser.http import IMDbHTTPAccessSystem
access = IMDbHTTPAccessSystem()
print access.get_poster(movie)

You get this:
http://ia.media-imdb.com/images/M/MV5BMTYwOTEwNjAzMl5BMl5BanBnXkFtZTcwODc5MTUwMw@@._V1._SX270_SY400_.jpg

I have attached the diff file for the project.

I hope that it can be added to imdbpy.

Sebastien
Index: imdb/Movie.py
===================================================================
--- imdb/Movie.py	(révision 790)
+++ imdb/Movie.py	(copie de travail)
@@ -127,7 +127,8 @@
                 'sales': 'merchandising links',
                 'faq': 'faqs',
                 'parental guide': 'parents guide',
-                'frequently asked questions': 'faqs'}
+                'frequently asked questions': 'faqs',
+                'media' : 'media url'}
 
     keys_tomodify_list = ('plot', 'trivia', 'alternate versions', 'goofs',
                         'quotes', 'dvd', 'laserdisc', 'news', 'soundtrack',
Index: imdb/__init__.py
===================================================================
--- imdb/__init__.py	(révision 790)
+++ imdb/__init__.py	(copie de travail)
@@ -65,6 +65,8 @@
 imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
 # http://akas.imdb.com/find?%s
 imdbURL_find = imdbURL_base + 'find?%s'
+# http://akas.imdb.com/media/
+imdbURL_media = imdbURL_base + 'media/%s'
 
 # Name of the configuration file.
 confFileName = 'imdbpy.cfg'
@@ -896,4 +898,5 @@
             if isinstance(member, MethodType):
                 sm_dict.update({name: member.__doc__})
         return sm_dict
+    
 
Index: imdb/parser/http/movieParser.py
===================================================================
--- imdb/parser/http/movieParser.py	(révision 790)
+++ imdb/parser/http/movieParser.py	(copie de travail)
@@ -31,6 +31,7 @@
 import urllib
 
 from imdb import imdbURL_base
+from imdb import imdbURL_media
 from imdb.Person import Person
 from imdb.Movie import Movie
 from imdb.Company import Company
@@ -149,7 +150,18 @@
     except (TypeError, ValueError):
         return None
 
+def _process_media_url(x):   
+    """The image of the poster contain a link to the media content (diaporama).
+    Inside the link there is the reference of the poster.
+    This reference + media URL provide a page with the poster in bigger format
+    """
+    url_parts = x.split("/")
+    for part in url_parts:
+        if part.startswith('rm'):
+            return imdbURL_media % part
+    return ''
 
+
 class DOMHTMLMovieParser(DOMParserBase):
     """Parser for the "combined details" (and if instance.mdparse is
     True also for the "main details") page of a given movie.
@@ -357,7 +369,13 @@
                 Extractor(label='cover url',
                         path="//a[@name='poster']",
                         attrs=Attribute(key='cover url',
-                                        path="./img/@src"))
+                                        path="./img/@src")),
+                
+                Extractor(label='media url',
+                        path="//a[@name='poster']",
+                        attrs=Attribute(key='media url',
+                                path="./@href",
+                                postprocess=_process_media_url))
                 ]
 
     preprocessors = [
@@ -1828,8 +1846,18 @@
         if not data2:
             return {}
         return {'parents guide': data2}
+    
+class DOMHTMLImagesParser(DOMParserBase):
+    """Parser for images page.
+    The final result will be a dictionary with the url of the poster
+    """
+    extractors = [
+        Extractor(label='poster url',
+                  path="//div[@id='media_canvas']",
+                  attrs=Attribute(key='poster url',
+                  path="./descendant::img/@src"))
+    ]
 
-
 _OBJECTS = {
     'movie_parser':  ((DOMHTMLMovieParser,), None),
     'plot_parser':  ((DOMHTMLPlotParser,), None),
@@ -1874,6 +1902,7 @@
     'movie_faqs_parser':  ((DOMHTMLFaqsParser,), None),
     'airing_parser':  ((DOMHTMLAiringParser,), None),
     'synopsis_parser':  ((DOMHTMLSynopsisParser,), None),
-    'parentsguide_parser':  ((DOMHTMLParentsGuideParser,), None)
+    'parentsguide_parser':  ((DOMHTMLParentsGuideParser,), None),
+    'images_parser':  ((DOMHTMLImagesParser,), None)
 }
 
Index: imdb/parser/http/__init__.py
===================================================================
--- imdb/parser/http/__init__.py	(révision 790)
+++ imdb/parser/http/__init__.py	(copie de travail)
@@ -34,7 +34,7 @@
 from imdb import IMDbBase, imdbURL_movie_main, imdbURL_person_main, \
                 imdbURL_character_main, imdbURL_company_main, \
                 imdbURL_keyword_main, imdbURL_find, imdbURL_top250, \
-                imdbURL_bottom100
+                imdbURL_bottom100, imdbURL_media
 from imdb.utils import analyze_title
 from imdb._exceptions import IMDbDataAccessError, IMDbParserError
 
@@ -762,5 +762,16 @@
             return []
         cont = self._retrieve(url)
         return parser.parse(cont)['data']
-
-
+    
+    
+    def get_poster(self, movie):
+        """Use the media url to get the url of the poster of the movie
+        """
+        poster_url = ''
+        url = movie['media']
+        if url:
+            cont = self._retrieve(url)
+            ret = self.mProxy.images_parser.parse(cont)
+            if 'poster url' in ret['data']:
+                poster_url = ret['data']['poster url']
+        return poster_url
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs 
proactively, and fine-tune applications for parallel performance. 
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Imdbpy-devel mailing list
Imdbpy-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/imdbpy-devel

Reply via email to