Hello,
I have added a new function to imdbpy that provides the URL of the poster
of a movie.
For example, you can get the poster of Avatar like this:
from imdb.parser.http import IMDbHTTPAccessSystem
access = IMDbHTTPAccessSystem()
print access.get_poster(movie)
You get this:
http://ia.media-imdb.com/images/M/MV5BMTYwOTEwNjAzMl5BMl5BanBnXkFtZTcwODc5MTUwMw@@._V1._SX270_SY400_.jpg
I have attached the diff file for the project.
I hope that it can be added to imdbpy.
Sebastien
Index: imdb/Movie.py
===================================================================
--- imdb/Movie.py (révision 790)
+++ imdb/Movie.py (copie de travail)
@@ -127,7 +127,8 @@
'sales': 'merchandising links',
'faq': 'faqs',
'parental guide': 'parents guide',
- 'frequently asked questions': 'faqs'}
+ 'frequently asked questions': 'faqs',
+ 'media' : 'media url'}
keys_tomodify_list = ('plot', 'trivia', 'alternate versions', 'goofs',
'quotes', 'dvd', 'laserdisc', 'news', 'soundtrack',
Index: imdb/__init__.py
===================================================================
--- imdb/__init__.py (révision 790)
+++ imdb/__init__.py (copie de travail)
@@ -65,6 +65,8 @@
imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
# http://akas.imdb.com/find?%s
imdbURL_find = imdbURL_base + 'find?%s'
+# http://akas.imdb.com/media/
+imdbURL_media = imdbURL_base + 'media/%s'
# Name of the configuration file.
confFileName = 'imdbpy.cfg'
@@ -896,4 +898,5 @@
if isinstance(member, MethodType):
sm_dict.update({name: member.__doc__})
return sm_dict
+
Index: imdb/parser/http/movieParser.py
===================================================================
--- imdb/parser/http/movieParser.py (révision 790)
+++ imdb/parser/http/movieParser.py (copie de travail)
@@ -31,6 +31,7 @@
import urllib
from imdb import imdbURL_base
+from imdb import imdbURL_media
from imdb.Person import Person
from imdb.Movie import Movie
from imdb.Company import Company
@@ -149,7 +150,18 @@
except (TypeError, ValueError):
return None
+def _process_media_url(x):
+ """The image of the poster contain a link to the media content (diaporama).
+ Inside the link there is the reference of the poster.
+ This reference + media URL provide a page with the poster in bigger format
+ """
+ url_parts = x.split("/")
+ for part in url_parts:
+ if part.startswith('rm'):
+ return imdbURL_media % part
+ return ''
+
class DOMHTMLMovieParser(DOMParserBase):
"""Parser for the "combined details" (and if instance.mdparse is
True also for the "main details") page of a given movie.
@@ -357,7 +369,13 @@
Extractor(label='cover url',
path="//a[@name='poster']",
attrs=Attribute(key='cover url',
- path="./img/@src"))
+ path="./img/@src")),
+
+ Extractor(label='media url',
+ path="//a[@name='poster']",
+ attrs=Attribute(key='media url',
+ path="./@href",
+ postprocess=_process_media_url))
]
preprocessors = [
@@ -1828,8 +1846,18 @@
if not data2:
return {}
return {'parents guide': data2}
+
+class DOMHTMLImagesParser(DOMParserBase):
+ """Parser for images page.
+ The final result will be a dictionary with the url of the poster
+ """
+ extractors = [
+ Extractor(label='poster url',
+ path="//div[@id='media_canvas']",
+ attrs=Attribute(key='poster url',
+ path="./descendant::img/@src"))
+ ]
-
_OBJECTS = {
'movie_parser': ((DOMHTMLMovieParser,), None),
'plot_parser': ((DOMHTMLPlotParser,), None),
@@ -1874,6 +1902,7 @@
'movie_faqs_parser': ((DOMHTMLFaqsParser,), None),
'airing_parser': ((DOMHTMLAiringParser,), None),
'synopsis_parser': ((DOMHTMLSynopsisParser,), None),
- 'parentsguide_parser': ((DOMHTMLParentsGuideParser,), None)
+ 'parentsguide_parser': ((DOMHTMLParentsGuideParser,), None),
+ 'images_parser': ((DOMHTMLImagesParser,), None)
}
Index: imdb/parser/http/__init__.py
===================================================================
--- imdb/parser/http/__init__.py (révision 790)
+++ imdb/parser/http/__init__.py (copie de travail)
@@ -34,7 +34,7 @@
from imdb import IMDbBase, imdbURL_movie_main, imdbURL_person_main, \
imdbURL_character_main, imdbURL_company_main, \
imdbURL_keyword_main, imdbURL_find, imdbURL_top250, \
- imdbURL_bottom100
+ imdbURL_bottom100, imdbURL_media
from imdb.utils import analyze_title
from imdb._exceptions import IMDbDataAccessError, IMDbParserError
@@ -762,5 +762,16 @@
return []
cont = self._retrieve(url)
return parser.parse(cont)['data']
-
-
+
+
+ def get_poster(self, movie):
+ """Use the media url to get the url of the poster of the movie
+ """
+ poster_url = ''
+ url = movie['media']
+ if url:
+ cont = self._retrieve(url)
+ ret = self.mProxy.images_parser.parse(cont)
+ if 'poster url' in ret['data']:
+ poster_url = ret['data']['poster url']
+ return poster_url
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Imdbpy-devel mailing list
Imdbpy-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/imdbpy-devel