Author: duncan
Date: Sun Mar 4 18:35:59 2007
New Revision: 9302
Modified:
branches/rel-1/freevo/freevo_config.py
branches/rel-1/freevo/src/util/fxdimdb.py
Log:
[ 1670296 ] Crash when fetching information from IMDB
Final, hopefully, set of fixes for imdb extraction
Added IMDB_REMOVE_FROM_NAME to allow tv programmes to be retrieved
Improved the processing of quoted titles
Modified: branches/rel-1/freevo/freevo_config.py
==============================================================================
--- branches/rel-1/freevo/freevo_config.py (original)
+++ branches/rel-1/freevo/freevo_config.py Sun Mar 4 18:35:59 2007
@@ -662,6 +662,8 @@
# list of regexp to be ignored on a disc label
IMDB_REMOVE_FROM_LABEL = ('season[\._ -][0-9]+', 'disc[\._ -][0-9]+',
'd[\._ -][0-9]+', 'german')
+# list of regexp to be ignored on a filename
+IMDB_REMOVE_FROM_NAME = ['^[0-9_]+']
# list of words to ignore when searching based on a filename
IMDB_REMOVE_FROM_SEARCHSTRING = ('the', 'a')
Modified: branches/rel-1/freevo/src/util/fxdimdb.py
==============================================================================
--- branches/rel-1/freevo/src/util/fxdimdb.py (original)
+++ branches/rel-1/freevo/src/util/fxdimdb.py Sun Mar 4 18:35:59 2007
@@ -1,6 +1,6 @@
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
-# helpers/fxdimdb.py - class and helpers for fxd/imdb generation
+# fxdimdb.py - class and helpers for fxd/imdb generation
# -----------------------------------------------------------------------
# $Id$
#
@@ -322,7 +322,8 @@
content = file.read()
file.close()
- if content.find('</disc-set>') != -1: return 1
+ if content.find('</disc-set>') != -1:
+ return 1
return 0
@@ -345,7 +346,16 @@
if label:
for r in config.IMDB_REMOVE_FROM_LABEL:
- name = re.sub(r, '', name)
+ try:
+ name = re.sub(r, '', name)
+ except Exception, e:
+ print e
+ else:
+ for r in config.IMDB_REMOVE_FROM_NAME:
+ try:
+ name = re.sub(r, '', name)
+ except Exception, e:
+ print e
parts = re.split('[\._ -]', name)
name = ''
@@ -360,6 +370,17 @@
#------ private functions below .....
+ def convert_entities(self, contents):
+ s = contents.strip()
+ s = s.replace('\n',' ')
+ s = s.replace(' ',' ')
+ s = s.replace('<','<')
+ s = s.replace('>','>')
+ s = s.replace('"','"')
+ s = s.replace('&','&')
+ s = s.replace('&#','&#')
+ return s
+
def write_discset(self):
"""Write a <disc-set> to a fresh file"""
@@ -375,7 +396,7 @@
" The information in this file are from the Internet " +
"Movie Database (IMDb).\n" +
" Please visit http://www.imdb.com for more
informations.\n")
- i.write(" <source url=\"http://www.imdb.com/tt%s\"/>\n" %
self.imdb_id +
+ i.write(" <source url=\"http://www.imdb.com/title/tt%s\"/>\n" %
self.imdb_id +
" </copyright>\n")
#disc-set
i.write(" <disc-set title=\"%s\">\n" % self.str2XML(self.title))
@@ -427,7 +448,7 @@
" The information in this file are from the Internet " +
"Movie Database (IMDb).\n" +
" Please visit http://www.imdb.com for more
informations.\n")
- i.write(" <source url=\"http://www.imdb.com/Title?%s\"/>\n" %
self.imdb_id +
+ i.write(" <source url=\"http://www.imdb.com/title/tt%s\"/>\n" %
self.imdb_id +
" </copyright>\n")
# write movie
i.write(" <movie title=\"%s\">\n" % self.str2XML(self.title))
@@ -673,14 +694,7 @@
# Replace special characters in the items
for (k,v) in self.info.items():
- s = v.strip()
- s = s.replace('\n',' ')
- s = s.replace(' ',' ')
- s = s.replace('&','&')
- s = s.replace('<','<')
- s = s.replace('>','>')
- s = s.replace('"','"')
- self.info[k] = s
+ self.info[k] = self.convert_entities(v)
if config.DEBUG:
for (k,v) in self.info.items():
@@ -844,16 +858,21 @@
"""return a valid XML string"""
try:
s = Unicode(line)
- while s[-1] == u' ':
- s = s[:-1]
- if s[:4] == u'"':
+ # remove leading and trailing spaces
+ s = s.strip()
+ # remove leading and trailing quotes
+ s = s.strip('\'"')
+ if s[:5] == u'&#34;':
s = s[5:]
- if s[-4:] == u'#34;':
+ if s[-5:] == u'&#34;':
s = s[:-5]
+ if s[:6] == u'&quot;':
+ s = s[6:]
+ if s[-6:] == u'&quot;':
+ s = s[:-6]
# replace all & to &amp; ...
s = s.replace(u"&", u"&amp;")
-
- # ... but this may be wrong for &#
+ # ... but this is wrong for &#
s = s.replace(u"&amp;#", u"&#")
return s
except:
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog