Hello everybody,
The attached patch allows gPodder to use the MIME type found in the episode's
feed to guess the file extension when none can be determined using the current
method (parsing the URL).
This fixes an annoying bug with feeds whose URLs don't actually map
to real files. For instance, mininova.org URLs look
like "http://mininova.org/get/123456". The filenames are tucked away in the
Content-Disposition HTTP header (see RFC 1806), which makes it impossible for
gPodder to determine the file type from the URL alone.
I added an extension() method to the libpodcasts.podcastItem class, which
takes care of getting the file extension, and I had to modify several other
parts of the code to adapt to this change. Testing and feedback would
therefore be greatly appreciated.
Once the patch is applied, gPodder should behave exactly as before unless you are
subscribed to a feed with unusual URLs like the mininova ones mentioned above.
Thanks,
nick
Index: src/gpodder/libpodcasts.py
===================================================================
--- src/gpodder/libpodcasts.py (revision 743)
+++ src/gpodder/libpodcasts.py (working copy)
@@ -495,7 +495,7 @@
icon_size = 16
if os.path.exists( local_filename):
- file_type = util.file_type_by_extension( util.file_extension_from_url(url))
+ file_type = util.file_type_by_extension( model.get_value( iter, 9))
if file_type == 'audio':
status_icon = util.get_tree_icon(ICON_AUDIO_FILE, played, locked, self.icon_cache, icon_size)
elif file_type == 'video':
@@ -520,7 +520,9 @@
"""
Return a gtk.ListStore containing episodes for this channel
"""
- new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING)
+ new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING,
+ gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING,
+ gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING)
new_episodes = self.get_new_episodes()
for item in self.get_all_episodes():
@@ -528,7 +530,8 @@
description = '%s\n<small>%s</small>' % (saxutils.escape(item.title), saxutils.escape(item.one_line_description()))
else:
description = saxutils.escape(item.title)
- new_iter = new_model.append((item.url, item.title, gl.format_filesize(item.length, 1), True, None, item.cute_pubdate(), description, item.description, item.local_filename()))
+ new_iter = new_model.append((item.url, item.title, gl.format_filesize(item.length, 1), True,
+ None, item.cute_pubdate(), description, item.description, item.local_filename(), item.extension()))
self.iter_set_downloading_columns( new_model, new_iter, new_episodes)
self.update_save_dir_size()
@@ -610,7 +613,9 @@
break
episode.url = util.normalize_feed_url( enclosure.get( 'href', ''))
elif hasattr(entry, 'link'):
- extension = util.file_extension_from_url(entry.link)
+ (filename, extension) = util.filename_from_url(entry.link)
+ if extension == '' and hasattr( entry, 'type'):
+ extension = util.extension_from_mimetype(e.type)
file_type = util.file_type_by_extension(extension)
if file_type is not None:
log('Adding episode with link to file type "%s".', file_type, sender=episode)
@@ -685,6 +690,14 @@
else:
return ' '.join((l.strip() for l in lines if l.strip() != ''))
+ def extension( self):
+ ( filename, ext ) = util.filename_from_url(self.url)
+ # if we can't detect the extension from the url fallback on the mimetype
+ if ext == '' or util.file_type_by_extension(ext) is None:
+ ext = util.extension_from_mimetype(self.mimetype)
+ log('Getting extension from mimetype for: %s (mimetype: %s)' % (self.title, ext), sender=self)
+ return ext
+
def is_downloaded( self):
return os.path.exists( self.local_filename())
@@ -698,8 +711,8 @@
log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
def local_filename( self):
- ext = util.file_extension_from_url(self.url)
-
+ ext = self.extension()
+
# For compatibility with already-downloaded episodes,
# we accept md5 filenames if they are downloaded now.
md5_filename = os.path.join(self.channel.save_dir, md5.new(self.url).hexdigest()+ext)
@@ -707,8 +720,8 @@
return md5_filename
# If the md5 filename does not exist,
- episode = util.file_extension_from_url(self.url, complete_filename=True)
- episode = util.sanitize_filename(episode)
+ ( episode, e ) = util.filename_from_url(self.url)
+ episode = util.sanitize_filename(episode) + ext
# If the episode filename looks suspicious,
# we still return the md5 filename to be on
@@ -725,7 +738,7 @@
return self.title
def file_type( self):
- return util.file_type_by_extension( util.file_extension_from_url( self.url))
+ return util.file_type_by_extension( self.extension() )
@property
def basename( self):
Index: src/gpodder/libgpodder.py
===================================================================
--- src/gpodder/libgpodder.py (revision 743)
+++ src/gpodder/libgpodder.py (working copy)
@@ -224,7 +224,7 @@
return (True, service)
# Determine the file type and set the player accordingly.
- file_type = util.file_type_by_extension(util.file_extension_from_url(episode.url))
+ file_type = util.file_type_by_extension(episode.extension())
if file_type == 'video':
player = self.config.videoplayer
Index: src/gpodder/util.py
===================================================================
--- src/gpodder/util.py (revision 743)
+++ src/gpodder/util.py (working copy)
@@ -54,6 +54,7 @@
import urllib2
import httplib
import webbrowser
+import mimetypes
import feedparser
@@ -413,12 +414,21 @@
except:
return None
+def extension_from_mimetype(extension):
+ """
+ Simply guesses what the file extension should be from the mimetype
+ """
+ ext = mimetypes.guess_extension(extension)
+ if ext is not None:
+ return ext
+ else:
+ return ''
-def file_extension_from_url(url, complete_filename=False):
+def filename_from_url(url):
"""
- Extracts the (lowercase) file name extension (with dot)
+ Extracts the filename and (lowercase) extension (with dot)
from a URL, e.g. http://server.com/file.MP3?download=yes
- will result in the string ".mp3" being returned.
+ will result in the string ("file", ".mp3") being returned.
This function will also try to best-guess the "real"
extension for a media file (audio, video, torrent) by
@@ -426,43 +436,29 @@
into the query string to find better matches, if the
original extension does not resolve to a known type.
- If the optional parameter "complete_filename" is set to
- True, this will not return the extension, but the
- complete filename (basename) of the found media file.
-
- http://my.net/redirect.php?my.net/file.ogg => ".ogg"
- http://server/get.jsp?file=/episode0815.MOV => ".mov"
- http://s/redirect.mp4?http://serv2/test.mp4 => ".mp4"
+ http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
+ http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
+ http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
"""
(scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
- filename = os.path.basename( urllib.unquote(path))
- (tmp, extension) = os.path.splitext(filename)
+ (filename, extension) = os.path.splitext(os.path.basename( urllib.unquote(path)))
if file_type_by_extension(extension) is not None and not \
query.startswith(scheme+'://'):
# We have found a valid extension (audio, video, torrent)
# and the query string doesn't look like a URL
- if complete_filename:
- return filename
- else:
- return extension.lower()
-
+ return ( filename, extension.lower() )
+
# If the query string looks like a possible URL, try that first
if len(query.strip()) > 0 and query.find('/') != -1:
query_url = '://'.join((scheme, urllib.unquote(query)))
- query_extension = file_extension_from_url(query_url)
+ (query_filename, query_extension) = filename_from_url(query_url)
if file_type_by_extension(query_extension) is not None:
- if complete_filename:
- return os.path.basename(query_url)
- else:
- return query_extension
+ return os.path.splitext(os.path.basename(query_url))
- # No exact match found, simply return the original extension
- if complete_filename:
- return filename
- else:
- return extension.lower()
+ # No exact match found, simply return the original filename & extension
+ return ( filename, extension.lower() )
def file_type_by_extension( extension):
Index: src/gpodder/gui.py
===================================================================
--- src/gpodder/gui.py (revision 743)
+++ src/gpodder/gui.py (working copy)
@@ -1042,7 +1042,7 @@
else:
can_download = True
- if util.file_type_by_extension( util.file_extension_from_url( url)) == 'torrent':
+ if util.file_type_by_extension( self.active_channel.find_episode(url).extension() ) == 'torrent':
can_download = can_download or gl.config.use_gnome_bittorrent
can_download = can_download and not can_cancel
_______________________________________________
gpodder-devel mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/gpodder-devel