My wife recently asked for all the recent conference MP3s, so I whipped up a little script in Python to do that. You point it at an lds.org conference URL and it will pull all the individual talks (skipping the complete-session recordings) into a specified directory.
I thought I'd share it in case anyone is interested. Enjoy, Matt
"""Download every individual-talk MP3 linked from a conference page.

Example use::

    python downloadconference.py -u http://lds.org/conference/sessions/display/0,5239,23-1-690,00.html -d /tmp/conf

Links whose URL contains "Complete" (the full-session recordings) are
skipped.  Progress is logged to ``log.txt`` in the current directory.

Only requirement is BeautifulSoup module.

Licensed under PSF license.
Copyright 2007 - matt harrison
"""

import contextlib
import logging
import optparse
import os
import shutil
import sys

try:  # Python 2 module names
    import urllib2
    from urlparse import urljoin
except ImportError:  # Python 3 keeps the same urlopen() API in urllib.request
    import urllib.request as urllib2
    from urllib.parse import urljoin

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Keep the module importable (so --help and the pure helpers still work);
    # get_link_iter() reports the missing parser when it is actually needed.
    BeautifulSoup = None

logging.basicConfig(filename="log.txt", level=logging.DEBUG)


def get_contents(url):
    """Open *url* and return the file-like response; the caller closes it."""
    return urllib2.urlopen(url)


def get_link_iter(url):
    """Yield every ``<a>`` tag found in the HTML page at *url*.

    Raises ImportError if the BeautifulSoup module is not installed.
    """
    if BeautifulSoup is None:
        raise ImportError(
            "the BeautifulSoup module is required to parse %s" % url)
    # Parse while the response is open, then release the connection even if
    # parsing fails.
    with contextlib.closing(get_contents(url)) as html_page:
        soup = BeautifulSoup(html_page)
    for link in soup.findAll("a"):
        yield link


def get_mp3_iter(url):
    """Yield the absolute URL of each individual-talk MP3 linked from *url*."""
    for link in get_link_iter(url):
        if is_mp3(link):
            # Resolve relative hrefs against the page they were found on;
            # absolute hrefs pass through unchanged.
            yield urljoin(url, link["href"])


def is_mp3(link):
    """Return True if *link* points at an MP3 that is not a complete session.

    *link* is anything indexable by ``"href"`` (a BeautifulSoup tag in normal
    use).  Anchors without an ``href`` are simply not MP3 links.
    """
    try:
        href = link["href"]
    except KeyError:
        # e.g. <a name="top">: nothing to download.
        return False
    # filter out "Complete sessions"
    return href.endswith(".mp3") and "Complete" not in href


def copy_mp3s_to_dir(url, dest_dir):
    """Download every talk MP3 linked from *url* into *dest_dir*.

    *dest_dir* is created if it does not exist.  Existing files with the same
    name are overwritten.  Network and file errors propagate to the caller.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for mp3_url in get_mp3_iter(url):
        dest = os.path.join(dest_dir, get_filename(mp3_url))
        logging.info("Downloading %s", mp3_url)
        with contextlib.closing(urllib2.urlopen(mp3_url)) as fin:
            logging.info("Writing to %s", dest)
            # Binary mode: MP3 data must not go through newline translation.
            with open(dest, "wb") as fout:
                # Stream in chunks instead of holding the whole file in memory.
                shutil.copyfileobj(fin, fout)
        logging.info("Done")


def get_filename(url):
    """strip off last part of url for filename"""
    return url.split("/")[-1]


def main(args=None):
    """Command-line entry point.

    *args* is the list of command-line arguments (defaults to
    ``sys.argv[1:]``).  Returns 0 on success, or 2 after printing usage when
    the URL or the destination directory is missing.
    """
    if args is None:
        args = sys.argv[1:]
    p = optparse.OptionParser()
    p.add_option("-u", "--url", action="store", dest="url",
                 help="specify url to download mp3s from")
    p.add_option("-d", "--destination-directory", action="store", dest="dest",
                 help="directory in which to place mp3s")
    opt, _ = p.parse_args(args)
    if not (opt.dest and opt.url):
        # Both options are required; say so instead of silently doing nothing.
        p.print_help(sys.stderr)
        return 2
    copy_mp3s_to_dir(opt.url, opt.dest)
    return 0


if __name__ == "__main__":
    sys.exit(main())
_______________________________________________ Ldsoss mailing list [email protected] http://lists.ldsoss.org/mailman/listinfo/ldsoss
