My wife recently asked for all the recent conference mp3s, so I
whipped up a little script in python to do that.  You point it at an
lds.org conference url and it will pull all the individual talks
(skipping the complete session ones) into a specified directory.

I thought I'd share it in case anyone cared.

enjoy,

matt
"""
Script to download the mp3 of every individual talk (skipping the
complete-session recordings) linked from a given conference url.

example use::

  python downloadconference.py -u http://lds.org/conference/sessions/display/0,5239,23-1-690,00.html -d /tmp/conf 

Only requirement is BeautifulSoup module.

Licensed under PSF license.

Copyright 2007 - matt harrison
"""

import urllib2
import logging
import optparse
import sys
import os

from BeautifulSoup import BeautifulSoup

# Route all log records (DEBUG and above) to log.txt in the current
# working directory.
logging.basicConfig(level=logging.DEBUG, filename="log.txt")


def get_contents(url):
    """Open *url* with urllib2 and return the response object.

    The response is handed back unread and still open.
    """
    return urllib2.urlopen(url)


def get_link_iter(url):
    """Yield every ``<a>`` tag found in the page at *url*.

    The page is fetched with get_contents() and parsed with
    BeautifulSoup; each yielded item is a BeautifulSoup tag object.
    """
    html_page = get_contents(url)
    try:
        # BeautifulSoup consumes the file-like response while building
        # the tree, so the connection can be released right after parsing
        # instead of being left open for the rest of the run.
        soup = BeautifulSoup(html_page)
    finally:
        html_page.close()

    for link in soup.findAll("a"):
        yield link


def get_mp3_iter(url):
    """Yield the href of each link on the page at *url* that is_mp3 accepts."""
    for anchor in get_link_iter(url):
        if not is_mp3(anchor):
            continue
        yield anchor["href"]


def is_mp3(link):
    """Return True when *link* points at an individual-talk mp3.

    link -- an anchor tag, or any mapping-like object providing
            ``get("href")``.

    Anchors that carry no href at all (for example ``<a name="top">``)
    are rejected rather than raising KeyError, so one such anchor on the
    page no longer aborts the whole download.
    """
    href = link.get("href")
    if not href:
        return False
    # filter out "Complete sessions"
    return href.endswith(".mp3") and "Complete" not in href


def copy_mp3s_to_dir(url, dest_dir):
    """Download every mp3 reported by get_mp3_iter(url) into *dest_dir*.

    url      -- page whose links are scanned for mp3s
    dest_dir -- target directory; created (with parents) if missing

    Each file is named after the last path component of its url (see
    get_filename) and an existing file of that name is overwritten.
    Progress is reported through the logging module.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    for mp3_url in get_mp3_iter(url):
        # download mp3
        logging.info("Downloading %s", mp3_url)
        fin = urllib2.urlopen(mp3_url)
        try:
            mp3 = fin.read()
        finally:
            # release the connection even if the read fails
            fin.close()

        # copy to dest
        dest = os.path.join(dest_dir, get_filename(mp3_url))
        logging.info("Writing to %s", dest)
        # mp3 data is binary: it must be written in 'wb' mode, since text
        # mode translates newline bytes on some platforms and corrupts
        # the audio.
        fout = open(dest, "wb")
        try:
            fout.write(mp3)
        finally:
            fout.close()
        logging.info("Done")

def get_filename(url):
    """Return whatever follows the final "/" in *url* (the file name).

    A url without any "/" is returned unchanged; a url ending in "/"
    gives an empty string.
    """
    pieces = url.rsplit("/", 1)
    return pieces[-1]
        

def main(args=None):
    """Parse the command line and download the conference mp3s.

    args -- list of command-line argument strings; when None, the
            process's own arguments (sys.argv without the program
            name) are used.

    Both -u/--url and -d/--destination-directory are required.  When
    either is missing the problem is reported through
    OptionParser.error(), which prints the usage text to stderr and
    exits with status 2, instead of silently doing nothing.
    """
    if args is None:
        # optparse expects the arguments without the program name
        args = sys.argv[1:]

    p = optparse.OptionParser()
    p.add_option("-u", "--url", action="store", dest="url",
                 help="specify url to download mp3s from")
    p.add_option("-d", "--destination-directory", action="store",
                 dest="dest", help="directory in which to place mp3s")

    opt, args = p.parse_args(args)

    if not (opt.url and opt.dest):
        p.error("both --url and --destination-directory are required")

    copy_mp3s_to_dir(opt.url, opt.dest)


if __name__ == "__main__":
    main()

_______________________________________________
Ldsoss mailing list
[email protected]
http://lists.ldsoss.org/mailman/listinfo/ldsoss

Reply via email to