Update of /cvsroot/audacity/audacity-src/scripts/mw2html_audacity
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv19230
Added Files:
	footer.html htmldata.py htmldata.pyc mw2html.py sidebar.html wiki2htm.bat
Log Message:

--- NEW FILE: htmldata.py ---

"""
Manipulate HTML or XHTML documents.

Version 1.1.1.  This source code has been placed in the public
domain by Connelly Barnes.

Features:

 - Translate HTML back and forth to data structures.
   This allows you to read and write HTML documents
   programmatically, with much flexibility.
 - Extract and modify URLs in an HTML document.
 - Compatible with Python 2.0 - 2.5.

See the L{examples} for a quick start.
"""

[...1497 lines suppressed...]

  _test_tagextract()
  print '  tagextract*:            OK'

  if _python_has_unicode():
    _test_tagextract(unicode)
    print '  tagextract (unicode)*:  OK'

  _test_urlextract()
  print '  urlextract*:            OK'

  if _python_has_unicode():
    _test_urlextract(unicode)
    print '  urlextract (unicode)*:  OK'

  print
  print '* The corresponding join method has been tested as well.'

if __name__ == '__main__':
  _test()

--- NEW FILE: htmldata.pyc ---

(This appears to be a binary file; contents omitted.)

--- NEW FILE: footer.html ---

<div align="center"><ul id="f_list"><li>Static dump from <a href="http://audacityteam.org/manual/">Audacity's Manual</a> created on %DATE%.</li></ul></div>

--- NEW FILE: wiki2htm.bat ---

python mw2html.py http://audacityteam.org/manual ..\..\help\temp -s
move ..\..\help\temp\audacityteam.org ..\..\help\manual
rmdir ..\..\help\temp

--- NEW FILE: sidebar.html ---

<div class="portlet" id="p-logo">
  <a style="background-image: url(images/new_audacitytransmediawikimanual.png);"
     href="index.html" title="Main Page"></a>
</div>
<script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
<div class="portlet" id="p-tb">
</div>
<ul>
  <li><a href="index.html#Tutorials">Tutorials</a></li>
  <li><a href="index.html#Using_Audacity">Using Audacity</a></li>
  <li><a href="index.html#Reference">Reference</a>
    <ul>
      <li><a href="menu_reference.html">Menu Bar</a></li>
      <li><a href="index.html#Toolbars">Toolbars</a></li>
      <li><a href="index.html#Project_Window">Project Window</a></li>
      <li><a href="preferences.html">Preferences</a></li>
    </ul>
  </li>
  <li><a href="faq.html">FAQ</a></li>
  <li><a href="glossary.html">Glossary</a></li>
</ul>
""" def __init__(self, rooturl, outdir, flatten=True, lower=True, index=None, clean=True, sidebar=None, hack_skin=True, made_by=True, overwrite=False, footer=None, skin=MONOBOOK_SKIN, move_href=True, remove_png=True, remove_history=True, limit_parent=False, special_mode=False, debug=False, no_images=False): self.rooturl = rooturl self.outdir = os.path.abspath(outdir) self.flatten = flatten self.lower = lower self.index = index self.clean = clean self.sidebar = sidebar self.hack_skin = hack_skin self.made_by = made_by self.overwrite = overwrite self.footer = footer self.skin = skin self.move_href = move_href if self.sidebar is not None: self.sidebar = os.path.abspath(self.sidebar) if self.footer is not None: self.footer = os.path.abspath(self.footer) self.remove_png = remove_png self.remove_history = remove_history self.limit_parent = limit_parent self.special_mode = special_mode self.debug = debug self.no_images = no_images def get_domain(u): """ Get domain of URL. """ url = normalize_url(u) pos = url.find('/') if pos == -1: return url else: return url[:pos] def normalize_url(url): # url normalization curl = url.lower() nurl = url if curl.startswith('http://'): nurl = nurl[len('http://'):] if curl.startswith('www.'): nurl = nurl[len('www.'):] nurl = nurl.strip('/') return nurl def find_tag_limits(doc, start_string, end_tag, start_tag): # find tag limits - start_string must be an unique identifier within doc i1 = doc.find(start_string) if (i1 == -1): return (-1,-1) sdiv = i1 ediv = i1 + len(start_string) while(sdiv < ediv and sdiv != -1): sdiv = doc.find(start_tag, sdiv+len(start_tag)) ediv = doc.find(end_tag , ediv+len(end_tag)) return (i1, ediv) def remove_tag(doc, start_string, end_tag, start_tag): #remove tagged text function ndoc = doc while True: (i1, i2) = find_tag_limits(ndoc, start_string, end_tag, start_tag) if i1 == -1 or i2 == -1: return ndoc ndoc = ndoc[:i1]+ndoc[i2+len(end_tag):] def monobook_fix_html(doc, config, page_url): """ Sets sidebar for Mediawiki 1.4beta6 Monobook HTML output. Also returns new urls eventually found. """ global sidebar_content if config.made_by: doc = doc.replace('<html xmlns=', MADE_BY_COMMENT + '\n<html xmlns=') SIDEBAR_ID = 'SIDEBAR' + sha.new(str(random.random())).hexdigest() # Remove sidebar HTML doc = re.sub( r'(<!-- end content -->)[\s\S]+?' 

def find_tag_limits(doc, start_string, end_tag, start_tag):
    # Find tag limits: start_string must be a unique identifier within doc.
    # Returns (start, end) indices of the matched region, or (-1, -1).
    i1 = doc.find(start_string)
    if i1 == -1:
        return (-1, -1)
    sdiv = i1
    ediv = i1 + len(start_string)
    # Walk forward, balancing nested start/end tags until they match up.
    while sdiv < ediv and sdiv != -1:
        sdiv = doc.find(start_tag, sdiv + len(start_tag))
        ediv = doc.find(end_tag, ediv + len(end_tag))
    return (i1, ediv)


def remove_tag(doc, start_string, end_tag, start_tag):
    # Remove every tagged region identified by start_string,
    # including any nested tags of the same kind.
    ndoc = doc
    while True:
        (i1, i2) = find_tag_limits(ndoc, start_string, end_tag, start_tag)
        if i1 == -1 or i2 == -1:
            return ndoc
        ndoc = ndoc[:i1] + ndoc[i2 + len(end_tag):]
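
# Not in the original file: a hedged example of remove_tag() on nested
# markup, assuming the inputs shown.  The balancing loop in
# find_tag_limits() steps past the inner </div>, so the whole outer
# element is removed:
#
#   remove_tag('a<div class="noexport"><div>x</div></div>b',
#              '<div class="noexport"', '</div>', '<div')
#     ->  'ab'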
""" while '<' + MOVE_HREF in doc: i1 = doc.index('<' + MOVE_HREF) i2 = doc.index('</' + MOVE_HREF, i1+1) i3 = doc.index('>', i2+1) (start, end) = (i1, i3+1) tags = htmldata.tagextract(doc[start:end]) assert tags[0][0] == MOVE_HREF assert tags[-1][0] == '/' + MOVE_HREF href = tags[0][1].get('href', '') new_tags = [] for tag in tags[1:-1]: if len(tag) == 2: if 'href' in tag[1]: if href == '': continue tag[1]['href'] = href new_tags += [tag] doc = doc[:start] + htmldata.tagjoin(new_tags) + doc[end:] return doc def html_remove_image_history(doc, config): """ Remove image history and links to information. """ doc = re.sub(r'<h2>Image history</h2>[\s\S]+?</ul>', r'', doc) doc = re.sub(r'<h2>Image links</h2>[\s\S]+?</ul>', r'', doc) return doc def monobook_hack_skin_html(doc, config): """ Hacks Monobook HTML output: use CSS ids for hacked skin. See monobook_hack_skin_css. """ doc = doc.replace('<div id="globalWrapper">', '<div id="globalWrapperHacked">') doc = doc.replace('<div id="footer">', '<div id="footerHacked">') doc = doc.replace('</body>', '<br></body>') return doc def monobook_hack_skin_css(doc, url, config): """ Hacks Mediawiki 1.4beta6 Monobook main CSS file for better looks. Removes flower background. Defines *Hacked CSS ids, so we can add an orange bar at the top, and clear the orange bar right above the footer. """ if not url.endswith('monobook/main.css'): return doc doc = "/* Monobook skin automatically modified by mw2html. */" + doc doc = doc.replace('url("headbg.jpg")', '') doc += """ /* Begin hacks by mw2html */ #globalWrapperHacked { font-size:127%; width: 100%; background-color: White; border-top: 1px solid #fabd23; border-bottom: 1px solid #fabd23; margin: 0.6em 0em 1em 0em; padding: 0em 0em 1.2em 0em; } #footerHacked { background-color: White; margin: 0.6em 0em 0em 0em; padding: 0.4em 0em 0em 0em; text-align: center; font-size: 90%; } #footerHacked li { display: inline; margin: 0 1.3em; } """ c1 = '#column-one { padding-top: 160px; }' c2 = '#column-one { padding-top: 3.0em; }' assert c1 in doc doc = doc.replace(c1, '/* edit by mw2html */\n' + c2 + '\n/* end edit by mw2html */\n') # Remove external link icons. if config.remove_png: doc = re.sub(r'#bodyContent a\[href \^="http://"\][\s\S]+?\}', r'', doc) return doc def post_css_transform(doc, url, config): """ User-customizable CSS transform. Given a CSS document (with URLs already rewritten), returns modified CSS document. """ if config.hack_skin and not config.special_mode: if config.skin == MONOBOOK_SKIN: doc = monobook_hack_skin_css(doc, url, config) else: raise ValueError('unknown skin') return doc def move_to_index_if_needed(config, ans): if ans.endswith(config.index): ans = ans[:len(ans)-len(config.index)] + INDEX_HTML return ans def file_exists_in_written_set(filename): return os.path.normcase(os.path.normpath(filename)) in wrote_file_set def find_unused_filename(filename, exists=os.path.exists): """ Return 'file' if 'file' doesn't exist, otherwise 'file1', 'file2', etc. Existance is determined by the callable exists(), which takes a filename and returns a boolean. 
""" if not exists(filename): return filename (head, tail) = os.path.split(filename) i = 1 while True: numbered = (os.path.splitext(tail)[0] + str(i) + os.path.splitext(tail)[1]) fullname = os.path.join(head, numbered) if not exists(fullname): return fullname i += 1 def clean_filename(url, config, ans): # Split outdir and our file/dir under outdir # (Note: ans may not be a valid filename) (par, ans) = (ans[:len(config.outdir)], ans[len(config.outdir):]) if ans.startswith(os.sep): ans = ans[1:] # Replace % escape codes with underscores, dashes with underscores. while '%%' in ans: ans = ans[:ans.index('%%')] + '_' + ans[ans.index('%%')+2:] while '%25' in ans: ans = ans[:ans.index('%25')] + '_' + ans[ans.index('%25')+5:] while '%' in ans: ans = ans[:ans.index('%')] + '_' + ans[ans.index('%')+3:] ans = ans.replace('-', '_') while '__' in ans: ans = ans.replace('__', '_') while '_.' in ans: ans = ans.replace('_.', '.') # Rename math thumbnails if '/math/' in url: tail = os.path.split(ans)[1] if os.path.splitext(tail)[1] == '.png': tail = os.path.splitext(tail)[0] if set(tail) <= set('0123456789abcdef') and len(tail) == 32: ans = 'math_' + sha.new(tail).hexdigest()[:4] + '.png' return os.path.join(par, ans) def flatten_filename(url, config, filename): def get_fullname(relname): return os.path.join(config.outdir, relname) orig_ext = os.path.splitext(filename)[1] (head, tail) = os.path.split(filename) if tail == INDEX_HTML: (head, tail) = os.path.split(head) ans = tail if os.path.splitext(ans)[1] != orig_ext: ans = os.path.splitext(ans)[0] + orig_ext return os.path.join(config.outdir, ans) def split_section(url): """ Splits into (head, tail), where head contains no '#' and is max length. """ if '#' in url: i = url.index('#') return (url[:i], url[i:]) else: return (url, '') def url_open(url): # download a file and retrieve its content and mimetype global conn, domain, counter redirect = 'foo' while redirect != '': pos = url.find(domain) if pos == -1: print 'ERROR: url', url, 'is in a different domain.' return ('', '') rel_url = url[pos+len(domain):] attempts = 0 while attempts < 2: counter += 1 conn.request("GET", rel_url) try: r = conn.getresponse() print 'Status',r.status,r.reason,'accessing',rel_url if r.status == 404: return ('','') attempts += 2 except httplib.HTTPException, e: print 'ERROR',e.__class__.__name__,'while retrieving', url conn.close if e.__class__.__name__ in ['BadStatusLine', 'ImproperConnectionState', 'NotConnected', 'IncompleteRead']: print "eventually this error might be recovered. let's try again." print 'reconnecting...' conn = httplib.HTTPConnection(domain) attempts += 1 else: return ('','') if attempts == 3: print "error recovered" redirect = r.getheader('Location', '').split(';')[0] url = redirect doc = r.read() mimetype = r.getheader('Content-Type', '').split(';')[0].lower() return (doc, mimetype) def url_to_filename(url, config): """ Translate a full url to a full filename (in local OS format) under outdir. Transforms web url into local url and caches it. Downloads the file to disk and works with it there instead of download the same file two times (Performance Improvement). """ url = split_section(url)[0] if url in url_filename_cache: return url_filename_cache[url] part = normalize_url(url) #ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', params='', query='', fragment='') L = list(urlparse.urlparse(url)) L[2] = L[2].strip('/') if not '.' in L[2]: # url ends with a directory name. Store it under index.html. 

def url_open(url):
    # Download a file and return its content and mimetype.
    global conn, domain, counter
    redirect = 'foo'
    while redirect != '':
        pos = url.find(domain)
        if pos == -1:
            print 'ERROR: url', url, 'is in a different domain.'
            return ('', '')
        rel_url = url[pos + len(domain):]
        attempts = 0
        while attempts < 2:
            counter += 1
            conn.request("GET", rel_url)
            try:
                r = conn.getresponse()
                print 'Status', r.status, r.reason, 'accessing', rel_url
                if r.status == 404:
                    return ('', '')
                attempts += 2
            except httplib.HTTPException, e:
                print 'ERROR', e.__class__.__name__, 'while retrieving', url
                conn.close()
                if e.__class__.__name__ in ['BadStatusLine',
                                            'ImproperConnectionState',
                                            'NotConnected', 'IncompleteRead']:
                    print "this error may be recoverable; let's try again."
                    print 'reconnecting...'
                    conn = httplib.HTTPConnection(domain)
                    attempts += 1
                else:
                    return ('', '')
        if attempts == 3:
            print "error recovered"
        redirect = r.getheader('Location', '').split(';')[0]
        url = redirect
    doc = r.read()
    mimetype = r.getheader('Content-Type', '').split(';')[0].lower()
    return (doc, mimetype)


def url_to_filename(url, config):
    """
    Translate a full url to a full filename (in local OS format) under
    outdir.

    Transforms the web URL into a local URL and caches the mapping.
    Downloads the file to disk and works with it there, instead of
    downloading the same file twice (performance improvement).
    """
    url = split_section(url)[0]
    if url in url_filename_cache:
        return url_filename_cache[url]

    # urlparse.urlparse() returns, e.g.:
    # ('http', 'www.cwi.nl:80', '/%7Eguido/Python.html', '', '', '')
    L = list(urlparse.urlparse(url))
    L[2] = L[2].strip('/')
    if not '.' in L[2]:
        # url ends with a directory name.  Store it under index.html.
        L[2] += '/' + INDEX_HTML
    else:
        # 'title=' parsing
        if L[4].startswith('title=') and L[2].endswith('/index.php'):
            L[4] = L[4][len('title='):]
            L[2] = L[2][:-len('/index.php')]
    L[0] = ''
    # don't sanitize '/' in the path
    L[2] = urllib.quote_plus(L[2], '/')
    L[3] = urllib.quote_plus(L[3])
    L[4] = urllib.quote_plus(L[4])
    L[5] = urllib.quote_plus(L[5])

    # Local filename relative to outdir
    # (os.sep is the O.S. directory separator; more transformations
    # are made to this below...)
    FL = []
    for i in L:
        if i != '':
            FL += [i]
    subfile = os.sep.join(FL)

    (doc, mimetype) = url_open(url)
    if doc == '' or mimetype == '':
        url_filename_cache[url] = ''
        return ''

    # Fix up extension based on mime type.
    # Maps mimetype to file extension.
    MIME_MAP = {
        'image/jpeg': 'jpg',  'image/png': 'png',   'image/gif': 'gif',
        'image/tiff': 'tiff', 'text/plain': 'txt',  'text/html': 'html',
        'text/rtf': 'rtf',    'text/css': 'css',    'text/sgml': 'sgml',
        'text/xml': 'xml',    'application/zip': 'zip'
    }
    if mimetype in MIME_MAP:
        (root, ext) = os.path.splitext(subfile)
        ext = '.' + MIME_MAP[mimetype]
        subfile = root + ext

    if config.lower:
        subfile = subfile.lower()

    ans = os.path.join(config.outdir, subfile)
    if config.flatten:
        ans = flatten_filename(url, config, ans)
    if config.clean:
        ans = clean_filename(url, config, ans)
    if config.index is not None:
        ans = move_to_index_if_needed(config, ans)
    ans = find_unused_filename(ans, file_exists_in_written_set)

    # Cache and return answer.
    wrote_file_set.add(os.path.normcase(os.path.normpath(ans)))
    url_filename_cache[url] = ans

    mode = ['wb', 'w'][mimetype.startswith('text')]
    # Make parent directory if it doesn't exist.
    try:
        os.makedirs(os.path.split(ans)[0])
    except OSError, e:
        if e.errno != errno.EEXIST:
            raise
    # Not really needed since we checked that the directory outdir
    # didn't exist at the top of run(), but let's double check.
    if os.path.exists(ans) and not config.overwrite:
        print 'File already exists:', ans
        sys.exit(1)
    f = open(ans, mode)
    f.write(doc)
    f.close()
    return ans


def url_to_relative(url, cururl, config):
    """
    Translate a full url to a filename (in URL format) relative to
    cururl, i.e. the relative URL from cururl to url.
    """
    cururl = split_section(cururl)[0]
    (url, section) = split_section(url)
    filename = url_to_filename(url, config)
    if filename == '':
        return ''
    L1 = filename.replace(os.sep, '/').split('/')
    L2 = url_to_filename(cururl, config).replace(os.sep, '/').split('/')
    while L1 != [] and L2 != [] and L1[0] == L2[0]:
        L1 = L1[1:]
        L2 = L2[1:]
    return urllib.quote('../' * (len(L2) - 1) + '/'.join(L1)) + section
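
# Not in the original file: a hedged sketch of the relative-link math in
# url_to_relative(), assuming the two URLs map to the local filenames
# shown.
#
#   url    -> out/man/faq.html
#   cururl -> out/man/tutorials/t1.html
#
#   After the common prefix ['out', 'man'] is stripped:
#   L1 = ['faq.html'], L2 = ['tutorials', 't1.html']
#   result: '../' * (len(L2) - 1) + 'faq.html'  ->  '../faq.html'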

def parse_css(doc, url, config):
    """
    Returns (modified_doc, new_urls), where new_urls are absolute URLs
    for all links found in the CSS.
    """
    new_urls = []
    L = htmldata.urlextract(doc, url, 'text/css')
    for item in L:
        u = item.url
        if config.no_images and any(u.strip().lower().endswith(suffix)
                for suffix in ('.jpg', '.gif', '.png', '.ico')):
            item.url = ''
            continue
        # Store url locally.
        new_urls += [u]
        item.url = url_to_relative(u, url, config)
    newdoc = htmldata.urljoin(doc, L)
    newdoc = post_css_transform(newdoc, url, config)
    return (newdoc, new_urls)


def should_follow(url, config):
    """
    Given that 'url' was linked to from the site, return whether 'url'
    should be spidered as well.
    """
    # False if different domains.
    if get_domain(config.rooturl) != get_domain(url):
        return False
    # False if multiple query fields or parameters found.
    if url.count('&') >= 1 or url.count(';') > 0:
        return False
    # False for MediaWiki special pages.
    if any(x in url for x in ('MediaWiki:', 'Special:', 'Image:',
                              'Talk:', 'User:', 'Help:')):
        return False
    # False for images when --no-images is given.
    if config.no_images and any(url.strip().lower().endswith(suffix)
            for suffix in ('.jpg', '.gif', '.png', '.ico')):
        return False
    # limit_parent support: outside the root URL path, only follow
    # plain files (css, images, ...), not other wiki pages.
    nurl = normalize_url(url)
    ncurl = normalize_url(config.rooturl)
    if config.limit_parent and not nurl.startswith(ncurl):
        L = nurl.split('/')
        if '.' not in L[-1]:
            return False
        forbidden_parents = ['.php', '.html', '.htm']
        for fp in forbidden_parents:
            if fp in L[-1]:
                return False
    return True
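
# Not in the original file: hedged examples of should_follow(), assuming
# config.rooturl = 'http://audacityteam.org/manual' and default options.
#
#   'http://audacityteam.org/manual/faq'                    ->  True
#   'http://audacityteam.org/index.php?title=X&action=edit' ->  False ('&')
#   'http://audacityteam.org/manual/Special:Recentchanges'  ->  False ('Special:')
#   'http://example.com/manual/faq'                         ->  False (other domain)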

def parse_html(doc, url, config):
    """
    Returns (modified_doc, new_urls), where new_urls are absolute URLs
    for all links we want to spider in the HTML.
    """
    BEGIN_COMMENT_REPLACE = '<BEGINCOMMENT-' + str(random.random()) + '>'
    END_COMMENT_REPLACE = '<ENDCOMMENT-' + str(random.random()) + '>'

    new_urls = []

    doc = pre_html_transform(doc, url, config)
    # Temporarily "get rid" of comments so htmldata will find the URLs
    # in the funky "<!--[if" HTML hackery for IE.
    doc = doc.replace('<!--', BEGIN_COMMENT_REPLACE)
    doc = doc.replace('-->', END_COMMENT_REPLACE)

    L = htmldata.urlextract(doc, url, 'text/html')
    for item in L:
        u = item.url
        follow = should_follow(u, config)
        if follow:
            if config.debug:
                print 'ACCEPTED - ', u
            # Store url locally.
            new_urls += [u]
            item.url = url_to_relative(u, url, config)
        else:
            if config.debug:
                print 'DENIED - ', u
    newdoc = htmldata.urljoin(doc, L)
    newdoc = newdoc.replace(BEGIN_COMMENT_REPLACE, '<!--')
    newdoc = newdoc.replace(END_COMMENT_REPLACE, '-->')
    newdoc = pos_html_transform(newdoc, config)
    return (newdoc, new_urls)


def run(config, out=sys.stdout):
    """
    Code interface.
    """
    global conn, domain, counter

    if urlparse.urlparse(config.rooturl)[1].lower().endswith('wikipedia.org'):
        out.write('Please do not use robots with the Wikipedia site.\n')
        out.write('Instead, install the Wikipedia database locally and use mw2html on\n')
        out.write('your local installation.  See the Mediawiki site for more information.\n')
        sys.exit(1)

    # Number of files saved
    n = 0

    if not config.overwrite and os.path.exists(config.outdir):
        out.write('Error: Directory exists: ' + str(config.outdir))
        sys.exit(1)

    #pdb.set_trace()
    domain = get_domain(config.rooturl)
    conn = httplib.HTTPConnection(domain)
    print 'connection established to:', domain

    complete = set()
    pending = set([config.rooturl])
    while len(pending) > 0:
        url = pending.pop()
        if normalize_url(url) in complete:
            continue
        complete.add(normalize_url(url))
        filename = url_to_filename(url, config)
        if filename == '':
            continue
        if not os.path.exists(filename):
            print "ERROR:", url, '\n'
            continue
        f = open(filename, 'r')
        doc = f.read()
        f.close()
        new_urls = []
        if filename.endswith('.html'):
            (doc, new_urls) = parse_html(doc, url, config)
        elif filename.endswith('.css'):
            (doc, new_urls) = parse_css(doc, url, config)
        # Enqueue URLs that we haven't yet spidered.
        for u in new_urls:
            if normalize_url(u) not in complete:
                # Strip off any #section link.
                if '#' in u:
                    u = u[:u.index('#')]
                pending.add(u)
        # Save document changes to disk.
        update = False
        text_ext = ('txt', 'html', 'rtf', 'css', 'sgml', 'xml')
        for ext in text_ext:
            if filename.endswith(ext):
                update = True
                break
        if update:
            f = open(filename, 'w')
            f.write(doc)
            f.close()
        if config.debug:
            out.write(url + '\n => ' + filename + '\n\n')
        n += 1

    conn.close()
    print "connection to", domain, "closed."
    out.write(str(n) + ' files saved\n')
    print counter, "httplib requests done"
""" try: (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'fsdl:t:b:i:', ['force', 'no-flatten', 'no-lower', 'no-clean', 'no-hack-skin', 'no-made-by', 'left=', 'top=', 'bottom=', 'index=', 'no-move-href', 'no-remove-png', 'no-remove-history', 'limit-parent', 'special-mode','debug','no-images']) except getopt.GetoptError: usage() # Parse non-option arguments try: (rooturl, outdir) = args except ValueError: usage() config = Config(rooturl=rooturl, outdir=outdir) # Parse option arguments for (opt, arg) in opts: if opt in ['-f', '--force', '-s', '-special-mode']: config.overwrite = True if opt in ['--no-flatten', '-s', '-special-mode']: config.flatten = False if opt in ['--no-lower']: config.lower = False if opt in ['--no-clean']: config.clean = False if opt in ['--no-hack-skin']: config.hack_skin = False if opt in ['--no-made-by']: config.made_by = False if opt in ['--no-move-href']: config.move_href = False if opt in ['--no-remove-png']: config.remove_png = False if opt in ['--no-remove-history']: config.remove_history = False if opt in ['--no-images']: config.no_images = True if opt in ['--limit-parent', '-s', '-special-mode']: config.limit_parent = True if opt in ['-s', '-special-mode']: config.special_mode = True config.sidebar = 'sidebar.html' config.footer = 'footer.html' if opt in ['-d','--debug']: config.debug = True if opt in ['-l', '--left']: config.sidebar = os.path.abspath(arg) if opt in ['-t', '--top']: raise NotImplementedError config.header = os.path.abspath(arg) if opt in ['-b', '--bottom']: config.footer = os.path.abspath(arg) if opt in ['-i', '--index']: config.index = arg # Run program run(config) if __name__ == '__main__': main() ------------------------------------------------------------------------------ Register Now for Creativity and Technology (CaT), June 3rd, NYC. CaT is a gathering of tech-side developers & brand creativity professionals. Meet the minds behind Google Creative Lab, Visual Complexity, Processing, & iPhoneDevCamp asthey present alongside digital heavyweights like Barbarian Group, R/GA, & Big Spaceship. http://www.creativitycat.com _______________________________________________ Audacity-cvs mailing list Audacity-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/audacity-cvs