Am Dienstag, den 15.05.2007, 09:09 -0600 schrieb fREW:
> possible to wiki syntax.  Could someone send out the script that was
> used to upload pages initially?  It would be helpful to see it so that
> we could set up some translation code in the script.

I adapted the script vimtips.py from the URL Tom posted and the "bulk
page creator" script for MediaWiki. The latter is in PHP and depends on
another PHP script. It would be nice if someone could translate the
80-line PHP script to Python.

Then I wrote a simple bash script that calls both scripts.

A problem with bulkinsert.php was that MediaWiki uses a Captcha
mechanism when there is an <a href..> in the text. The bash script tries
to detect that and echoes a message.

I attach the three scripts (without any warranty ;-) ). But it would be
better to use an svn repository when we work on it together...

Can we get commit access to
http://vimtips.googlecode.com/svn/trunk/scripts/ ?

Sebastian

Attachment: bulkinsert.php
Description: application/php

#!/usr/bin/python
# -*- coding: utf-8 -*-
# vimtips.py -- parse tips from www.vim.org
# written by Ali Polatel

import os
import sys
import re, urllib2
import commands

# Module metadata
__author__ = 'Ali Polatel'
__contributors__ = []

# Globals
# Address template of a single tip page; %d takes the numeric tip id.
URL = 'http://www.vim.org/tips/tip.php?tip_id=%d'
# Extra HTTP headers sent along with a tip request.
headers = {'User-Agent': 'vimtips.py'}
# Case-insensitive matcher for anchor tags; the named groups capture
# the link target and the link text.
href_r = re.compile(
    r'<a.*?href=["\']'
    r'(?P<link>.*?)'
    r'["\']>'
    r'(?P<text>.*?)'
    r'</a>',
    re.I)
             


class comment:
    """ Record holding one additional comment of a tip

        Every field defaults to None on the class; the parser assigns
        the real values on each instance it creates.
    """
    # email: who wrote the comment, date: when it was written,
    # text: the comment itself
    email = date = text = None

def htmltowiki(str):
    """ Convert an html fragment to MediaWiki markup

        Carriage returns are always stripped.  The remaining text is
        written to a temporary file and handed to the external
        `html2wiki` program (MediaWiki dialect).  When html2wiki cannot
        be started or exits with a non-zero status, a note is written
        to stderr and the text is returned unconverted (but without the
        carriage returns).

        str -- html text to convert (the name shadows the builtin; it
               is kept so that existing callers keep working)

        Returns the converted text, or the cleaned input on failure.
    """
    # Imported here because only this function needs these modules.
    import subprocess
    import tempfile

    # Remove ^M's
    text = str.replace('\r', '')

    # TODO convert <a href=...> tags ourselves (see href_r): prefix
    # links starting with / with http://www.vim.org, or better, point
    # them at the corresponding wiki adress of the tip ;)

    # html2wiki wants a file.  Use a unique temporary file rather than
    # a fixed name in the current directory, so that parallel runs
    # cannot overwrite each other's input, and remove it in all cases.
    fd, path = tempfile.mkstemp(suffix='.html', prefix='vimtip_tmp')
    try:
        fp = os.fdopen(fd, 'w')
        try:
            fp.write(text)
        finally:
            fp.close()

        try:
            # Argument list instead of a shell string: no quoting
            # problems with unusual temporary directory names.
            proc = subprocess.Popen(
                ['html2wiki', '--dialect', 'MediaWiki', path],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True)
        except OSError:  # html2wiki is not in PATH
            sys.stderr.write('html2wiki not found, skipping conversion\n')
            return text

        output = proc.communicate()[0]
        if proc.returncode != 0:  # html2wiki exited with error
            sys.stderr.write('html2wiki failed (exit status %d), '
                             'skipping conversion\n' % proc.returncode)
            return text
    finally:
        os.remove(path)

    # commands.getstatusoutput() used to drop one trailing newline;
    # keep doing that so the generated pages stay the same.
    if output.endswith('\n'):
        output = output[:-1]
    return output

def gettip(id):
    """ Get the tip from www.vim.org

        id -- numeric tip id, substituted into the URL template

        Returns the content of the tip page as read from the server.
        Errors raised by urllib2 while opening or reading the page are
        not caught here.
    """
    tip_url = URL % id
    req = urllib2.Request(tip_url, None, headers)
    data = urllib2.urlopen(req)
    try:
        return data.read()
    finally:
        # Close the response even when reading fails
        data.close()

def parsetip(tip):
    """ Parse the html source of a tip page

        tip -- page source as returned by gettip()

        Returns None when no tip title is found (the tip doesn't
        exist).  Otherwise returns the tuple
            (title, author, created, complexity, version, rating,
             body, comments)
        where title and body went through htmltowiki(), version is
        'n/a' when it is empty on the page, and comments is a list of
        comment objects (text stays None for a comment header without
        a matching text).

        Raises AttributeError when the rating or the body cannot be
        found, and ValueError when one of the created / complexity /
        author / version rows is missing.
    """

    title_r = re.compile(r'Tip #\d*\ -\ (.*?)\s:')
    rating_r = re.compile(r'Rating\s<b>(-?\d+/\d+)')
    body_r = re.compile(r'<p><code>(.*)</code></p>')
    add_r = re.compile(r'<td class="lightbg">\n\t\s*\t\s*(?P<email>.*?),\s\n\t\s*\t\s*<.*>\s(?P<date>.*?)</font>')
    comment_r = re.compile(r'<td><tt>(.*)')
    cell_r = re.compile(r'\s*<.?td>')

    found = title_r.search(tip)
    if found is None: # Tip doesn't exist
        return None
    title = found.group(1)
    rating = rating_r.search(tip).group(1)
    body = body_r.search(tip).group(1)

    # Additional comments: the headers (email, date) come first ...
    comments = []
    for match in add_r.finditer(tip):
        new = comment()
        new.email = htmltowiki(match.group('email'))
        new.date = match.group('date')
        comments.append(new)

    # ... then the texts, attached to the headers in page order.  A
    # text without a header is reported and skipped instead of failing
    # with an IndexError.
    texts = [match.group(1) for match in comment_r.finditer(tip)]
    if len(texts) > len(comments):
        sys.stderr.write('warning: %d comment text(s) without a header '
                         'were dropped\n' % (len(texts) - len(comments)))
    for entry, text in zip(comments, texts):
        entry.text = htmltowiki(text)

    lst = tip.split('\n')

    def field(label):
        # The line we want is two lines below the prompt cell
        prompt = '    <td class="prompt">%s</td>' % label
        return cell_r.sub('', lst[lst.index(prompt) + 2])

    created = field('created:')
    complexity = field('complexity:')
    author = field('author:')
    version = field('as of Vim:')

    if not version: # Version empty
        version = 'n/a'

    return (htmltowiki(title), author, created, complexity, version,
            rating, htmltowiki(body), comments)

def main(argv):
    """ Print the wiki page of one tip to stdout

        argv -- command line; argv[1] is the numeric tip id

        The output consists of a VimTip<id> line, an --ENDTITLE--
        marker, a {{Tip2}} template call, one heading plus text per
        additional comment and a closing --ENDPAGE-- marker.

        Returns the exit status: 0 on success, -1 on wrong usage and
        -2 when the tip does not exist.
    """
    if len(argv) != 2:
        print('Usage %s tip_id' % argv[0])
        return -1
    tip_id = int(argv[1])
    parsedtip = parsetip(gettip(tip_id))

    if parsedtip is None:
        print('No tip with id %d exists on www.vim.org' % tip_id)
        return -2

    (title, author, created, complexity,
     version, rating, body, comments) = parsedtip

    print('VimTip%d' % tip_id)
    print('--ENDTITLE--')
    print('{{Tip2')
    print('|id=%d' % tip_id)
    print('|title=%s' % title)
    print('|created=%s' % created)
    print('|complexity=%s' % complexity)
    print('|author=%s' % author)
    print('|version=%s' % version)
    # NOTE(review): 'ratin' looks like a typo for 'rating' -- confirm
    # against the Tip2 template before changing the emitted key.
    print('|ratin=%s' % rating)
    print('|text=')
    print(body)
    print('}}')
    # And additional comments
    for note in comments:
        print('===' + note.email + ' writes on ' + note.date + '===')
        print(note.text)
    print('<!-- parsed by vimtips.py :-) -->')
    # End the Page
    print('--ENDPAGE--')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Attachment: posttip.sh
Description: application/shellscript

Reply via email to