Hi Hans, When I did a similar migration (1.5.8 --> 1.8.1) recently. I ran the migration several times in order to make sure I was happy with it, and had various people play with the intermediate results. One thing I didn't like in the conversion scripts was that the default conversion will take the latest version of a page, revise it, and create a new revision. While useful for debugging the migration, I found it caused two problems:
Other items we encountered:
I've attached our patched version of the conversion script for your enjoyment (!). The changes are very small (attached version is our patched version from MoinMoin 1.8.2). I contemplated filing a bug about this change to the conversion script, but I couldn't figure out how to roll this up into a code change that would let people doing the migration choose the approach they want to take. -Eric. Hans-Joachim Ehlers wrote: Hi Im in the happy position to upgrade from 1.5.2 to 1.8.2 and would like to know whether or not the info fromhttp://moinmo.in/HowTo/Migrate%20from%201.5%20to%201.6 is still valid. I run already the ./152_to_1050300.py script to create the ./meta file. BTW: I added a section into http://moinmo.in/MoinMoinPackages about howto build a package for OpenSuse ( tested for 10.3) tia Hajo ------------------------------------------------------------------------------ Stay on top of everything new and different, both inside and around Java (TM) technology - register by April 22, and save $200 on the JavaOne (SM) conference, June 2-5, 2009, San Francisco. 300 plus technical and hands-on sessions. Register today. Use priority code J9JMT32. http://p.sf.net/sfu/p _______________________________________________ Moin-user mailing list Moin-user@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/moin-user |
# -*- coding: iso-8859-1 -*- """ MoinMoin - migration from 1.5.8 to 1.6.0 (creole link style) What it does: a) reverse underscore == blank stuff in pagenames (introducing this was a fault) pagename quoted pagename ----------------------------------------------------- old MainPage/Sub_Page MainPage(2f)Sub_Page new MainPage/Sub Page MainPage(2f)Sub(20)Page or new MainPage/Sub_Page MainPage(2f)Sub_Page (user has to decide by editing rename1.txt) markup ---------------------------------------------------- old MoinMoin:MainPage/Sub_Page ../Sub_Page2 new [[MoinMoin:MainPage/Sub Page]] [[../Sub Page2]] b) decode url encoded chars in attachment names (and quote the whole fname): markup ---------------------------------------------------- old attachment:file%20with%20blanks.txt new [[attachment:file with blanks.txt]] c) users: move bookmarks from separate files into user profile d) users: generate new name[] for lists and name{} for dicts e) kill all */MoinEditorBackup pages (replaced by drafts functionality) @copyright: 2007 by Thomas Waldmann @license: GNU GPL, see COPYING for details. """ import os.path import re import time import codecs, urllib, glob from MoinMoin import config, wikiutil from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir import mimetypes # this MUST be after wikiutil import! from _conv160_wiki import convert_wiki create_rev = False # Do not create a new revision of each document. def markup_converter(request, pagename, text, renames): """ Convert the <text> content of page <pagename>, using <renames> dict to rename links correctly. Additionally, convert some changed markup. """ if text.startswith('<?xml'): # would be done with xslt processor return text pis, body = wikiutil.get_processing_instructions(text) for pi, val in pis: if pi == 'format' and val != 'wiki': # not wiki page return text text = convert_wiki(request, pagename, text, renames) return text class EventLog: def __init__(self, request, fname): self.request = request self.fname = fname self.data = None self.renames = {} def read(self): """ read complete event-log from disk """ data = [] try: lineno = 0 f = file(self.fname, 'r') for line in f: lineno += 1 line = line.replace('\r', '').replace('\n', '') if not line.strip(): # skip empty lines continue fields = line.split('\t') try: timestamp, action, kvpairs = fields[:3] timestamp = int(timestamp) kvdict = wikiutil.parseQueryString(kvpairs) data.append((timestamp, action, kvdict)) except ValueError, err: # corrupt event log line, log error and skip it print "Error: invalid event log (%s) line %d, err: %s, SKIPPING THIS LINE!" % (self.fname, lineno, str(err)) f.close() except IOError, err: # no event-log pass self.data = data def write(self, fname): """ write complete event-log to disk """ if self.data: f = file(fname, 'w') for timestamp, action, kvdict in self.data: pagename = kvdict.get('pagename') if pagename and ('PAGE', pagename) in self.renames: kvdict['pagename'] = self.renames[('PAGE', pagename)] kvpairs = wikiutil.makeQueryString(kvdict, want_unicode=False) fields = str(timestamp), action, kvpairs line = '\t'.join(fields) + '\n' f.write(line) f.close() def copy(self, destfname, renames): self.renames = renames self.read() self.write(destfname) class EditLog: def __init__(self, request, fname): self.request = request self.fname = fname self.data = None self.renames = {} def read(self): """ read complete edit-log from disk """ data = {} try: f = file(self.fname, 'r') for line in f: line = line.replace('\r', '').replace('\n', '') if not line.strip(): # skip empty lines continue fields = line.split('\t') + [''] * 9 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9] timestamp = int(timestamp) rev = int(rev) pagename = wikiutil.unquoteWikiname(pagename) data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment) f.close() except IOError, err: # no edit-log pass self.data = data def write(self, fname, deleted=False): """ write complete edit-log to disk """ if self.data: editlog = self.data.items() editlog.sort() f = file(fname, "w") max_rev = 0 for key, fields in editlog: timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields if action.startswith('ATT'): try: fname = urllib.unquote(extra).decode('utf-8') except UnicodeDecodeError: fname = urllib.unquote(extra).decode('iso-8859-1') if ('FILE', pagename, fname) in self.renames: fname = self.renames[('FILE', pagename, fname)] extra = urllib.quote(fname.encode('utf-8')) if ('PAGE', pagename) in self.renames: pagename = self.renames[('PAGE', pagename)] timestamp = str(timestamp) if rev != 99999999: max_rev = max(rev, max_rev) revstr = '%08d' % rev pagename = wikiutil.quoteWikinameFS(pagename) fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment log_str = '\t'.join(fields) + '\n' f.write(log_str) if create_rev and not deleted: timestamp = str(wikiutil.timestamp2version(time.time())) revstr = '%08d' % (max_rev + 1) action = 'SAVE' ip = '127.0.0.1' hostname = 'localhost' userid = '' extra = '' comment = "converted to 1.6 markup" fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment log_str = '\t'.join(fields) + '\n' f.write(log_str) f.close() def copy(self, destfname, renames, deleted=False): self.renames = renames self.read() self.write(destfname, deleted) class PageRev: """ a single revision of a page """ def __init__(self, request, pagename, rev_dir, rev): self.request = request self.pagename = pagename self.rev_dir = rev_dir self.rev = rev def read(self): fname = opj(self.rev_dir, '%08d' % self.rev) f = file(fname, "rb") data = f.read() f.close() data = data.decode(config.charset) return data def write(self, data, rev_dir, convert, rev=None): if rev is None: rev = self.rev if convert: data = markup_converter(self.request, self.pagename, data, self.renames) fname = opj(rev_dir, '%08d' % rev) data = data.encode(config.charset) f = file(fname, "wb") f.write(data) f.close() def copy(self, rev_dir, renames, convert=False, new_rev=None): self.renames = renames data = self.read() self.write(data, rev_dir, convert, new_rev) class Attachment: """ a single attachment """ def __init__(self, request, attach_dir, attfile): self.request = request self.path = opj(attach_dir, attfile) self.name = attfile.decode('utf-8', 'replace') def copy(self, attach_dir): """ copy attachment file from orig path to new destination """ attfile = self.name.encode('utf-8') dest = opj(attach_dir, attfile) copy_file(self.path, dest) class Page: """ represents a page with all related data """ def __init__(self, request, pages_dir, qpagename): self.request = request self.name = wikiutil.unquoteWikiname(qpagename) self.name_old = self.name # renaming: still original name when self.name has the new name self.page_dir = opj(pages_dir, qpagename) self.current = None # int current self.editlog = None # dict (see read_editlog) self.revlist = None # list of ints (page text revisions) self.revisions = None # dict int: pagerev obj self.attachments = None # dict of unicode fname: full path self.renames = {} # info for renaming pages/attachments def read(self): """ read a page, including revisions, log, attachments from disk """ page_dir = self.page_dir # read current file current_fname = opj(page_dir, 'current') if os.path.exists(current_fname): current_file = file(current_fname, "r") current_rev = current_file.read() current_file.close() try: self.current = int(current_rev) except ValueError: print "Error: invalid current file %s, SKIPPING THIS PAGE!" % current_fname return # read edit-log editlog_fname = opj(page_dir, 'edit-log') if os.path.exists(editlog_fname): self.editlog = EditLog(self.request, editlog_fname) # read page revisions rev_dir = opj(page_dir, 'revisions') if os.path.exists(rev_dir): revlist = listdir(rev_dir) revlist = [int(rev) for rev in revlist] revlist.sort() self.revlist = revlist self.revisions = {} for rev in revlist: self.revisions[rev] = PageRev(self.request, self.name_old, rev_dir, rev) # set deleted status self.is_deleted = not self.revisions or self.current not in self.revisions # read attachment filenames attach_dir = opj(page_dir, 'attachments') if os.path.exists(attach_dir): self.attachments = {} attlist = listdir(attach_dir) for attfile in attlist: a = Attachment(self.request, attach_dir, attfile) self.attachments[a.name] = a def write(self, pages_dir): """ write a page, including revisions, log, attachments to disk """ if ('PAGE', self.name) in self.renames: name_new = self.renames[('PAGE', self.name)] if name_new != self.name: print "Renaming page %r -> %r" % (self.name, name_new) self.name_old = self.name self.name = name_new qpagename = wikiutil.quoteWikinameFS(self.name) page_dir = opj(pages_dir, qpagename) os.makedirs(page_dir) # write current file current = self.current if current is not None: if create_rev and not self.is_deleted: current += 1 current_fname = opj(page_dir, 'current') current_file = file(current_fname, "w") current_str = '%08d\n' % current current_file.write(current_str) current_file.close() # copy edit-log if self.editlog is not None: editlog_fname = opj(page_dir, 'edit-log') self.editlog.copy(editlog_fname, self.renames, deleted=self.is_deleted) # copy page revisions if self.revisions is not None: rev_dir = opj(page_dir, 'revisions') os.makedirs(rev_dir) for rev in self.revlist: if create_rev: self.revisions[rev].copy(rev_dir, self.renames) else: # if int(rev) == self.current: self.revisions[rev].copy(rev_dir, self.renames, convert=True) # else: # self.revisions[rev].copy(rev_dir, self.renames) if create_rev and not self.is_deleted: self.revisions[rev].copy(rev_dir, self.renames, convert=True, new_rev=rev+1) # copy attachments if self.attachments is not None: attach_dir = opj(page_dir, 'attachments') os.makedirs(attach_dir) for fn, att in self.attachments.items(): # we have to check for renames here because we need the (old) pagename, too: if ('FILE', self.name_old, fn) in self.renames: fn_new = self.renames[('FILE', self.name_old, fn)] if fn_new != fn: print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new) att.name = fn_new att.copy(attach_dir) def copy(self, pages_dir, renames): self.renames = renames self.read() self.write(pages_dir) class User: """ represents a user with all related data """ def __init__(self, request, users_dir, uid): self.request = request self.uid = uid self.users_dir = users_dir self.profile = None self.bookmarks = None def read(self): """ read profile and bookmarks data from disk """ self.profile = {} fname = opj(self.users_dir, self.uid) # read user profile f = codecs.open(fname, 'r', config.charset) for line in f: line = line.replace(u'\r', '').replace(u'\n', '') if not line.strip() or line.startswith(u'#'): # skip empty or comment lines continue try: key, value = line.split(u'=', 1) except Exception, err: print "Error: User reader can not parse line %r from profile %r (%s)" % (line, fname, str(err)) continue self.profile[key] = value f.close() # read bookmarks self.bookmarks = {} fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid) for fname in glob.glob(fname_pattern): f = file(fname, "r") bookmark = f.read() f.close() wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '') self.bookmarks[wiki] = int(bookmark) # don't care about trail def write(self, users_dir): """ write profile and bookmarks data to disk """ fname = opj(users_dir, self.uid) f = codecs.open(fname, 'w', config.charset) for key, value in self.profile.items(): if key in (u'subscribed_pages', u'quicklinks'): pages = value.split(u'\t') for i in range(len(pages)): pagename = pages[i] try: interwiki, pagename = pagename.split(u':', 1) except: interwiki, pagename = u'Self', pagename if interwiki == u'Self' or interwiki == self.request.cfg.interwikiname: if ('PAGE', pagename) in self.renames: pagename = self.renames[('PAGE', pagename)] pages[i] = u'%s:%s' % (interwiki, pagename) key += '[]' # we have lists here value = u'\t'.join(pages) f.write(u"%s=%s\n" % (key, value)) else: f.write(u"%s=%s\n" % (key, value)) bookmark_entries = [u'%s:%s' % item for item in self.bookmarks.items()] key = u"bookmarks{}" value = u'\t'.join(bookmark_entries) f.write(u"%s=%s\n" % (key, value)) f.close() # don't care about trail def copy(self, users_dir, renames): self.renames = renames self.read() self.write(users_dir) class DataConverter(object): def __init__(self, request, src_data_dir, dest_data_dir): self.request = request self.sdata = src_data_dir self.ddata = dest_data_dir self.pages = {} self.users = {} self.complete = {} self.renames = {} self.complete_fname = opj(self.sdata, 'complete.txt') self.rename_fname1 = opj(self.sdata, 'rename1.txt') self.rename_fname2 = opj(self.sdata, 'rename2.txt') def pass1(self): """ First create the rename list - the user has to review/edit it as we can't decide about page/attachment names automatically. """ self.read_src() # pages for pn, p in self.pages.items(): p.read() if not p.revisions: continue # we don't care for pages with no revisions (trash) if pn.endswith('/MoinEditorBackup'): continue # we don't care for old editor backups self.complete[('PAGE', pn)] = None if "_" in pn: # log all pagenames with underscores self.renames[('PAGE', pn)] = None if p.attachments is not None: for fn in p.attachments: try: fn_str = fn.encode('ascii') log = False # pure ascii filenames are no problem except UnicodeEncodeError: log = True # this file maybe has a strange representation in wiki markup else: if ' ' in fn_str or '%' in fn_str: # files with blanks need quoting log = True self.complete[('FILE', pn, fn)] = None if log: # log all strange attachment filenames fn_str = fn.encode('utf-8') self.renames[('FILE', pn, fn)] = None self.save_list(self.complete_fname, self.complete) self.save_list(self.rename_fname1, self.renames) LIST_FIELDSEP = u'|' # in case | makes trouble, one can use \t tab char def save_list(self, fname, what): what_sorted = what.keys() # make sure we have 3-tuples: what_sorted = [(k + (None, ))[:3] for k in what_sorted] # we only have python 2.3, thus no cmp keyword for the sort() call, # thus we need to do it the more complicated way: what_sorted = [(pn, fn, rtype) for rtype, pn, fn in what_sorted] # shuffle what_sorted.sort() # sort what_sorted = [(rtype, pn, fn) for pn, fn, rtype in what_sorted] # shuffle f = codecs.open(fname, 'w', 'utf-8') for rtype, pn, fn in what_sorted: if rtype == 'PAGE': line = (rtype, pn, pn) elif rtype == 'FILE': line = (rtype, pn, fn, fn) line = self.LIST_FIELDSEP.join(line) f.write(line + u'\n') f.close() def load_list(self, fname, what): f = codecs.open(fname, 'r', 'utf-8') for line in f: line = line.rstrip() if not line: continue t = line.split(self.LIST_FIELDSEP) rtype, p1, p2, p3 = (t + [None]*3)[:4] if rtype == u'PAGE': what[(str(rtype), p1)] = p2 elif rtype == u'FILE': what[(str(rtype), p1, p2)] = p3 f.close() def pass2(self): """ Second, read the (user edited) rename list and do the renamings everywhere. """ self.read_src() #self.load_list(self.complete_fname, self.complete) self.load_list(self.rename_fname2, self.renames) self.write_dest() def read_src(self): # create Page objects in memory pages_dir = opj(self.sdata, 'pages') pagelist = listdir(pages_dir) for qpagename in pagelist: p = Page(self.request, pages_dir, qpagename) self.pages[p.name] = p # create User objects in memory users_dir = opj(self.sdata, 'user') user_re = re.compile(r'^\d+\.\d+(\.\d+)?$') userlist = listdir(users_dir) userlist = [f for f in userlist if user_re.match(f)] for userid in userlist: u = User(self.request, users_dir, userid) self.users[u.uid] = u # create log objects in memory self.editlog = EditLog(self.request, opj(self.sdata, 'edit-log')) self.eventlog = EventLog(self.request, opj(self.sdata, 'event-log')) def write_dest(self): self.init_dest() # copy pages pages_dir = opj(self.ddata, 'pages') for pn, page in self.pages.items(): if pn.endswith('/MoinEditorBackup'): continue # we don't care for old editor backups page.copy(pages_dir, self.renames) # copy users users_dir = opj(self.ddata, 'user') for user in self.users.values(): user.copy(users_dir, self.renames) # copy logs self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames) self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames) def init_dest(self): try: os.makedirs(self.ddata) except: pass os.makedirs(opj(self.ddata, 'pages')) os.makedirs(opj(self.ddata, 'user')) copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin')) copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))
------------------------------------------------------------------------------ Stay on top of everything new and different, both inside and around Java (TM) technology - register by April 22, and save $200 on the JavaOne (SM) conference, June 2-5, 2009, San Francisco. 300 plus technical and hands-on sessions. Register today. Use priority code J9JMT32. http://p.sf.net/sfu/p
_______________________________________________ Moin-user mailing list Moin-user@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/moin-user