http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99655
Revision: 99655
Author: ariel
Date: 2011-10-12 23:24:40 +0000 (Wed, 12 Oct 2011)
Log Message:
-----------
initial checkin of adds/changes dumps
Added Paths:
-----------
branches/ariel/xmldumps-backup/incrementals/
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
branches/ariel/xmldumps-backup/incrementals/README.config
branches/ariel/xmldumps-backup/incrementals/README.txt
branches/ariel/xmldumps-backup/incrementals/all.dblist
branches/ariel/xmldumps-backup/incrementals/closed.dblist
branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
branches/ariel/xmldumps-backup/incrementals/incrmonitor
branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
branches/ariel/xmldumps-backup/incrementals/incrs-index.html
branches/ariel/xmldumps-backup/incrementals/private.dblist
Added: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,390 @@
+# shared classes for incrementals
+import os
+import sys
+import re
+import ConfigParser
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+from os.path import exists
+import socket
+import subprocess
+from subprocess import Popen, PIPE
+
+class ContentFile(object):
+    """Base class describing one file that belongs to an adds/changes dump.
+
+    Subclasses override getFileName() (and in some cases getPath()) to
+    describe a specific file.
+    """
+
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.incrDir = IncrementDir(self._config, date)
+
+    def getFileName(self):
+        """Return the bare file name; meant to be overridden by subclasses."""
+        return "content.txt"
+
+    def getPath(self):
+        """Return the file's full path inside the wiki's dated directory."""
+        directory = self.incrDir.getIncDir(self.wikiName)
+        return os.path.join(directory, self.getFileName())
+
+    def getFileInfo(self):
+        """Return what FileUtils.fileInfo reports for this file's path."""
+        path = self.getPath()
+        return FileUtils.fileInfo(path)
+
+class MaxRevIDFile(ContentFile):
+    """The file in which the max rev_id for a wiki and date is stored."""
+
+    def getFileName(self):
+        """Return the fixed name of the max rev_id file."""
+        fileName = "maxrevid.txt"
+        return fileName
+
+class StubFile(ContentFile):
+    """The gzipped stubs file of an adds/changes dump."""
+
+    def getFileName(self):
+        """Return the stubs file name built from the wiki name and date."""
+        template = "%s-%s-stubs-meta-hist-incr.xml.gz"
+        return template % (self.wikiName, self.date)
+
+class RevsFile(ContentFile):
+    """The bzip2-compressed revision text file of an adds/changes dump."""
+
+    def getFileName(self):
+        """Return the revisions file name built from wiki name and date."""
+        template = "%s-%s-pages-meta-hist-incr.xml.bz2"
+        return template % (self.wikiName, self.date)
+
+class StatusFile(ContentFile):
+    """The file recording the status of a wiki's dump for one date."""
+
+    def getFileName(self):
+        """Return the fixed name of the status file."""
+        return "status.txt"
+
+    def getPath(self, date = None):
+        """Return the status file path for the given date.
+
+        When date is None the directory lookup falls back to the date this
+        object was created with.
+        """
+        directory = self.incrDir.getIncDir(self.wikiName, date)
+        return os.path.join(directory, self.getFileName())
+
+class LockFile(ContentFile):
+    """A lock file kept in the wiki's directory rather than a dated one."""
+
+    def getFileName(self):
+        """Return the lock file name built from the wiki name and date."""
+        template = "%s-%s.lock"
+        return template % (self.wikiName, self.date)
+
+    def getPath(self):
+        """Return the lock file path in the wiki's (undated) directory."""
+        directory = self.incrDir.getIncDirNoDate(self.wikiName)
+        return os.path.join(directory, self.getFileName())
+
+class MaxRevIDLockFile(LockFile):
+    """Lock file used while the max rev_id of a wiki is being recorded."""
+
+    def getFileName(self):
+        """Return the max rev_id lock file name for this wiki and date."""
+        template = "%s-%s-maxrevid.lock"
+        return template % (self.wikiName, self.date)
+
+class IncrDumpLockFile(LockFile):
+    """Lock file used while the adds/changes dump of a wiki is running."""
+
+    def getFileName(self):
+        """Return the dump lock file name for this wiki and date."""
+        template = "%s-%s-incrdump.lock"
+        return template % (self.wikiName, self.date)
+
+class MD5File(ContentFile):
+    """The file holding the md5 sums of a wiki's dump files for one date."""
+
+    def getFileName(self):
+        """Return the md5 sums file name for this wiki and date."""
+        template = "%s-%s-md5sums.txt"
+        return template % (self.wikiName, self.date)
+
+class IndexFile(ContentFile):
+    """The index.html file at the top of the adds/changes dump tree.
+
+    Unlike the other ContentFile subclasses it is tied to neither a wiki
+    nor a date, so only the configuration is stored.
+    """
+
+    def __init__(self, config):
+        self._config = config
+        self.incrDir = IncrementDir(self._config)
+
+    def getFileName(self):
+        """Return the fixed name of the index file."""
+        return "index.html"
+
+    def getPath(self):
+        """Return the index file path in the base dump directory."""
+        baseDir = self.incrDir.getIncDirBase()
+        return os.path.join(baseDir, self.getFileName())
+
+class StatusInfo(object):
+    """Reads and writes the status file of a wiki's dump for one date."""
+
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.statusFile = StatusFile(self._config, self.date, self.wikiName)
+
+    def getStatus(self, date = None):
+        """Return True if the status file exists and contains "done".
+
+        Note that a boolean is returned, not the status text itself.
+        """
+        path = self.statusFile.getPath(date)
+        if not exists(path):
+            return False
+        return FileUtils.readFile(path).rstrip() == "done"
+
+    def setStatus(self, status):
+        """Write the given status text to the status file."""
+        path = self.statusFile.getPath()
+        FileUtils.writeFileInPlace(path, status, self._config.fileperms)
+
+class Lock(object):
+    """A lock for one wiki and date, represented by a file on disk.
+
+    The lock file contains the fully qualified host name and the pid of
+    the process that created it.
+    """
+
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.lockFile = LockFile(self._config, self.date, self.wikiName)
+
+    def isLocked(self):
+        """Return True if the lock file currently exists."""
+        return exists(self.lockFile.getPath())
+
+    def getLock(self):
+        """Try to create the lock file; return True on success, else False.
+
+        Any failure (the lock file already exists, a directory cannot be
+        created, ...) is reported as False instead of being raised.
+        """
+        try:
+            if not exists(self._config.incrementalsDir):
+                os.makedirs(self._config.incrementalsDir)
+            f = FileUtils.atomicCreate(self.lockFile.getPath(), "w")
+            try:
+                f.write("%s %d" % (socket.getfqdn(), os.getpid()))
+            finally:
+                # do not leak the handle if the write fails
+                f.close()
+            return True
+        except Exception:
+            return False
+
+    def unlock(self):
+        """Remove the lock file; raises OSError if it cannot be removed."""
+        os.remove(self.lockFile.getPath())
+
+    def getLockInfo(self):
+        """Return the lock file's modification time, or None.
+
+        The time is formatted as "YYYY-MM-DD HH:MM:SS" in UTC. None is
+        returned when the lock file cannot be examined.
+        """
+        # imported locally: this module has no top-level "import time"
+        import time
+        try:
+            timestamp = os.stat(self.lockFile.getPath()).st_mtime
+        except OSError:
+            return None
+        # strftime needs a struct_time, not the float that st_mtime holds
+        return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp))
+
+class IncrDumpLock(Lock):
+    """Lock held while the adds/changes dump of a wiki is being produced."""
+
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.wikiName = wikiName
+        self.date = date
+        # same as Lock, but backed by the dump-specific lock file
+        self.lockFile = IncrDumpLockFile(config, date, wikiName)
+
+class MaxRevIDLock(Lock):
+    """Lock held while the max rev_id of a wiki is being recorded."""
+
+    def __init__(self,config, date, wikiName):
+        self._config = config
+        self.wikiName = wikiName
+        self.date = date
+        # same as Lock, but backed by the max rev_id lock file
+        self.lockFile = MaxRevIDLockFile(config, date, wikiName)
+
+class Config(object):
+    """Configuration for the adds/changes dumps.
+
+    Settings are read with ConfigParser from a list of candidate files;
+    every option has a built-in default, but a 'wiki' section must be
+    present in the files that were read.
+    """
+
+    def __init__(self, configFile=False):
+        """Read the configuration files and populate the settings.
+
+        configFile -- name of the configuration file, joined to the
+                      directory of the running script; "dumpincr.conf"
+                      is used when it is not given.
+
+        Raises ConfigParser.NoSectionError if no 'wiki' section was read.
+        """
+        self.projectName = False
+
+        home = os.path.dirname(sys.argv[0])
+        if not configFile:
+            configFile = "dumpincr.conf"
+        self.files = [
+            os.path.join(home, configFile),
+            "/etc/dumpincrementals.conf",
+        ]
+        # HOME can be unset (for example under cron); joining None with a
+        # file name would raise, so the per-user file is only added when
+        # the variable is available.
+        userHome = os.getenv("HOME")
+        if userHome:
+            self.files.append(os.path.join(userHome, ".dumpincr.conf"))
+        defaults = {
+            #"wiki": {
+            "allwikislist": "",
+            "privatewikislist": "",
+            "closedwikislist": "",
+            #"output": {
+            "incrementalsdir": "/dumps/public/incr",
+            "templatedir": home,
+            "temp":"/dumps/temp",
+            "webroot": "http://localhost/dumps/incr",
+            "fileperms": "0640",
+            "delay": "43200",
+            #"database": {
+            "user": "root",
+            "password": "",
+            #"tools": {
+            "mediawiki" : "",
+            "php": "/bin/php",
+            "gzip": "/usr/bin/gzip",
+            "bzip2": "/usr/bin/bzip2",
+            "mysql": "/usr/bin/mysql",
+            "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
+            "writeuptopageid": "/usr/local/bin/writeuptopageid",
+            "multiversion": "",
+            #"cleanup": {
+            "keep": "3",
+            }
+
+        self.conf = ConfigParser.SafeConfigParser(defaults)
+        self.conf.read(self.files)
+
+        if not self.conf.has_section("wiki"):
+            print "The mandatory configuration section 'wiki' was not defined."
+            raise ConfigParser.NoSectionError('wiki')
+
+        # NOTE(review): "mediawiki" is also present in the defaults above,
+        # so this check presumably never fails; confirm whether an empty
+        # value should be rejected when no multiversion script is set.
+        if not self.conf.has_option("wiki","mediawiki"):
+            print "The mandatory setting 'mediawiki' in the section 'wiki' was not defined."
+            raise ConfigParser.NoOptionError('wiki','mediawiki')
+
+        self.parseConfFile()
+
+    def parseConfFile(self):
+        """Copy the parsed options into attributes of this object.
+
+        Sections other than 'wiki' are created when missing so that the
+        defaults apply to them.
+        """
+        self.mediawiki = self.conf.get("wiki", "mediawiki")
+        self.allWikisList = MiscUtils.dbList(
+            self.conf.get("wiki", "allwikislist"))
+        self.privateWikisList = MiscUtils.dbList(
+            self.conf.get("wiki", "privatewikislist"))
+        self.closedWikisList = MiscUtils.dbList(
+            self.conf.get("wiki", "closedwikislist"))
+
+        if not self.conf.has_section('output'):
+            self.conf.add_section('output')
+        self.incrementalsDir = self.conf.get("output", "incrementalsdir")
+        self.tempDir = self.conf.get("output", "temp")
+        self.templateDir = self.conf.get("output", "templateDir")
+        self.webRoot = self.conf.get("output", "webroot")
+        # base 0: the prefix of the string decides the base, so a value
+        # such as "0640" is read as octal by Python 2
+        self.fileperms = int(self.conf.get("output", "fileperms"), 0)
+        self.delay = int(self.conf.get("output", "delay"), 0)
+
+        if not self.conf.has_section('tools'):
+            self.conf.add_section('tools')
+        self.php = self.conf.get("tools", "php")
+        self.gzip = self.conf.get("tools", "gzip")
+        self.bzip2 = self.conf.get("tools", "bzip2")
+        self.mysql = self.conf.get("tools", "mysql")
+        self.checkforbz2footer = self.conf.get("tools","checkforbz2footer")
+        self.writeuptopageid = self.conf.get("tools","writeuptopageid")
+        self.multiversion = self.conf.get("tools","multiversion")
+
+        if not self.conf.has_section('cleanup'):
+            self.conf.add_section('cleanup')
+        self.keep = self.conf.getint("cleanup", "keep")
+
+        if not self.conf.has_section('database'):
+            self.conf.add_section('database')
+        self.dbUser = self.conf.get("database", "user")
+        self.dbPassword = self.conf.get("database", "password")
+
+    def readTemplate(self, name):
+        """Return the contents of the named file in the template directory."""
+        template = os.path.join(self.templateDir, name)
+        return FileUtils.readFile(template)
+
+class RunSimpleCommand(object):
+    """Helpers that run an external command, retrying it on failure."""
+
+    @staticmethod
+    def _commandAsString(command):
+        """Return the command as one string for use in error messages."""
+        if isinstance(command, list):
+            return " ".join(command)
+        return command
+
+    @staticmethod
+    def _failure(command, proc, error):
+        """Build the IncrementDumpsError describing a failed command."""
+        commandString = RunSimpleCommand._commandAsString(command)
+        if proc is None:
+            # the command was never started (maxtries was less than 1)
+            return IncrementDumpsError(
+                "command '" + commandString + "' failed"
+                + " and error '" + error + "'")
+        return IncrementDumpsError(
+            "command '" + commandString
+            + ("' failed with return code %s " % proc.returncode)
+            + " and error '" + error + "'")
+
+    @staticmethod
+    def runWithOutput(command, maxtries = 3, shell=False):
+        """Run a command and return the output as a string.
+
+        The command is run up to maxtries times, until it exits with a
+        zero return code.
+        Raises IncrementDumpsError on non-zero return code."""
+        proc = None
+        error = ""
+        tries = 0
+        while tries < maxtries:
+            proc = Popen(command, shell = shell, stdout = PIPE, stderr = PIPE)
+            output, error = proc.communicate()
+            if not proc.returncode:
+                return output
+            tries = tries + 1
+        raise RunSimpleCommand._failure(command, proc, error)
+
+    @staticmethod
+    def runWithNoOutput(command, maxtries = 3, shell=False):
+        """Run a command, expecting no output.
+
+        The command is run up to maxtries times, until it exits with a
+        zero return code; its stdout is not captured.
+        Raises IncrementDumpsError on non-zero return code."""
+        proc = None
+        error = ""
+        tries = 0
+        while tries < maxtries:
+            proc = Popen(command, shell = shell, stderr = PIPE)
+            # stdout is not piped, so only the error text is of interest
+            error = proc.communicate()[1]
+            if not proc.returncode:
+                return
+            tries = tries + 1
+        raise RunSimpleCommand._failure(command, proc, error)
+
+class MultiVersion(object):
+    """Builds the invocation of a MediaWiki maintenance script.
+
+    When a multiversion wrapper is configured and exists on disk the
+    script is run through it; otherwise the script in the maintenance
+    directory of the MediaWiki installation is used.
+    """
+
+    def MWScriptAsString(config, maintenanceScript):
+        """Return the script invocation as one space-separated string."""
+        parts = MultiVersion.MWScriptAsArray(config, maintenanceScript)
+        return " ".join(parts)
+
+    def MWScriptAsArray(config, maintenanceScript):
+        """Return the script invocation as a list of arguments."""
+        wrapper = config.multiversion
+        if wrapper != "" and exists(wrapper):
+            return [ wrapper, maintenanceScript ]
+        return [ "%s/maintenance/%s" % (config.mediawiki, maintenanceScript) ]
+
+    MWScriptAsString = staticmethod(MWScriptAsString)
+    MWScriptAsArray = staticmethod(MWScriptAsArray)
+
+class DBServer(object):
+    """Finds the database server of a wiki and builds mysql commands for it."""
+
+    def __init__(self, config, wikiName):
+        self.config = config
+        self.wikiName = wikiName
+        self.dbServer = self.defaultServer()
+
+    def defaultServer(self):
+        """Return the output of getSlaveServer.php for this wiki.
+
+        The maintenance script is run through php with --group=dump and
+        trailing whitespace is stripped from what it prints.
+        Raises IncrementDumpsError if the php command does not exist or
+        the script fails.
+        """
+        if not exists(self.config.php):
+            # the original raised BackupError, a name that is not defined
+            # anywhere in this module
+            raise IncrementDumpsError(
+                "php command %s not found" % self.config.php)
+        commandList = MultiVersion.MWScriptAsArray(self.config,
+                                                   "getSlaveServer.php")
+        command = [ self.config.php, "-q" ]
+        command.extend(commandList)
+        command.extend( [ "--wiki=%s" % self.wikiName, "--group=dump" ])
+        return RunSimpleCommand.runWithOutput(command, shell=False).rstrip()
+
+    def buildSqlCommand(self, query):
+        """Put together a command to execute an sql query to the server for this DB.
+
+        The result is a shell pipeline string; it is meant to be run
+        with shell=True.
+        Raises IncrementDumpsError if the mysql command does not exist.
+        """
+        if not exists(self.config.mysql):
+            raise IncrementDumpsError(
+                "mysql command %s not found" % self.config.mysql)
+        # NOTE(review): query, user, password and wiki name are pasted
+        # into the shell command unquoted; this is only safe as long as
+        # they come from trusted configuration.
+        command = "/bin/echo '%s' | %s -h %s -u %s " % (
+            query, self.config.mysql, self.dbServer, self.config.dbUser )
+        if self.config.dbPassword != "":
+            command = command + "-p" + self.config.dbPassword
+        command = command + " -r --silent " + self.wikiName
+        return command
+
+class IncrementDumpsError(Exception):
+    """Error raised by the adds/changes dump code, e.g. for failed commands."""
+
+class IncrementDir(object):
+    """Computes the directories in which adds/changes dumps are kept.
+
+    The layout is <base>/<wiki name>/<date>.
+    """
+
+    def __init__(self, config, date = None):
+        self._config = config
+        self.date = date
+
+    def getIncDirBase(self):
+        """Return the configured top level directory of the dumps."""
+        return self._config.incrementalsDir
+
+    def getIncDirNoDate(self, wikiName):
+        """Return the directory of the given wiki, without a date part."""
+        return os.path.join(self.getIncDirBase(), wikiName)
+
+    def getIncDir(self, wikiName, date = None):
+        """Return the dated directory of the given wiki.
+
+        The date this object was created with is used when date is None.
+        """
+        chosen = date
+        if chosen == None:
+            chosen = self.date
+        return os.path.join(self.getIncDirBase(), wikiName, chosen)
+
+# NOTE(review): second, identical definition of IncrementDumpsError in
+# this module; it rebinds the name to a new class and looks redundant.
+class IncrementDumpsError(Exception):
+ pass
+
+class IncDumpDirs(object):
+    """Lists and prunes the dated dump directories of one wiki."""
+
+    def __init__(self, config, wikiName):
+        self._config = config
+        self.wikiName = wikiName
+        self.incrDir = IncrementDir(self._config)
+
+    def getIncDumpDirs(self):
+        """Return the sorted names of the wiki's eight-digit directories.
+
+        An empty list is returned when the wiki's directory cannot be
+        listed.
+        """
+        base = self.incrDir.getIncDirNoDate(self.wikiName)
+        digits = re.compile(r"^\d{4}\d{2}\d{2}$")
+        try:
+            entries = os.listdir(base)
+        except OSError:
+            return []
+        return sorted([entry for entry in entries if digits.match(entry)])
+
+    def cleanupOldIncrDumps(self, date):
+        """Remove old dump directories of this wiki.
+
+        The directory for the given date is never removed when it is the
+        most recent one. Of the remaining directories the newest 'keep'
+        (from the configuration) are kept when keep is positive; all
+        others are deleted recursively.
+        """
+        # imported locally: this module has no top-level "import shutil"
+        import shutil
+        old = self.getIncDumpDirs()
+        if old and old[-1] == date:
+            old = old[:-1]
+        if self._config.keep > 0:
+            old = old[:-(self._config.keep)]
+        base = self.incrDir.getIncDirNoDate(self.wikiName)
+        for dump in old:
+            shutil.rmtree(os.path.join(base, dump))
+
+    def getPrevIncrDate(self, date):
+        """Return the date of the latest completed dump before date.
+
+        None is returned when no earlier dump completed successfully.
+        """
+        previous = None
+        for dump in self.getIncDumpDirs():
+            # names are YYYYMMDD strings in sorted order, so everything
+            # from here on is not earlier than the requested date
+            if dump >= date:
+                break
+            statusInfo = StatusInfo(self._config, dump, self.wikiName)
+            # getStatus returns a boolean, not the text "done"
+            if statusInfo.getStatus(dump):
+                previous = dump
+        return previous
+
+    def getLatestIncrDate(self):
+        """Return the date of the most recent dump directory, or None."""
+        dirs = self.getIncDumpDirs()
+        if dirs:
+            return dirs[-1]
+        return None
Property changes on: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
___________________________________________________________________
Added: svn:eol-style
+ native
Added: branches/ariel/xmldumps-backup/incrementals/README.config
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/README.config
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/README.config 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,38 @@
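+By default, configuration options are read from the file "dumpincr.conf" in the directory containing the scripts, and also from "/etc/dumpincrementals.conf" and ".dumpincr.conf" in the user's home directory.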
+A different filename may be specified at run time.
+
+The following configuration options are accepted:
+
+In the "wiki" section,
+mediawiki -- full path to the directory of the MediaWiki installation
+allwikislist -- full path to a list of all projects to be dumped, as they
appear in MySql
+privatewikislist -- full path to a list of all projects that are private and
hence should not be dumped, if any
+closedwikislist -- full path to a list of all projects that are closed and
hence should not be dumped, if any
+
+In the "output" section,
+incrementalsdir -- full path to the top level directory where adds/changes
dumps will be written; this should
+ be web-accessible
+templatedir -- full path to the directory containing template html files
such as incrs-index.html (typically
+ the same directory as that which contains the dump scripts)
+temp -- full path to a directory which is used for the generation of temporary files; this should
+ not be web-accessible
+webroot -- url to top level directory with the main index page, for
example http://localhost/mydumps
+fileperms -- read and write permissions that will be assigned to
created files; this is in octal four-digit
+ format, for example 0644
+delay -- number of seconds to wait after a max rev_id has been
recorded, before dumping revisions
+
+In the "database" section,
+user -- the name of a database user with read access to all tables in the
databases
+ which will be dumped
+password -- the password for the above user
+
+In the "tools" section,
+php -- the full path to the php command
+mysql -- the full path to the mysql command
+gzip -- the full path to the gzip command
+bzip2 -- the full path to the bzip2 command
+checkforbz2footer -- the full path to the checkforbz2footer command
+writeuptopageid -- the full path to the writeuptopageid command
+
+In the "cleanup" section,
+keep -- the number of old dumps to keep, per project.
Added: branches/ariel/xmldumps-backup/incrementals/README.txt
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/README.txt
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/README.txt 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,66 @@
+The adds/changes dumps are a supplementary set of dumps intended to accompany
+the regular XML dump files.
+
+The adds/changes dumps are produced in two stages.
+
+In stage one, the max rev_id value at the time of the run is written out to a
file for each project for the given date. Script name: generatemaxrevids.py
+
+In stage two, intended to be run at a later time, a stub file is written
+containing all revisions from the previous adds/changes dump through the
+max rev_id just written. This file is sorted by page id, just as the regular XML stubs files
+are. Next a history file containing metadata and page text for those
+revisions is written, in the same format as the pages-meta-history file
+generated for the regular XML dumps. A status file is written to indicate
+that the job is done, and the md5sums of the stub and revision text files
+are written to a file as well. Script name: generateincrementals.py
+
+The reason that there are two stages run via two separate scripts is that
+you may want to allow editors time to delete or hide sensitive or offensive
+material newly entered. A delay of an arbitrary number of seconds between
+the recording of the max rev_id to dump and the start of the stub and
+revision text dump is configurable in the configuration file; see
+README.config for information on that.
+
+Installation:
+
+Seriously? You want to install this already? This is version 0.0.1. Know
+what that means? It's buggy, risky, and could eat your data.
+
+However, if you just want to play around with it on your laptop, fine.
+* Put the files generateincrementals.py, generatemaxrevids.py, incrmonitor.py,
+ incrmonitor and IncrDumpLib.py together with the sample configuration file
+ dumpincr.conf into a directory from which the job will run.
+ Make sure you have a copy or a symlink of WikiDump.py from the regular XML
+ dumps in this same directory.
+ Also make sure you have a template for the top level index.html file, called
+ "incrs-index.html" in the same directory with these scripts. See the
existing
+ incrs-index.html file for the format; the key here is that you want the
+ string "%(items)s" in between <ul> and </ul> tags. The status of the dump
+ for each wiki, along with links to the stub and revisions files, will be
+ included as a list item in that spot in the file.
+* See README.config for information on the various options in the config file.
+* Create the top level directory underneath which there will be a directory
+ for each project you want to generate additions/changes. You needn't create
+ the subdirectories, this will be done for you at run time.
+* Do a test run; run generatemaxrevids.py by hand. Then look in the top level
+ directory you created earlier. Is there a directory for each project? Is
+ there a subdirectory under each of these with the date, in YYYYMMDD format?
+ In the date subdirectory is there a file maxrevid.txt containing a positive
+ integer?
+* Do the phase 2 test run: run generateincrementals.py by hand. If you have
+ configured a large delay, you will need to wait at least that amount of time
+ before running this script. When it has completed, check the subdirectory
+ from phase 1; are there files analogous to the following?
+ mywiki-yyyymmdd-md5sums.txt
+ mywiki-yyyymmdd-pages-meta-hist-incr.xml.bz2
+ mywiki-yyyymmdd-stubs-meta-hist-incr.xml.gz
+ maxrevid.txt
+ status.txt
+ Does the status.txt file contain "done"?
+* If the runs look like they are producing the right files, do the html
+ generation by hand; run incrmonitor.py. In the top level directory for the
+ adds/changes dumps, do you see the file index.html? If you view that
+ file in a browser, do the contents look reasonable?
+* If that looks good, put phase 1 and phase 2 into separate cron jobs,
+ spacing them out as appropriate.
+
Property changes on: branches/ariel/xmldumps-backup/incrementals/README.txt
___________________________________________________________________
Added: svn:eol-style
+ native
Added: branches/ariel/xmldumps-backup/incrementals/all.dblist
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/all.dblist
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/all.dblist 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,5 @@
+elwikidb
+simplewikidb
+testAw118wmf1
+testBw118wmf1
+testCw118wmf1
\ No newline at end of file
Added: branches/ariel/xmldumps-backup/incrementals/closed.dblist
===================================================================
Added: branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,32 @@
+# sample configuration file
+
+[wiki]
+mediawiki=/src/mediawiki/118wmf1/1.18wmf1
+allwikislist=/home/backups/incrementals/all.dblist
+privatewikislist=/home/backups/incrementals/private.dblist
+closedwikislist=/home/backups/incrementals/closed.dblist
+
+[output]
+incrementalsdir=/dumps/public/incr
+templatedir=/home/backups/incrementals
+temp=/dumps/temp
+webroot=http://localhost/mydumps
+fileperms=0644
+# minimum number of seconds from revision creation
+# til it can be dumped
+delay=43200
+
+[database]
+user=dbuser
+password=leet
+
+[tools]
+php=/usr/bin/php
+mysql=/usr/bin/mysql
+gzip=/usr/bin/gzip
+bzip2=/usr/bin/bzip2
+checkforbz2footer=/usr/local/bin/checkforbz2footer
+writeuptopageid=/usr/local/bin/writeuptopageid
+
+[cleanup]
+keep=20
Added: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,266 @@
+# for every wiki, read the maxid and the prev maxid
+# recorded for incrementals, dump stubs and dump history file
+# based on stubs.
+# this is phase 2 of daily xml change/adds dumps.
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion,
DBServer, IncrementDir, IncrementDumpsError, MaxRevIDFile, StatusFile,
IncrDumpLockFile, StubFile, RevsFile, MD5File, IncDumpDirs, IncrDumpLock,
MaxRevIDLock, StatusInfo
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+
+class DumpResults(object):
+    """Result codes returned by a dump run of a single wiki."""
+
+    def __init__(self):
+        # OK: finished or nothing to do; TODO: try again later;
+        # FAILED: the run hit an error
+        self.OK, self.TODO, self.FAILED = 0, 1, -1
+
+class IncrDump(object):
+    """Produces the adds/changes dump of one wiki for one date.
+
+    The dump consists of a stubs file, a revision text file and a file
+    with their md5 sums; a status file is set to "done" at the end.
+    """
+
+    def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.incrDir = IncrementDir(self._config, self.date)
+        self.doStubs = doStubs
+        self.doRevs = doRevs
+        self.dryrun = dryrun
+        self.maxRevIDFile = MaxRevIDFile(self._config, self.date, self.wikiName)
+        self.statusInfo = StatusInfo(self._config, self.date, self.wikiName)
+        self.stubFile = StubFile(self._config, self.date, self.wikiName)
+        self.revsFile = RevsFile(self._config, self.date, self.wikiName)
+        self.incrDumpsDirs = IncDumpDirs(self._config, self.wikiName)
+        self.verbose = verbose
+
+    def getMaxRevIdFromFile(self, date = None):
+        """Return the max rev_id recorded for this wiki, as a string.
+
+        The file of the given date is read, or of this dump's date when
+        date is None; trailing whitespace is removed from the contents.
+        """
+        if date is None:
+            date = self.date
+        maxRevIDFile = MaxRevIDFile(self._config, date, self.wikiName)
+        # strip the contents that were read, not the path
+        return FileUtils.readFile(maxRevIDFile.getPath()).rstrip()
+
+    def doOneWiki(self):
+        """Run the dump for this wiki and return a DumpResults code.
+
+        OK is returned when the dump completed, was already complete, or
+        the wiki is private or closed. TODO is returned when the
+        configured delay has not yet passed or the wiki is locked.
+        FAILED is returned when any step of the dump failed.
+        """
+        retCodes = DumpResults()
+        if (self.wikiName in self._config.privateWikisList
+                or self.wikiName in self._config.closedWikisList):
+            if self.verbose:
+                print "wiki",self.wikiName,"skipped, wiki is private or closed"
+            return retCodes.OK
+
+        wikiDir = self.incrDir.getIncDir(self.wikiName)
+        if not exists(wikiDir):
+            os.makedirs(wikiDir)
+        # getStatus returns a boolean, not the text "done"
+        if self.statusInfo.getStatus():
+            if self.verbose:
+                print "wiki",self.wikiName,"skipped, adds/changes dump already complete"
+            return retCodes.OK
+        try:
+            age = time.time() - os.path.getmtime(self.maxRevIDFile.getPath())
+        except OSError:
+            # without a recorded max rev_id there is nothing to dump;
+            # fail this wiki only instead of aborting the whole run
+            if self.verbose:
+                print "wiki",self.wikiName,"failed, no max rev_id file found"
+            return retCodes.FAILED
+        if age < self._config.delay:
+            if self.verbose:
+                print "wiki",self.wikiName,"skipped, must wait for configured delay interval"
+            return retCodes.TODO
+
+        lock = None
+        if not self.dryrun:
+            lock = IncrDumpLock(self._config, self.date, self.wikiName)
+            if not lock.getLock():
+                if self.verbose:
+                    print "wiki",self.wikiName,"skipped, wiki is locked, another process should be doing the job"
+                return retCodes.TODO
+        try:
+            if not self._dumpFiles():
+                return retCodes.FAILED
+        except Exception:
+            if self.verbose:
+                traceback.print_exc(file=sys.stdout)
+            return retCodes.FAILED
+        finally:
+            # release the lock on every way out, including failures
+            if lock is not None:
+                lock.unlock()
+        if self.verbose:
+            print "Success! Wiki", self.wikiName, "incremental dump complete."
+        return retCodes.OK
+
+    def _dumpFiles(self):
+        """Run cleanup, the dump steps and md5 sums; return True on success."""
+        if not self.dryrun:
+            self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
+        maxRevID = self.getMaxRevIdFromFile()
+        prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date)
+        prevRevID = None
+        if prevDate:
+            prevRevID = self.getMaxRevIdFromFile(prevDate)
+        if not prevRevID:
+            # no usable earlier dump: start ten revisions before the
+            # recorded max rev_id, but never below 1
+            prevRevID = str(max(int(maxRevID) - 10, 1))
+        else:
+            # this incr will cover every revision from the last incremental
+            # through the maxid we wrote out in phase one of this job.
+            prevRevID = str(int(prevRevID) + 1)
+        if self.doStubs:
+            # end rev id is not included in dump
+            endRevID = str(int(maxRevID) + 1)
+            if not self.dumpStub(prevRevID, endRevID):
+                return False
+        if self.doRevs and not self.dumpRevs():
+            return False
+        if not self.dryrun:
+            if not self.md5sums():
+                return False
+            self.statusInfo.setStatus("done")
+        return True
+
+    def _runDumpCommand(self, command, dryrunNotice, failureNotice):
+        """Run one dump command, or only print it in dry-run mode.
+
+        Returns True on success (and always in dry-run mode), False if
+        the command failed.
+        """
+        if self.dryrun:
+            print dryrunNotice, command
+            return True
+        try:
+            # raises IncrementDumpsError on failure, returns nothing
+            RunSimpleCommand.runWithNoOutput(command, shell = False)
+        except IncrementDumpsError:
+            if self.verbose:
+                print failureNotice, self.wikiName
+                traceback.print_exc(file=sys.stdout)
+            return False
+        return True
+
+    def dumpStub(self, startRevID, endRevID):
+        """Write the stubs file for the given revision range.
+
+        startRevID is passed as --revstart and endRevID as --revend to
+        dumpBackup.php. Returns True on success, False on failure.
+        """
+        scriptCommand = MultiVersion.MWScriptAsArray(self._config, "dumpBackup.php")
+        command = [ "%s" % self._config.php, "-q" ]
+        command.extend(scriptCommand)
+        command.extend(["--wiki=%s" % self.wikiName, "--stub", "--quiet",
+                        "--force-normal",
+                        "--output=gzip:%s" % self.stubFile.getPath(),
+                        "--revrange", "--revstart=%s" % startRevID,
+                        "--revend=%s" % endRevID ])
+        return self._runDumpCommand(command,
+                                    "would run command for stubs dump:",
+                                    "error producing stub files for wiki")
+
+    def dumpRevs(self):
+        """Write the revision text file from the stubs file.
+
+        Returns True on success, False on failure.
+        """
+        scriptCommand = MultiVersion.MWScriptAsArray(self._config, "dumpTextPass.php")
+        command = [ "%s" % self._config.php, "-q" ]
+        command.extend(scriptCommand)
+        command.extend(["--wiki=%s" % self.wikiName,
+                        "--stub=gzip:%s" % self.stubFile.getPath(),
+                        "--force-normal", "--quiet",
+                        "--spawn=%s" % self._config.php,
+                        "--output=bzip2:%s" % self.revsFile.getPath()
+                        ])
+        return self._runDumpCommand(command,
+                                    "would run command for revs dump:",
+                                    "error producing revision text files for wiki")
+
+    def md5sumOneFile(self, filename):
+        """Return the hex md5 digest of the named file, read in chunks."""
+        summer = hashlib.md5()
+        bufsize = 4192 * 32
+        infile = open(filename, "rb")
+        try:
+            buffer = infile.read(bufsize)
+            while buffer:
+                summer.update(buffer)
+                buffer = infile.read(bufsize)
+        finally:
+            infile.close()
+        return summer.hexdigest()
+
+    def md5sums(self):
+        """Write the md5 sums of the files of this run to the md5 file.
+
+        One digest per line is written, for the stubs file and/or the
+        revisions file depending on what this run produces. Returns True
+        on success, False on any error.
+        """
+        try:
+            md5File = MD5File(self._config, self.date, self.wikiName)
+            files = []
+            if self.doStubs:
+                files.append(self.stubFile.getPath())
+            if self.doRevs:
+                files.append(self.revsFile.getPath())
+            text = "".join(["%s\n" % self.md5sumOneFile(f) for f in files])
+            FileUtils.writeFileInPlace(md5File.getPath(), text,
+                                       self._config.fileperms)
+            return True
+        except Exception:
+            if self.verbose:
+                traceback.print_exc(file=sys.stdout)
+            return False
+
+class IncrDumpLoop(object):
+    """Runs the adds/changes dump for every wiki in the configured list."""
+
+    def __init__(self, config, date, doStubs, doRevs, dryrun, verbose):
+        self._config = config
+        self.date = date
+        self.doStubs = doStubs
+        self.doRevs = doRevs
+        self.dryrun = dryrun
+        self.verbose = verbose
+
+    def doRunOnAllWikis(self):
+        """Run the dump once for each wiki.
+
+        Returns a tuple (failures, todos): the number of wikis whose run
+        failed and the number that must be tried again later.
+        """
+        retCodes = DumpResults()
+        failures = 0
+        todos = 0
+        for w in self._config.allWikisList:
+            # use the settings given to this object, not module globals
+            dump = IncrDump(self._config, self.date, w, self.doStubs,
+                            self.doRevs, self.dryrun, self.verbose)
+            result = dump.doOneWiki()
+            if result == retCodes.FAILED:
+                failures = failures + 1
+            elif result == retCodes.TODO:
+                todos = todos + 1
+        return (failures, todos)
+
+    def doAllWikisTilDone(self,numFails):
+        """Repeat the run over all wikis until none fails or is pending.
+
+        Five minutes are waited between passes. Raises
+        IncrementDumpsError once more than numFails passes have ended
+        with failures or pending wikis.
+        """
+        fails = 0
+        while 1:
+            (failures, todos) = self.doRunOnAllWikis()
+            if not failures and not todos:
+                break
+            fails = fails + 1
+            if fails > numFails:
+                raise IncrementDumpsError("Too many consecutive failures, giving up")
+            # wait 5 minutes and try another loop
+            time.sleep(300)
+
+def usage(message = None):
+    """Print an optional message and the usage text, then exit with status 1."""
+    if message:
+        print message
+    print "Usage: python generateincrementals.py [options] [wikidbname]"
+    print "Options: --configfile, --date, --dryrun, --revsonly, --stubsonly, --verbose"
+    print "--configfile: Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory."
+    print "--date: (Re)run incremental of a given date (use with care)."
+    print "--dryrun: Don't actually dump anything but print the commands that would be run."
+    # the two descriptions below were swapped; they now match the option
+    # handling, where --revsonly turns the stubs off and --stubsonly
+    # turns the revision text off
+    print "--revsonly: Do only the revision text part of the dumps."
+    print "--stubsonly: Do only the stubs part of the dumps."
+    print "--verbose: Print error messages and other informative messages (normally the"
+    print " script runs silently)."
+    print "wikiname: Run the dumps only for the specific wiki."
+    sys.exit(1)
+
+if __name__ == "__main__":
+    # Entry point: parse the options, read the configuration, then dump
+    # either the one wiki named on the command line or all wikis.
+    configFile = False
+    result = False
+    date = None
+    doStubs = True
+    doRevs = True
+    dryrun = False
+    verbose = False
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(
+            sys.argv[1:], "",
+            ['date=', 'configfile=', 'stubsonly', 'revsonly', 'dryrun',
+             'verbose' ])
+    except getopt.GetoptError:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--date":
+            date = val
+        elif opt == "--configfile":
+            configFile = val
+        elif opt == "--stubsonly":
+            doRevs = False
+        elif opt == "--revsonly":
+            doStubs = False
+        elif opt == "--dryrun":
+            dryrun = True
+        elif opt == "--verbose":
+            verbose = True
+
+    if not doRevs and not doStubs:
+        usage("You may not specify stubsonly and revsonly options together.")
+
+    if (configFile):
+        config = Config(configFile)
+    else:
+        config = Config()
+
+    if not date:
+        date = TimeUtils.today()
+
+    if len(remainder) > 0:
+        # a single wiki was named: run its dump once; IncrDump has no
+        # doAllWikisTilDone method
+        dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun,
+                        verbose)
+        dump.doOneWiki()
+    else:
+        dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose)
+        dump.doAllWikisTilDone(3)
Property changes on:
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
___________________________________________________________________
Added: svn:eol-style
+ native
Added: branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,138 @@
+# for every wiki, find and record the max rev_id in use.
+# this is phase 1 of daily xml change/adds dumps.
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion,
DBServer, IncrementDir, IncrementDumpsError, MaxRevIDFile, MaxRevIDLockFile,
IncrDumpLock, MaxRevIDLock
+from subprocess import Popen, PIPE
+from os.path import exists
+import traceback
+
+class MaxRevID(object):
+ def __init__(self, config, wikiName, date):
+ self._config = config
+ self.wikiName = wikiName
+ self.date = date
+ self.maxID = 0
+ self.maxRevIdFile = MaxRevIDFile(self._config, self.date,
self.wikiName)
+
+ def getMaxRevID(self):
+ query = "select MAX(rev_id) from revision";
+ db = DBServer(self._config, self.wikiName)
+ # get the result
+ self.maxID = RunSimpleCommand.runWithOutput(db.buildSqlCommand(query),
shell = True)
+
+ def recordMaxRevID(self):
+ self.getMaxRevID()
+ # write the max id in a file in the right place
+ FileUtils.writeFileInPlace(self.maxRevIdFile.getPath(), self.maxID,
self._config.fileperms)
+
+ def exists(self):
+ return exists(self.maxRevIdFile.getPath())
+
+class MaxIDDump(object):
+ def __init__(self,config, date, verbose):
+ self._config = config
+ self.date = date
+ self.incrDir = IncrementDir(self._config, self.date)
+ self.verbose = verbose
+
+ def doOneWiki(self, w):
+ success = True
+ if w not in self._config.privateWikisList and w not in
self._config.closedWikisList:
+ if not exists(self.incrDir.getIncDir(w)):
+ os.makedirs(self.incrDir.getIncDir(w))
+ lock = MaxRevIDLock(self._config, self.date, w)
+ if lock.getLock():
+ try:
+ maxRevID = MaxRevID(self._config, w, self.date)
+ if not maxRevID.exists():
+ maxRevID.recordMaxRevID()
+ except:
+ if (self.verbose):
+ print "Wiki ", w, "failed to get max revid."
+ traceback.print_exc(file=sys.stdout)
+ success = False
+ lock.unlock()
+ else:
+ if (self.verbose):
+ print "Wiki ", w, "failed to get lock."
+ traceback.print_exc(file=sys.stdout)
+ if success:
+ if (self.verbose):
+ print "Success! Wiki", w, "adds/changes dump complete."
+ return success
+
+ def doRunOnAllWikis(self):
+ failures = 0
+ for w in self._config.allWikisList:
+ if not self.doOneWiki(w):
+ failures = failures + 1
+ return failures
+
+ def doAllWikisTilDone(self,numFails):
+ fails = 0
+ while 1:
+ result = self.doRunOnAllWikis()
+ if not result:
+ break
+ fails = fails + 1
+ if fails > numFails:
+ raise("Too many consecutive failures, giving up")
+ # wait 5 minutes and try another loop
+ time.sleep(300)
+
+def usage(message = None):
+ if message:
+ print message
+ print "Usage: python generateincrementals.py [options] [wikidbname]"
+ print "Options: --configfile, --date, --verbose"
+ print "--configfile: Specify an alternate config file to read.
Default file is 'dumpincr.conf' in the current directory."
+ print "--date: (Re)run incremental of a given date (use with
care)."
+ print "--verbose: Print error messages and other informative
messages (normally the"
+ print " script runs silently)."
+ print "wikiname: Run the dumps only for the specific wiki."
+ sys.exit(1)
+
+if __name__ == "__main__":
+ configFile = False
+ result = False
+ date = None
+ verbose = False
+
+ try:
+ (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+ ['date=', 'configfile=',
'verbose' ])
+ except:
+ usage("Unknown option specified")
+
+ for (opt, val) in options:
+ if opt == "--date":
+ date = val
+ elif opt == "--configfile":
+ configFile = val
+ elif opt == "--verbose":
+ verbose = True
+
+ if (configFile):
+ config = Config(configFile)
+ else:
+ config = Config()
+
+ if not date:
+ date = TimeUtils.today()
+
+ dump = MaxIDDump(config, date, verbose)
+ if len(remainder) > 0:
+ dump.doOneWiki(remainder[0])
+ else:
+ dump.doAllWikisTilDone(3)
Property changes on:
branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
___________________________________________________________________
Added: svn:eol-style
+ native
Added: branches/ariel/xmldumps-backup/incrementals/incrmonitor
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+WIKIDUMP_BASE=`dirname "$0"`
+
+if [ ! -z "$1" ]; then
+ configFile="$1"
+else
+ configFile=""
+fi
+
+if [ ! -z "$2" ]; then
+ if [ "$2" == "verbose" ]; then
+ verbose="--verbose"
+ else
+ echo "Unknown option $2"
+ exit 1
+ fi
+fi
+
+while true; do
+ echo ""
+ echo "Sweeping!"
+ python $WIKIDUMP_BASE/incrmonitor.py "$configFile" "$verbose"
+ echo "sleeping"
+ sleep 15
+done
Property changes on: branches/ariel/xmldumps-backup/incrementals/incrmonitor
___________________________________________________________________
Added: svn:executable
+ *
Added: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py 2011-10-12
23:24:40 UTC (rev 99655)
@@ -0,0 +1,134 @@
+# generate an index page covering the status of and links to
+# incremental files for the latest date for each project
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion,
DBServer, IncrementDir, IncrementDumpsError, IndexFile, IncrDumpLockFile,
IncDumpDirs, IncrDumpLock, MaxRevIDLock, StubFile, RevsFile, StatusFile
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+
+class Link(object):
+
+ def makeLink(path, linkText):
+ return('<a href = "' + path + '">' + linkText + "</a>")
+
+ makeLink = staticmethod(makeLink)
+
+class Index(object):
+ def __init__(self, config, verbose):
+ self._config = config
+ self.indexFile = IndexFile(self._config)
+ self.incrDir = IncrementDir(self._config)
+ self.verbose = verbose
+
+ def doAllWikis(self):
+ text = ""
+ for w in self._config.allWikisList:
+ result = self.doOneWiki(w)
+ if result:
+ text = text + "<li>"+ result + "</li>\n"
+ indexText = self._config.readTemplate("incrs-index.html") % { "items"
: text }
+ FileUtils.writeFileInPlace(self.indexFile.getPath(), indexText,
self._config.fileperms)
+
+ def doOneWiki(self, w):
+ if w not in self._config.privateWikisList and w not in
self._config.closedWikisList:
+ self.incrDumpsDirs = IncDumpDirs(self._config, w)
+ if not exists(self.incrDir.getIncDirNoDate(w)):
+ if (self.verbose):
+ print "No dump for wiki ", w
+ next
+
+ incrDate = self.incrDumpsDirs.getLatestIncrDate()
+ if not incrDate:
+ if (self.verbose):
+ print "No dump for wiki ", w
+ next
+
+ try:
+ lock = IncrDumpLock(self._config, incrDate, w)
+ lockDate = lock.getLockInfo()
+
+ stub = StubFile(self._config, incrDate, w)
+ (stubDate, stubSize) = stub.getFileInfo()
+ revs = RevsFile(self._config, incrDate, w)
+ (revsDate, revsSize) = revs.getFileInfo()
+ stat = StatusFile(self._config, incrDate, w)
+ statContents = FileUtils.readFile(stat.getPath())
+
+ except:
+ if (self.verbose):
+ traceback.print_exc(file=sys.stdout)
+ return "Error encountered, no information available for wiki",
w
+
+ try:
+ wikinameText = "<strong>%s</strong>" % w
+ if lockDate:
+ lockText = "run started on %s." % lockDate
+ else:
+ lockText = None
+ if stubDate:
+ stubText = "stubs: %s (size %s)" %
(Link.makeLink(os.path.join(w, incrDate, stub.getFileName()),stubDate),
stubSize)
+ else:
+ stubText = None
+ if revsDate:
+ revsText = "revs: %s (size %s)" %
(Link.makeLink(os.path.join(w, incrDate, revs.getFileName()),revsDate),
revsSize)
+ else:
+ revsText = None
+ if statContents:
+ statText = "(%s)" % (statContents)
+ else:
+ statText = None
+
+ wikiInfo = " ".join( filter( None, [ wikinameText, lockText,
statText ] ) ) + "<br />"
+ wikiInfo = wikiInfo + " " + " | ".join( filter(
None, [ stubText, revsText ] ))
+ except:
+ if (self.verbose):
+ traceback.print_exc(file=sys.stdout)
+ return "Error encountered formatting information for wiki", w
+
+ return wikiInfo
+
+def usage(message = None):
+ if message:
+ print message
+ print "Usage: python monitor.py [options] [wikidbname]"
+ print "Options: --configfile, --verbose"
+ print "--configfile: Specify an alternate config file to read.
Default file is 'dumpincr.conf' in the current directory."
+ print "--verbose: Print error messages and other informative
messages (normally the"
+ print " script runs silently)."
+ sys.exit(1)
+
+if __name__ == "__main__":
+ configFile = False
+ verbose = False
+
+ try:
+ (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+ ['configfile=', 'verbose' ])
+ except:
+ usage("Unknown option specified")
+
+ for (opt, val) in options:
+ if opt == "--configfile":
+ configFile = val
+ elif opt == '--verbose':
+ verbose = True
+
+ if (configFile):
+ config = Config(configFile)
+ else:
+ config = Config()
+
+ index = Index(config, verbose)
+ index.doAllWikis()
Property changes on: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
___________________________________________________________________
Added: svn:eol-style
+ native
Added: branches/ariel/xmldumps-backup/incrementals/incrs-index.html
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrs-index.html
(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,118 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <title>Incremental dumps</title>
+ <style type="text/css">
+ html, body {
+ background-color: #ffffff;
+ color: black;
+ }
+ .siteinfo {
+ text-align: center;
+ }
+ li {
+ background-color: #ffffff;
+ list-style-type: none;
+ }
+ li li {
+ background-color: white;
+ }
+ li ul {
+ margin-top: 4px;
+ margin-bottom: 8px;
+ }
+ .detail {
+ font-weight: normal;
+ font-style: italic;
+ }
+ .updates {
+ font: monospace;
+ font-size: smaller;
+ }
+ .status {
+ font-weight: bold;
+ padding-left: 1em;
+ padding-right: 1em;
+ }
+ .in-progress {
+ font-weight: bold;
+ }
+ .failed {
+ color: Maroon;
+ font-weight: bold;
+ }
+ .waiting {
+ color: Silver; /* Gray ? */
+ }
+ .progress {
+ font-family: monospace;
+ font-size: 80%%;
+ margin-left: .5in;
+ }
+ </style>
+</head>
+
+<body>
+ <h1>Adds/changes dumps</h1>
+
+ <p class="siteinfo">
+ This is the Wikimedia adds/changes dump service.
+ Please read the <a href='legal.html'>copyrights</a> information.
+ See <a
href="http://meta.wikimedia.org/wiki/Data_dumps">Meta:Data dumps</a>
+ for documentation on the provided data formats.
+ </p>
+ <p>
+ Here's the big fat disclaimer.
+ </p>
+ <p>
+ This service is experimental. At any time it may not be working, for
a day, a week or a month.
+ It is not intended to replace the full XML dumps. We don't expect
users to be able to construct
+ full dumps of a given date from the incrementals and an older dump.
+ </p>
+ <p>
+ The data provided in these files is <em>partial data</em>. To be precise:
+ <ul>
+ <li>* Revisions included in these dumps are not up to the minute.
We write out those that were
+ created up to 18 hours ago; this gives local editing communities
time to delete revisions
+ with sensitive information, vulgarities and other vandalism,
etc.</li>
+ <li>* New pages entered for the first time during the time interval
are included</li>
+ <li>* Revisions of undeleted pages will be included only if new
revision IDs need to be assigned to
+ the restored revisions. For most revisions this will not be the
case. </li>
+ <li>* Information about moves and deletes is not included.</li>
+ <li>* Imported revisions will be included if they were imported
during the time interval, since they
+ will have new revision IDs.</li>
+ <li>* As with all dumps, hidden revisions or more generally
revisions not readable by the general public
+ are not provided.</li>
+ </ul>
+ </p>
+ <p>
+ What is in these files:
+ </p>
+ <p>
+ The stubs file consists of the metadata for revision texts of each
page, where the revision texts were
+ added within the time interval. These look just like the history
stubs files you would find on our XML data dumps
+ page, having the exact same format but only new revisions since the
last adds/changes dump. This means you get
+ metadata for articles, user pages, discussion pages, etc. If you
want articles only, you will need to write a
+ filter to grab just those entries.
+ </p>
+ <p>
+ The revs file consists of the metadata plus the wikitext for each new
revision since the last adds/changes dump.
+ This is in the same format as the pages-meta-history files you would
find on our XML data dumps page. This means
+ you get articles, user pages, discussion pages, etc. If you want
articles only, you will need to write a
+ filter to grab just those entries.
+ </p>
+ <h2>Adds/changes dump listing</h2>
+ <ul>
+ %(items)s
+ </ul>
+ <hr />
+ <p>
+ Return to <a href="http://dumps.wikimedia.org/other/">our other
datasets</a>, the
+ <a href="http://dumps.wikimedia.org/backup-index.html">XML data
dumps</a>, or
+ <a href="http://dumps.wikimedia.org/index.html">the main index</a>.
+ </p>
+</body>
+</html>
Added: branches/ariel/xmldumps-backup/incrementals/private.dblist
===================================================================
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs