Hashar has uploaded a new change for review.
https://gerrit.wikimedia.org/r/50370
Change subject: pep8: E302 expected 2 blank lines, found 1
......................................................................
pep8: E302 expected 2 blank lines, found 1
Change-Id: I0b60eccc88c32e96637abeb84c82abf099ed9a44
---
M WikiDump.py
M monitor.py
M worker.py
3 files changed, 32 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/70/50370/1
diff --git a/WikiDump.py b/WikiDump.py
index cca4160..49699b4 100644
--- a/WikiDump.py
+++ b/WikiDump.py
@@ -8,13 +8,16 @@
import threading
import time
+
def fileAge(filename):
return time.time() - os.stat(filename).st_mtime
+
def atomicCreate(filename, mode='w'):
"""Create a file, aborting if it already exists..."""
fd = os.open(filename, os.O_EXCL + os.O_CREAT + os.O_WRONLY)
return os.fdopen(fd, mode)
+
def shellEscape(param):
"""Escape a string parameter, or set of strings, for the shell."""
@@ -26,10 +29,12 @@
else:
return tuple([shellEscape(x) for x in param])
+
def prettySize(size):
"""Return a string with an attractively formatted file size."""
quanta = ("%d bytes", "%d KB", "%0.1f MB", "%0.1f GB", "%0.1f TB")
return _prettySize(size, quanta)
+
def _prettySize(size, quanta):
if size < 1024 or len(quanta) == 1:
@@ -37,15 +42,19 @@
else:
return _prettySize(size / 1024.0, quanta[1:])
+
def today():
return time.strftime("%Y%m%d", time.gmtime())
+
def prettyTime():
return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
+
def prettyDate(key):
"Prettify a MediaWiki date key"
return "-".join((key[0:4], key[4:6], key[6:8]))
+
def dumpFile(filename, text):
"""Dump a string to a file, as atomically as possible, via a temporary
file in the same directory."""
@@ -61,11 +70,13 @@
# Of course nothing else will work on Windows. ;)
os.rename(tempFilename, filename)
+
def readFile(filename):
file = open(filename, "r")
text = file.read()
file.close()
return text
+
def dbList(filename):
"""Read database list from a file"""
@@ -78,6 +89,7 @@
infile.close()
dbs.sort()
return dbs
+
class Config(object):
def __init__(self):
@@ -353,6 +365,7 @@
def lockAge(self):
return fileAge(self.lockFile())
+
class LockWatchdog(threading.Thread):
"""Touch the given file every 10 seconds until asked to stop."""
@@ -389,6 +402,7 @@
"""Run me inside..."""
os.utime(self.lockfile, None)
+
def cleanup():
"""Call cleanup handlers for any background threads..."""
for watchdog in LockWatchdog.threads:
diff --git a/monitor.py b/monitor.py
index 5c229a4..be2e429 100644
--- a/monitor.py
+++ b/monitor.py
@@ -5,6 +5,7 @@
config = WikiDump.Config()
+
def generateIndex():
running = False
states = []
@@ -34,6 +35,7 @@
"status": status,
"items": "\n".join(states)}
+
def updateIndex():
outputFileName = os.path.join(config.publicDir, config.index)
WikiDump.dumpFile(outputFileName, generateIndex())
diff --git a/worker.py b/worker.py
index cafc246..6092f8a 100644
--- a/worker.py
+++ b/worker.py
@@ -12,6 +12,7 @@
from os.path import dirname, exists, getsize, join, realpath
from WikiDump import prettyTime, prettySize, shellEscape
+
def splitPath(path):
# For some reason, os.path.split only does one level.
parts = []
@@ -24,6 +25,7 @@
(path, file) = os.path.split(path)
return parts
+
def relativePath(path, base):
"""Return a relative path to 'path' from the directory 'base'."""
path = splitPath(path)
@@ -34,6 +36,7 @@
for prefix in base:
path.insert(0, "..")
return os.path.join(*path)
+
def md5File(filename):
summer = md5.new()
@@ -46,14 +49,18 @@
infile.close()
return summer.hexdigest()
+
def md5FileLine(filename):
return "%s %s\n" % (md5File(filename), os.path.basename(filename))
+
def xmlEscape(text):
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">",
"&gt;")
+
class BackupError(Exception):
pass
+
class Runner(object):
@@ -559,6 +566,7 @@
rssPath = self.latestPath(file + "-rss.xml")
WikiDump.dumpFile(rssPath, rssText)
+
class Dump(object):
def __init__(self, desc):
self._desc = desc
@@ -608,6 +616,7 @@
def matchCheckpoint(self, checkpoint):
return checkpoint == self.__class__.__name__
+
class PublicTable(Dump):
"""Dump of a table using MySQL's mysqldump utility."""
@@ -629,6 +638,7 @@
def matchCheckpoint(self, checkpoint):
return checkpoint == self.__class__.__name__ + "." + self._table
+
class PrivateTable(PublicTable):
"""Hidden table dumps for private data."""
@@ -693,6 +703,7 @@
articles))
runner.runCommand(command, callback=self.progressCallback)
+
class XmlLogging(Dump):
""" Create a logging dump of all page activity """
@@ -725,6 +736,7 @@
runner.dbServer,
logging))
runner.runCommand(command, callback=self.progressCallback)
+
class XmlDump(Dump):
"""Primary XML dumps, one section at a time."""
@@ -840,6 +852,7 @@
def matchCheckpoint(self, checkpoint):
return checkpoint == self.__class__.__name__ + "." + self._subset
+
class BigXmlDump(XmlDump):
"""XML page dump for something larger, where a 7-Zip compressed copy
could save 75% of download time for some users."""
@@ -847,6 +860,7 @@
def buildEta(self, runner):
"""Tell the dumper script whether to make ETA estimate on page
or revision count."""
return "--full"
+
class XmlRecompressDump(Dump):
"""Take a .bz2 and recompress it as 7-Zip."""
@@ -892,6 +906,7 @@
def matchCheckpoint(self, checkpoint):
return checkpoint == self.__class__.__name__ + "." + self._subset
+
class AbstractDump(Dump):
"""XML dump for Yahoo!'s Active Abstracts thingy"""
@@ -947,6 +962,7 @@
def listFiles(self, runner):
return [self._variantFile(x) for x in self._variants(runner)]
+
class TitleDump(Dump):
"""This is used by "wikiproxy", a program to add Wikipedia links to BBC
news online"""
def run(self, runner):
--
To view, visit https://gerrit.wikimedia.org/r/50370
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0b60eccc88c32e96637abeb84c82abf099ed9a44
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: master
Gerrit-Owner: Hashar <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits