ArielGlenn has uploaded a new change for review.
https://gerrit.wikimedia.org/r/266775
Change subject: dumps: stash some current dump run config settings in file and
reuse
......................................................................
dumps: stash some current dump run config settings in file and reuse
This allows us to rerun dump jobs after the configuration files have been
changed: the rerun uses the settings stashed when the run first started.
Change-Id: I649d7a1774db3e3e81c3eefa993cd2cb9961a294
---
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/runnerutils.py
2 files changed, 86 insertions(+), 3 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/dumps
refs/changes/75/266775/1
diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py
index e6a3d33..d628640 100644
--- a/xmldumps-backup/dumps/runner.py
+++ b/xmldumps-backup/dumps/runner.py
@@ -15,7 +15,7 @@
from dumps.xmljobs import XmlDump, XmlLogging, XmlStub, BigXmlDump,
AbstractDump
from dumps.recompressjobs import XmlMultiStreamDump, XmlRecompressDump
-from dumps.runnerutils import SymLinks, Feeds, NoticeFile
+from dumps.runnerutils import RunSettings, SymLinks, Feeds, NoticeFile
from dumps.runnerutils import Checksummer, IndexHtml, StatusHtml,
FailureHandler
from dumps.runnerutils import Maintenance, RunInfoFile, DumpRunJobData
@@ -474,7 +474,7 @@
if self.enabled is None:
self.enabled = {}
for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
- RunInfoFile.NAME, SymLinks.NAME,
+ RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
Feeds.NAME, NoticeFile.NAME, "makedir",
"clean_old_dumps",
"cleanup_old_files", "check_trunc_files"]:
self.enabled[setting] = True
@@ -484,7 +484,7 @@
if self.dryrun or self._partnum_todo is not None or
self.checkpoint_file is not None:
for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
- RunInfoFile.NAME, SymLinks.NAME,
+ RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
Feeds.NAME, NoticeFile.NAME, "makedir",
"clean_old_dumps"]:
del self.enabled[setting]
diff --git a/xmldumps-backup/dumps/runnerutils.py
b/xmldumps-backup/dumps/runnerutils.py
index d5d61bf..c17d59d 100644
--- a/xmldumps-backup/dumps/runnerutils.py
+++ b/xmldumps-backup/dumps/runnerutils.py
@@ -6,6 +6,7 @@
import traceback
from email.mime import text as MIMEText
import smtplib
+import json
from os.path import exists
from dumps.exceptions import BackupError
@@ -610,6 +611,80 @@
os.remove(os.path.join(latest_dir, fname))
+class RunSettings(object):
+    NAME = 'runsettings'
+
+    # wiki.conf attributes that get stashed; the stash file holds a json
+    # list of their values in exactly this order, so only ever append here
+    SETTING_NAMES = ('parts_enabled', 'pages_per_filepart_history',
+                     'revs_per_filepart_history', 'numparts_for_abstract',
+                     'pages_per_filepart_abstract', 'recombine_history',
+                     'checkpoint_time')
+
+    def __init__(self, wiki, dump_dir, logfn=None, debugfn=None,
+                 enabled=None, verbose=False):
+        self.wiki = wiki
+        self.dump_dir = dump_dir
+        self.logfn = logfn
+        self.debugfn = debugfn
+        self.enabled = enabled
+        self.verbose = verbose
+
+    def get_settings_path(self):
+        '''return the public path of the run settings stash file'''
+        file_obj = DumpFilename(self.wiki, None, "runsettings.txt")
+        return self.dump_dir.filename_public_path(file_obj)
+
+    def get_settings_from_config(self):
+        '''return list of current config values, in SETTING_NAMES order'''
+        return [getattr(self.wiki.conf, name) for name in self.SETTING_NAMES]
+
+    def write_settings(self):
+        '''
+        stash current run settings in file in dump directory if
+        such file does not already exist; this is skipped if the
+        step is not enabled (a missing 'enabled' counts as not enabled)
+        '''
+        if not self.enabled or RunSettings.NAME not in self.enabled:
+            return
+        settings_path = self.get_settings_path()
+        if os.path.exists(settings_path):
+            return
+        setting_info = self.get_settings_from_config()
+        with open(settings_path, "w") as settings_fd:
+            settings_fd.write(json.dumps(setting_info) + "\n")
+
+    def read_settings(self):
+        '''
+        retrieve stashed run settings from file in dump directory;
+        returns None if the file is missing or empty
+        '''
+        settings_path = self.get_settings_path()
+        if not os.path.exists(settings_path):
+            return None
+        with open(settings_path, "r") as settings_fd:
+            contents = settings_fd.read()
+        if not contents.strip():
+            return None
+        return json.loads(contents)
+
+    def apply_settings_to_config(self, settings=None):
+        '''
+        apply settings to wiki configuration, retrieving them from
+        the settings stash file first if they are not passed in;
+        nothing is changed if there are none to apply, and
+        BackupError is raised if there are too few of them
+        '''
+        if settings is None:
+            settings = self.read_settings()
+        if settings is None:
+            return
+        if len(settings) < len(self.SETTING_NAMES):
+            raise BackupError("too few values in run settings stash")
+        for name, value in zip(self.SETTING_NAMES, settings):
+            setattr(self.wiki.conf, name, value)
+
+
class DumpRunJobData(object):
def __init__(self, wiki, dump_dir, notice, logfn=None, debugfn=None,
enabled=None, verbose=False):
@@ -621,6 +696,14 @@
self.debugfn = debugfn
self.enabled = enabled
self.verbose = verbose
+
+        # stash config settings (RunSettings takes logfn, debugfn, enabled)
+        self.settings_stash = RunSettings(wiki, dump_dir, logfn, debugfn, enabled, verbose)
+        self.settings_stash.write_settings()
+        # if there was a settings stash, use it to override config values
+        self.settings_stash.apply_settings_to_config()
+
+        # now we can set up everything else
self.runinfofile = RunInfoFile(wiki, enabled, verbose)
self.checksummer = Checksummer(wiki, dump_dir, enabled, verbose)
self.feeds = Feeds(wiki, dump_dir, wiki.db_name, debugfn, enabled)
--
To view, visit https://gerrit.wikimedia.org/r/266775
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I649d7a1774db3e3e81c3eefa993cd2cb9961a294
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits