ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/266775

Change subject: dumps: stash some current dump run config settings in file and 
reuse
......................................................................

dumps: stash some current dump run config settings in file and reuse

This allows us to rerun dump jobs after the configuration files have
been changed, while still using the settings stashed earlier for that run.

Change-Id: I649d7a1774db3e3e81c3eefa993cd2cb9961a294
---
M xmldumps-backup/dumps/runner.py
M xmldumps-backup/dumps/runnerutils.py
2 files changed, 86 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps 
refs/changes/75/266775/1

diff --git a/xmldumps-backup/dumps/runner.py b/xmldumps-backup/dumps/runner.py
index e6a3d33..d628640 100644
--- a/xmldumps-backup/dumps/runner.py
+++ b/xmldumps-backup/dumps/runner.py
@@ -15,7 +15,7 @@
 from dumps.xmljobs import XmlDump, XmlLogging, XmlStub, BigXmlDump, 
AbstractDump
 from dumps.recompressjobs import XmlMultiStreamDump, XmlRecompressDump
 
-from dumps.runnerutils import SymLinks, Feeds, NoticeFile
+from dumps.runnerutils import RunSettings, SymLinks, Feeds, NoticeFile
 from dumps.runnerutils import Checksummer, IndexHtml, StatusHtml, 
FailureHandler
 from dumps.runnerutils import Maintenance, RunInfoFile, DumpRunJobData
 
@@ -474,7 +474,7 @@
         if self.enabled is None:
             self.enabled = {}
         for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
-                        RunInfoFile.NAME, SymLinks.NAME,
+                        RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
                         Feeds.NAME, NoticeFile.NAME, "makedir", 
"clean_old_dumps",
                         "cleanup_old_files", "check_trunc_files"]:
             self.enabled[setting] = True
@@ -484,7 +484,7 @@
 
         if self.dryrun or self._partnum_todo is not None or 
self.checkpoint_file is not None:
             for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME,
-                            RunInfoFile.NAME, SymLinks.NAME,
+                            RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME,
                             Feeds.NAME, NoticeFile.NAME, "makedir", 
"clean_old_dumps"]:
                 del self.enabled[setting]
 
diff --git a/xmldumps-backup/dumps/runnerutils.py 
b/xmldumps-backup/dumps/runnerutils.py
index d5d61bf..c17d59d 100644
--- a/xmldumps-backup/dumps/runnerutils.py
+++ b/xmldumps-backup/dumps/runnerutils.py
@@ -6,6 +6,7 @@
 import traceback
 from email.mime import text as MIMEText
 import smtplib
+import json
 
 from os.path import exists
 from dumps.exceptions import BackupError
@@ -610,6 +611,80 @@
                         os.remove(os.path.join(latest_dir, fname))
 
 
+class RunSettings(object):
+    NAME = 'runsettings'
+
+    def __init__(self, wiki, dump_dir, logfn=None, debugfn=None,
+                 enabled=None, verbose=False):
+        self.wiki = wiki
+        self.dump_dir = dump_dir
+        self.logfn = logfn
+        self.debugfn = debugfn
+        self.enabled = enabled
+        self.verbose = verbose
+
+    def get_settings_path(self):
+        file_obj = DumpFilename(self.wiki, None, "runsettings.txt")
+        return self.dump_dir.filename_public_path(file_obj)
+
+    def get_settings_from_config(self):
+        return [self.wiki.conf.parts_enabled,
+                self.wiki.conf.pages_per_filepart_history,
+                self.wiki.conf.revs_per_filepart_history,
+                self.wiki.conf.numparts_for_abstract,
+                self.wiki.conf.pages_per_filepart_abstract,
+                self.wiki.conf.recombine_history,
+                self.wiki.conf.checkpoint_time]
+
+    def write_settings(self):
+        '''
+        stash current run settings in file in dump directory if
+        such file does not already exist
+        '''
+        if RunSettings.NAME not in self.enabled:
+            return
+
+        settings_path = self.get_settings_path()
+        if os.path.exists(settings_path):
+            return
+        setting_info = self.get_settings_from_config()
+
+        with open(settings_path, "w+") as settings_fd:
+            settings_fd.write(json.dumps(setting_info) + "\n")
+
+    def read_settings(self):
+        '''
+        retrieve current run settings from file in dump directory
+        '''
+        settings_path = self.get_settings_path()
+        if not os.path.exists(settings_path):
+            return None
+        with open(settings_path, "r") as settings_fd:
+            contents = settings_fd.read()
+            settings_fd.close()
+        if contents[-1] == '\n':
+            contents = contents[:-1]
+        return json.loads(contents)
+
+    def apply_settings_to_config(self, settings=None):
+        '''
+        apply settings to wiki configuration, retrieving
+        them from the settings stash file first if they are
+        not passed in as an argument
+        '''
+        if settings is None:
+            settings = self.read_settings()
+        if settings is None:
+            return
+        self.wiki.conf.parts_enabled = settings[0]
+        self.wiki.conf.pages_per_filepart_history = settings[1]
+        self.wiki.conf.revs_per_filepart_history = settings[2]
+        self.wiki.conf.numparts_for_abstract = settings[3]
+        self.wiki.conf.pages_per_filepart_abstract = settings[4]
+        self.wiki.conf.recombine_history = settings[5]
+        self.wiki.conf.checkpoint_time = settings[6]
+
+
 class DumpRunJobData(object):
     def __init__(self, wiki, dump_dir, notice, logfn=None, debugfn=None,
                  enabled=None, verbose=False):
@@ -621,6 +696,14 @@
         self.debugfn = debugfn
         self.enabled = enabled
         self.verbose = verbose
+
+        # write config settings down if not already present
+        self.settings_stash = RunSettings(wiki, dump_dir, enabled, logfn, 
debugfn, verbose)
+        self.settings_stash.write_settings()
+        # if there was a settings stash, use it to override config values
+        self.settings_stash.apply_settings_to_config()
+
+        # now we can set up everything else
         self.runinfofile = RunInfoFile(wiki, enabled, verbose)
         self.checksummer = Checksummer(wiki, dump_dir, enabled, verbose)
         self.feeds = Feeds(wiki, dump_dir, wiki.db_name, debugfn, enabled)

-- 
To view, visit https://gerrit.wikimedia.org/r/266775
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I649d7a1774db3e3e81c3eefa993cd2cb9961a294
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to