jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/346833 )

Change subject: Record job state and history in a file
......................................................................


Record job state and history in a file

Includes a script which will return Icinga-ready text and exit codes.

Change-Id: I893579da632fde8df2a1ea6c2e0e564c859dc950
---
A bin/check-jobs-icinga
M bin/run-job
M examples/process-control.example.yaml
M processcontrol/config.py
M processcontrol/crontab.py
A processcontrol/job_state.py
M processcontrol/runner.py
M setup.py
M tests/data/global_config/global_defaults.yaml
9 files changed, 150 insertions(+), 4 deletions(-)

Approvals:
  jenkins-bot: Verified
  Ejegg: Looks good to me, approved



diff --git a/bin/check-jobs-icinga b/bin/check-jobs-icinga
new file mode 100755
index 0000000..d2a3618
--- /dev/null
+++ b/bin/check-jobs-icinga
@@ -0,0 +1,53 @@
+#!/usr/bin/python3
+#
+# Report job status in a format that can be consumed by Icinga.
+
+import argparse
+import sys
+
+
+from processcontrol import job_spec
+from processcontrol import job_state
+
+
+def report_statuses():
+       """Build response string and exit code from statuses."""
+       statuses = load_statuses()
+       bad_jobs = []
+       for job, status in statuses.items():
+               # Be conservative about what is failure, for now.  Eventually, we
+               # should warn about invalid and unknown.
+               if status == "failure":
+                       bad_jobs.append(job)
+
+       if len(bad_jobs) == 0:
+               print("JOBS OK")
+               sys.exit(0)
+       else:
+               bad_jobs_message = ", ".join(bad_jobs)
+               print("FAILING JOBS: {jobs}".format(jobs=bad_jobs_message))
+               sys.exit(2)
+
+
+def load_statuses():
+       """Fetch statuses from history files."""
+       statuses = {}
+       jobs = job_spec.list()
+       for job in jobs:
+               try:
+                       state = job_state.load_state(job)
+               except:
+                       statuses[job] = "invalid"
+               if state.last_completion_status is None:
+                       statuses[job] = "unknown"
+               else:
+                       statuses[job] = state.last_completion_status
+
+       return statuses
+
+
+if __name__ == "__main__":
+       parser = argparse.ArgumentParser(description="Report the status of all `process-control` jobs in a format that can be consumed by Icinga.")
+       args = parser.parse_args()
+
+       report_statuses()
diff --git a/bin/run-job b/bin/run-job
index 851e606..2f7fa3f 100755
--- a/bin/run-job
+++ b/bin/run-job
@@ -6,6 +6,7 @@
 
 from processcontrol import runner
 from processcontrol import job_spec
+from processcontrol import job_state
 
 
 def list_jobs():
@@ -25,6 +26,10 @@
 
                        if len(job.tags) > 0:
                                message += "\n    tags: " + ", ".join(job.tags)
+
+                       stored_state = job_state.load_state(job_slug)
+                       message += "\n    last status: " + stored_state.last_completion_status
+
                except AssertionError:
                        message = "{job} ***Invalid configuration***".format(job=job_slug)
                print(message)
diff --git a/examples/process-control.example.yaml b/examples/process-control.example.yaml
index 746af0a..2f43e63 100644
--- a/examples/process-control.example.yaml
+++ b/examples/process-control.example.yaml
@@ -77,6 +77,9 @@
 #
 # TODO: The deb install should create this directory and do something about
 # permissions.
+# TODO: rename to `lock_directory`
 #run_directory: /var/run/process-control
 #
 run_directory: /tmp
+
+state_directory: /var/cache/process-control
diff --git a/processcontrol/config.py b/processcontrol/config.py
index 9934806..806bf3d 100644
--- a/processcontrol/config.py
+++ b/processcontrol/config.py
@@ -63,8 +63,7 @@
         raw value if it's already a list."""
         value = self.get(path)
         if hasattr(value, "encode"):
-            # Is stringlike, so cast to a list and handle along with the plural
-            # case below.
+            # Is stringlike, so cast to a list.
             return [value]
 
         # Otherwise, it's already a list.
diff --git a/processcontrol/crontab.py b/processcontrol/crontab.py
index 82266ca..e22ba0a 100644
--- a/processcontrol/crontab.py
+++ b/processcontrol/crontab.py
@@ -1,5 +1,3 @@
-from __future__ import print_function
-
 from . import config
 from . import job_spec
 
diff --git a/processcontrol/job_state.py b/processcontrol/job_state.py
new file mode 100644
index 0000000..f034d0a
--- /dev/null
+++ b/processcontrol/job_state.py
@@ -0,0 +1,80 @@
+import datetime
+import yaml
+
+
+from . import config
+
+
+def load_state(slug):
+    state = JobState(slug)
+    state.load()
+    return state
+
+
+def statefile_path(slug):
+    global_config = config.GlobalConfiguration()
+    path = "{root}/{job}.yaml".format(
+        root=global_config.get("state_directory"),
+        job=slug)
+    return path
+
+
+class JobState(object):
+    """Manage a statefile for each job, with information about recent run
+    history."""
+
+    def __init__(self, slug):
+        self.slug = slug
+        self.path = statefile_path(slug)
+        self.history = []
+        self.last_completion_status = "unknown"
+
+    def load(self):
+        try:
+            with open(self.path, "r") as f:
+                storage = yaml.safe_load(f)
+        except IOError:
+            # TODO: Might want to remove the file and stuff.
+            return
+
+        self.history = storage["history"]
+        self.last_completion_status = storage["last_completion_status"]
+
+    def write(self):
+        # TODO: Ensure that we've called load() first, so we aren't overwriting
+        # history.
+        if len(self.history) > 20:
+            self.history = self.history[-20:]
+
+        contents = {
+            "history": self.history,
+        }
+
+        contents["last_completion_status"] = self.last_completion_status
+
+        with open(self.path, "w") as f:
+            yaml.dump(contents, stream=f)
+
+    def record_started(self, start_time):
+        self.history.append({
+            "status": "started",
+            "time": start_time.isoformat(" "),
+        })
+        self.write()
+
+    # TODO: We want job duration, etc.
+    def record_success(self):
+        self.history.append({
+            "status": "completed",
+            "time": datetime.datetime.utcnow().isoformat(" "),
+        })
+        self.last_completion_status = "success"
+        self.write()
+
+    def record_failure(self):
+        self.history.append({
+            "status": "failed",
+            "time": datetime.datetime.utcnow().isoformat(" "),
+        })
+        self.last_completion_status = "failure"
+        self.write()
diff --git a/processcontrol/runner.py b/processcontrol/runner.py
index d365d2c..0c39dab 100644
--- a/processcontrol/runner.py
+++ b/processcontrol/runner.py
@@ -6,6 +6,7 @@
 import threading
 
 from . import config
+from . import job_state
 from . import lock
 from . import mailer
 from . import output_streamer
@@ -32,6 +33,8 @@
 
         lock.begin(job_tag=self.job.slug)
         self.start_time = datetime.datetime.utcnow()
+        job_history = job_state.load_state(self.job.slug)
+        job_history.record_started(self.start_time)
 
         config.log.info("Running job {name} ({slug})".format(name=self.job.name, slug=self.job.slug))
 
@@ -47,9 +50,11 @@
                 return_code = self.run_command(command_line)
                 if return_code != 0:
                     self.fail_exitcode(return_code)
+            job_history.record_success()
         except JobFailure as ex:
             config.log.error(str(ex))
             self.mailer.fail_mail(str(ex), logfile=self.logfile)
+            job_history.record_failure()
             raise
         finally:
             if self.job.timeout > 0:
diff --git a/setup.py b/setup.py
index a6a07ab..7dc41b5 100755
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,7 @@
     url='https://github.com/adamwight/process-control',
     packages=['processcontrol'],
     scripts=[
+        'bin/check-jobs-icinga',
         'bin/cron-generate',
         'bin/run-job',
     ],
diff --git a/tests/data/global_config/global_defaults.yaml b/tests/data/global_config/global_defaults.yaml
index 00c6fd7..13ca90c 100644
--- a/tests/data/global_config/global_defaults.yaml
+++ b/tests/data/global_config/global_defaults.yaml
@@ -26,3 +26,5 @@
 output_directory: /tmp
 
 run_directory: /tmp
+
+state_directory: /tmp

-- 
To view, visit https://gerrit.wikimedia.org/r/346833
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I893579da632fde8df2a1ea6c2e0e564c859dc950
Gerrit-PatchSet: 12
Gerrit-Project: wikimedia/fundraising/process-control
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
Gerrit-Reviewer: Cdentinger <[email protected]>
Gerrit-Reviewer: Ejegg <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to