jenkins-bot has submitted this change and it was merged.

Change subject: Integrated logging
......................................................................


Integrated logging

allowing logging to console, file and logstash.

Bug: T84892
Change-Id: Ia030b599180189b05409e1085941bd01c33f7fbb
---
M .gitignore
M README.md
M generate.py
A logger.py
M requirements.txt
A test-requirements.txt
A tests/__init__.py
A tests/test_logger.py
8 files changed, 258 insertions(+), 13 deletions(-)

Approvals:
  Bmansurov: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/.gitignore b/.gitignore
index 57d8753..1f404b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@
 .DS_Store
 /.idea
 *.iml
+test.log
diff --git a/README.md b/README.md
index ca5dc85..67c214b 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,13 @@
 - Add the graph to `mobile/config.yaml`
 - Generate the datafile (csv) for the graph `python generate.py -c scripts/config.yaml mobile -g <graphname>`
 
+## Executing unit tests
+
+Install the test requirements and run nosetests:
+
+    $ pip install -r test-requirements.txt
+    $ nosetests
+
 ## Testing using local data
 
 By default the instance you run will show graphs using production data.
diff --git a/generate.py b/generate.py
index d2f13cd..db7360c 100644
--- a/generate.py
+++ b/generate.py
@@ -15,6 +15,7 @@
 import datetime
 from dateutil.relativedelta import relativedelta
 import json
+from logger import get_logger
 
 from traceback import format_exc
 
@@ -22,7 +23,9 @@
 class DataGenerator(object):
     """Executes queries and generates CSV reports based on YAML configs."""
 
-    def __init__(self, folder, debug_folder=None, config_override=None, graph=None, force=None):
+    def __init__(self, folder, debug_folder=None, config_override=None,
+                 graph=None, force=None, log_file_path=None,
+                 log_logstash_endpoint=None):
         """Reads configuration 'config.yaml' in `folder_path`."""
         self.folder = folder
         self.debug_folder = debug_folder
@@ -35,6 +38,8 @@
         self.load_config(config_main)
         if config_override:
             self.load_config(config_override)
+        self.logger = get_logger(
+            self.config.get('logging'), log_file_path, log_logstash_endpoint)
 
     def load_config(self, config_path):
         with io.open(config_path, encoding='utf-8') as config_file:
@@ -92,7 +97,7 @@
             rows = cursor.fetchall()
             headers = [field[0] for field in cursor.description]
         except Exception, e:
-            print "Issue executing SQL for %s (%s)" % (graph_key, e)
+            self.logger.error("Issue executing SQL for %s (%s)" % (graph_key, e))
             headers = []
             rows = []
         finally:
@@ -106,7 +111,7 @@
             res = module.execute(self)
             return res
         except Exception, e:
-            print "Issue executing Python script for %s (%s)" % (graph_key, e)
+            self.logger.error("Issue executing Python script for %s (%s)" % (graph_key, e), exc_info=True)
             return ([], [])
 
     def save_history(self, data):
@@ -124,7 +129,7 @@
             try:
                 return json.loads(data)
             except ValueError:
-                print('invalid JSON - someone tweaked the history file!')
+                self.logger.error('invalid JSON - someone tweaked the history file!')
                 return {}
         except IOError:
             return {}
@@ -156,7 +161,7 @@
             due_at = last_run_time + increment
 
             if due_at < now or self.force:
-                print('Generating {0}'.format(value['title']))
+                self.logger.info('Generating {0}'.format(value['title']))
                 if "timeboxed" in value and "starts" in value:
                     from_date = value["starts"]
 
@@ -177,7 +182,7 @@
                         if history[key] == now:
                             self.save_history(history)
             else:
-                print('Skipping generation of {0}: not enough time has passed'.format(value['title']))
+                self.logger.info('Skipping generation of {0}: not enough time has passed'.format(value['title']))
 
     def generate_graph_timeboxed(self, graph_key, value, from_date, to_date=None):
         csv_filename = self.get_csv_filename(graph_key)
@@ -228,7 +233,7 @@
                 to_timestamp = from_date.strftime('%Y%m%d%H%M%S')
                 # Generate a graph if not in cache or the current month
                 if graph_date_key not in cache or from_date >= this_month:
-                    print 'Generating data for timestamp %s to %s' % (from_timestamp, to_timestamp)
+                    self.logger.info('Generating data for timestamp %s to %s' % (from_timestamp, to_timestamp))
                     db_name = value.get('db', self.config['defaults']['db'])
                     query = self.get_sql_string(sql_path)
                     # apply timeboxing
@@ -239,7 +244,7 @@
                     headers, rows = self.execute_sql(query, db_name, graph_key)
                     if headers and csv_header:
                         if len(headers) + 1 != len(csv_header) and not self.force:
-                            print "Data format has changed. Aborting."
+                            self.logger.info("Data format has changed. Aborting.")
                             self.clear_csv_data(graph_key)
+                            self.generate_graph_timeboxed(graph_key, value, from_date, to_date)
                             return
@@ -250,13 +255,13 @@
                     try:
                         cache[graph_date_key] = list(rows[0])
                     except Exception, e:
-                        print('{}: Error: {}, db: {}, graph key: {}, rows: {}, query: \n{}'.format(
+                        self.logger.error('{}: Error: {}, db: {}, graph key: {}, rows: {}, query: \n{}'.format(
                             datetime.datetime.now(), format_exc(e), db_name, graph_key, rows, query
                         ))
                         # stop the loop because one exception is likely to mean total fail
                         break
                 else:
-                    print 'Skip generation of %s' % graph_date_key
+                    self.logger.info('Skip generation of %s' % graph_date_key)
 
             rows = []
             for month, row in iter(sorted(cache.iteritems())):
@@ -265,9 +270,9 @@
             if len(rows) > 0:
                 self.save_graph_as_csv(graph_key, csv_header, rows)
             else:
-                print 'No data to write.'
+                self.logger.info('No data to write.')
         else:
-            print 'Bad SQL given'
+            self.logger.error('Bad SQL given')
 
     def generate_graph_full(self, key, value):
         """
@@ -319,6 +324,12 @@
     parser.add_argument('-g', '--graph', help='the name of a single graph you want to generate for')
     parser.add_argument('-f', '--force', dest='force', action='store_true',
                         help='Force generation of graph regardless of when last generated')
+    parser.add_argument(
+        '-l', '--log-file-path', dest='log_file_path',
+        help='Path for log file.')
+    parser.add_argument(
+        '-L', '--log-logstash-endpoint', dest='log_logstash_endpoint',
+        help='Endpoint for logstash (HOST:PORT).')
     parser.set_defaults(force=False)
     args = parser.parse_args()
 
diff --git a/logger.py b/logger.py
new file mode 100644
index 0000000..482a610
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+import copy
+import logging
+import logging.config
+
+__all__ = ['get_logger']
+
+
+DEFAULT_LOGGING_CONFIG = {
+    'version': 1,
+    'formatters': {
+        'stream': {
+            'format': '[%(asctime)s %(name)s %(levelname)s] %(message)s'
+        },
+        'file': {
+            'format': '[%(asctime)s %(levelname)s] %(message)s'
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            'class': 'logging.StreamHandler',
+            'formatter': 'stream'
+        },
+        'log_to_file': {
+            'level': 'DEBUG',
+            'class': 'logging.FileHandler',
+            'filename': 'limn.log',
+            'mode': 'a+',
+            'formatter': 'file',
+        },
+        'logstash': {
+            'level': 'INFO',
+            'class': 'logstash.LogstashHandler',
+            'host': 'localhost',
+            'port': 5959,
+            'version': 1,
+            'message_type': 'logstash',
+            'fqdn': False,
+            'tags': ['limn_mobile_data'],
+        }
+    },
+    'loggers': {
+        'limn_mobile': {
+            'level': 'DEBUG',
+            'handlers': ['console'],
+            'propagate': False
+        }
+    }
+}
+
+
+def parse_logstash_endpoint(logstash_endpoint):
+    try:
+        host, port = logstash_endpoint.split(':') if isinstance(
+            logstash_endpoint, basestring) else (None, None)
+        port = int(port) if port else None
+    except ValueError:
+        host, port = None, None
+    return host, port
+
+
+def update_config(source_config, config):
+    """
+    Update source config dictionary with values from
+    another config dictionary in a granular way, rather
+    than only top level substitution.
+    """
+    for key, value in config.items():
+        source_value = source_config.get(key)
+        if not source_value:
+            continue
+        if isinstance(source_value, dict) and isinstance(value, dict):
+            update_config(source_value, value)
+        elif isinstance(source_value, list) and not isinstance(value, list):
+            source_value.append(value)
+        elif type(source_value) == type(value):
+            source_config[key] = value
+
+
+def update_logging_config(default_config, config, log_file_path,
+                          logstash_endpoint):
+    """
+    Update logging config dict.
+
+    Args:
+        default_config: A dictionary for default logging config.
+        config: A dictionary for customizing logging config.
+        log_file_path: A string
+        logstash_endpoint: A string of the form 'host:port'
+
+    Returns:
+        A dictionary for the updated logging config.
+    """
+    updated_config = copy.deepcopy(default_config)
+    if isinstance(config, dict):
+        update_config(updated_config, config)
+    config_handlers = updated_config['loggers']['limn_mobile']['handlers']
+    if log_file_path:
+        updated_config['handlers']['log_to_file']['filename'] = log_file_path
+        if 'log_to_file' not in config_handlers:
+            config_handlers.append('log_to_file')
+    elif 'log_to_file' in config_handlers:
+        config_handlers.remove('log_to_file')
+
+    logstash_host, logstash_port = parse_logstash_endpoint(logstash_endpoint)
+    if logstash_host and logstash_port:
+        updated_config['handlers']['logstash'].update({
+            'host': logstash_host,
+            'port': logstash_port
+        })
+        if 'logstash' not in config_handlers:
+            config_handlers.append('logstash')
+    elif 'logstash' in config_handlers:
+        config_handlers.remove('logstash')
+    return updated_config
+
+
+def get_logger(config={}, log_file_path=None, logstash_endpoint=None):
+    """
+    Get a logger instance for logging.
+    """
+    config = copy.deepcopy(config)
+    logging_config = update_logging_config(
+        DEFAULT_LOGGING_CONFIG, config, log_file_path, logstash_endpoint)
+    logging.config.dictConfig(logging_config)
+    logger = logging.getLogger('limn_mobile')
+    return logger
diff --git a/requirements.txt b/requirements.txt
index 30f9421..4bd1893 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 Jinja2==2.6
-MySQL-python==1.2.4
+MySQL-python==1.2.5
 PyYAML==3.10
+python-dateutil==2.3
+python-logstash==0.4.2
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000..f3c7e8e
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1 @@
+nose
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/test_logger.py b/tests/test_logger.py
new file mode 100644
index 0000000..d1b8128
--- /dev/null
+++ b/tests/test_logger.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+import unittest
+import logging
+import copy
+from logger import (
+    update_config, update_logging_config, get_logger,
+    parse_logstash_endpoint)
+
+
+class TestUpdateConfig(unittest.TestCase):
+
+    def test_success(self):
+        SOURCE_CONFIG = {
+            'a': 1,
+            'b': {
+                'b1': ['blah']
+            },
+            'c': ['foo']
+        }
+
+        CONFIG = {
+            'a': 2,
+            'b': {
+                'b1': 'foobar',
+                'b2': 'whatever'
+            },
+            'c': [1, 2]
+        }
+        UPDATED_CONFIG = {
+            'a': 2,
+            'b': {
+                'b1': ['blah', 'foobar']
+            },
+            'c': [1, 2]
+        }
+        update_config(SOURCE_CONFIG, CONFIG)
+        self.assertEqual(SOURCE_CONFIG, UPDATED_CONFIG)
+
+
+class TestUpdateLoggingConfig(unittest.TestCase):
+
+    def test_success(self):
+        from logger import DEFAULT_LOGGING_CONFIG
+        UPDATED_LOGGING = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
+        UPDATED_LOGGING['loggers']['limn_mobile']['handlers'] = [
+            'console', 'logstash', 'log_to_file']
+        UPDATED_LOGGING['handlers']['log_to_file']['filename'] = 'test.log'
+        UPDATED_LOGGING['handlers']['logstash'].update({
+            'host': 'foo',
+            'port': 8000
+        })
+        updated_logging_config = update_logging_config(
+            DEFAULT_LOGGING_CONFIG, {
+                'loggers': {
+                    'limn_mobile': {
+                        'handlers': ['console', 'logstash']
+                    }
+                }
+            }, 'test.log', 'foo:8000')
+        self.assertEqual(updated_logging_config, UPDATED_LOGGING)
+
+
+class TestParseLogstashEndpoint(unittest.TestCase):
+
+    def test_success(self):
+        host, port = parse_logstash_endpoint('foo:8000')
+        self.assertEqual(host, 'foo')
+        self.assertEqual(port, 8000)
+
+    def test_error(self):
+        host, port = parse_logstash_endpoint('foo:bar')
+        self.assertIsNone(host)
+        self.assertIsNone(port)
+        host, port = parse_logstash_endpoint('foo')
+        self.assertIsNone(host)
+        self.assertIsNone(port)
+        host, port = parse_logstash_endpoint(None)
+        self.assertIsNone(host)
+        self.assertIsNone(port)
+
+
+class TestGetLogger(unittest.TestCase):
+
+    def test_success(self):
+        logger = get_logger(
+            config={
+                'loggers': {
+                    'limn_mobile': {
+                        'handlers': ['console', 'logstash']
+                    }
+                }
+            },
+            log_file_path='test.log')
+        self.assertTrue(isinstance(logger, logging.Logger))
+        self.assertEqual(len(logger.handlers), 2)

-- 
To view, visit https://gerrit.wikimedia.org/r/180828
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia030b599180189b05409e1085941bd01c33f7fbb
Gerrit-PatchSet: 5
Gerrit-Project: analytics/limn-mobile-data
Gerrit-Branch: master
Gerrit-Owner: Rtnpro <[email protected]>
Gerrit-Reviewer: Bmansurov <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Rtnpro <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to