jenkins-bot has submitted this change and it was merged.
Change subject: Integrated logging
......................................................................
Integrated logging
Allows logging to the console, to a file, and to logstash.
Bug: T84892
Change-Id: Ia030b599180189b05409e1085941bd01c33f7fbb
---
M .gitignore
M README.md
M generate.py
A logger.py
M requirements.txt
A test-requirements.txt
A tests/__init__.py
A tests/test_logger.py
8 files changed, 258 insertions(+), 13 deletions(-)
Approvals:
Bmansurov: Looks good to me, approved
jenkins-bot: Verified
diff --git a/.gitignore b/.gitignore
index 57d8753..1f404b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@
.DS_Store
/.idea
*.iml
+test.log
diff --git a/README.md b/README.md
index ca5dc85..67c214b 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,13 @@
- Add the graph to `mobile/config.yaml`
- Generate the datafile (csv) for the graph `python generate.py -c
scripts/config.yaml mobile -g <graphname>`
+## Executing unit tests
+
+Install the test requirements and run nosetests:
+
+ $ pip install -r test-requirements.txt
+ $ nosetests
+
## Testing using local data
By default the instance you run will show graphs using production data.
diff --git a/generate.py b/generate.py
index d2f13cd..db7360c 100644
--- a/generate.py
+++ b/generate.py
@@ -15,6 +15,7 @@
import datetime
from dateutil.relativedelta import relativedelta
import json
+from logger import get_logger
from traceback import format_exc
@@ -22,7 +23,9 @@
class DataGenerator(object):
"""Executes queries and generates CSV reports based on YAML configs."""
- def __init__(self, folder, debug_folder=None, config_override=None,
graph=None, force=None):
+ def __init__(self, folder, debug_folder=None, config_override=None,
+ graph=None, force=None, log_file_path=None,
+ log_logstash_endpoint=None):
"""Reads configuration 'config.yaml' in `folder_path`."""
self.folder = folder
self.debug_folder = debug_folder
@@ -35,6 +38,8 @@
self.load_config(config_main)
if config_override:
self.load_config(config_override)
+ self.logger = get_logger(
+ self.config.get('logging'), log_file_path, log_logstash_endpoint)
def load_config(self, config_path):
with io.open(config_path, encoding='utf-8') as config_file:
@@ -92,7 +97,7 @@
rows = cursor.fetchall()
headers = [field[0] for field in cursor.description]
except Exception, e:
- print "Issue executing SQL for %s (%s)" % (graph_key, e)
+ self.logger.error("Issue executing SQL for %s (%s)" % (graph_key,
e))
headers = []
rows = []
finally:
@@ -106,7 +111,7 @@
res = module.execute(self)
return res
except Exception, e:
- print "Issue executing Python script for %s (%s)" % (graph_key, e)
+ self.logger.error("Issue executing Python script for %s (%s)" %
(graph_key, e), exc_info=True)
return ([], [])
def save_history(self, data):
@@ -124,7 +129,7 @@
try:
return json.loads(data)
except ValueError:
- print('invalid JSON - someone tweaked the history file!')
+ self.logger.error('invalid JSON - someone tweaked the history
file!')
return {}
except IOError:
return {}
@@ -156,7 +161,7 @@
due_at = last_run_time + increment
if due_at < now or self.force:
- print('Generating {0}'.format(value['title']))
+ self.logger.info('Generating {0}'.format(value['title']))
if "timeboxed" in value and "starts" in value:
from_date = value["starts"]
@@ -177,7 +182,7 @@
if history[key] == now:
self.save_history(history)
else:
- print('Skipping generation of {0}: not enough time has
passed'.format(value['title']))
+ self.logger.info('Skipping generation of {0}: not enough time
has passed'.format(value['title']))
def generate_graph_timeboxed(self, graph_key, value, from_date,
to_date=None):
csv_filename = self.get_csv_filename(graph_key)
@@ -228,7 +233,7 @@
to_timestamp = from_date.strftime('%Y%m%d%H%M%S')
# Generate a graph if not in cache or the current month
if graph_date_key not in cache or from_date >= this_month:
- print 'Generating data for timestamp %s to %s' %
(from_timestamp, to_timestamp)
+ self.logger.info('Generating data for timestamp %s to %s'
% (from_timestamp, to_timestamp))
db_name = value.get('db', self.config['defaults']['db'])
query = self.get_sql_string(sql_path)
# apply timeboxing
@@ -239,7 +244,7 @@
headers, rows = self.execute_sql(query, db_name, graph_key)
if headers and csv_header:
if len(headers) + 1 != len(csv_header) and not
self.force:
- print "Data format has changed. Aborting."
+ self.logger.info("Data format has changed.
Aborting.")
self.clear_csv_data(graph_key)
self.generate_graph_timeboxed(graph_key, value,
from_date, to_date)
return
@@ -250,13 +255,13 @@
try:
cache[graph_date_key] = list(rows[0])
except Exception, e:
- print('{}: Error: {}, db: {}, graph key: {}, rows: {},
query: \n{}'.format(
+ self.logger.error('{}: Error: {}, db: {}, graph key:
{}, rows: {}, query: \n{}'.format(
datetime.datetime.now(), format_exc(e), db_name,
graph_key, rows, query
))
# stop the loop because one exception is likely to
mean total fail
break
else:
- print 'Skip generation of %s' % graph_date_key
+ self.logger.info('Skip generation of %s' % graph_date_key)
rows = []
for month, row in iter(sorted(cache.iteritems())):
@@ -265,9 +270,9 @@
if len(rows) > 0:
self.save_graph_as_csv(graph_key, csv_header, rows)
else:
- print 'No data to write.'
+ self.logger.info('No data to write.')
else:
- print 'Bad SQL given'
+ self.logger.error('Bad SQL given')
def generate_graph_full(self, key, value):
"""
@@ -319,6 +324,12 @@
parser.add_argument('-g', '--graph', help='the name of a single graph you
want to generate for')
parser.add_argument('-f', '--force', dest='force', action='store_true',
help='Force generation of graph regardless of when
last generated')
+ parser.add_argument(
+ '-l', '--log-file-path', dest='log_file_path',
+ help='Path for log file.')
+ parser.add_argument(
+ '-L', '--log-logstash-endpoint', dest='log_logstash_endpoint',
+ help='Endpoint for logstash (HOST:PORT).')
parser.set_defaults(force=False)
args = parser.parse_args()
diff --git a/logger.py b/logger.py
new file mode 100644
index 0000000..482a610
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+import copy
+import logging
+import logging.config
+
+__all__ = ['get_logger']
+
+
+DEFAULT_LOGGING_CONFIG = {
+ 'version': 1,
+ 'formatters': {
+ 'stream': {
+ 'format': '[%(asctime)s %(name)s %(levelname)s] %(message)s'
+ },
+ 'file': {
+ 'format': '[%(asctime)s %(levelname)s] %(message)s'
+ }
+ },
+ 'handlers': {
+ 'console': {
+ 'level': 'DEBUG',
+ 'class': 'logging.StreamHandler',
+ 'formatter': 'stream'
+ },
+ 'log_to_file': {
+ 'level': 'DEBUG',
+ 'class': 'logging.FileHandler',
+ 'filename': 'limn.log',
+ 'mode': 'a+',
+ 'formatter': 'file',
+ },
+ 'logstash': {
+ 'level': 'INFO',
+ 'class': 'logstash.LogstashHandler',
+ 'host': 'localhost',
+ 'port': 5959,
+ 'version': 1,
+ 'message_type': 'logstash',
+ 'fqdn': False,
+ 'tags': ['limn_mobile_data'],
+ }
+ },
+ 'loggers': {
+ 'limn_mobile': {
+ 'level': 'DEBUG',
+ 'handlers': ['console'],
+ 'propagate': False
+ }
+ }
+}
+
+
+def parse_logstash_endpoint(logstash_endpoint):
+ try:
+ host, port = logstash_endpoint.split(':') if isinstance(
+ logstash_endpoint, basestring) else (None, None)
+ port = int(port) if port else None
+ except ValueError:
+ host, port = None, None
+ return host, port
+
+
+def update_config(source_config, config):
+ """
+ Update source config dictionary with values from
+ another config dictionary in a granular way, rather
+ than only top level substitution.
+ """
+ for key, value in config.items():
+ source_value = source_config.get(key)
+ if not source_value:
+ continue
+ if isinstance(source_value, dict) and isinstance(value, dict):
+ update_config(source_value, value)
+ elif isinstance(source_value, list) and not isinstance(value, list):
+ source_value.append(value)
+ elif type(source_value) == type(value):
+ source_config[key] = value
+
+
+def update_logging_config(default_config, config, log_file_path,
+ logstash_endpoint):
+ """
+ Update logging config dict.
+
+ Args:
+ default_config: A dictionary for default logging config.
+ config: A dictionary for customizing logging config.
+ log_file_path: A string
+ logstash_endpoint: A string of the form 'host:port'
+
+ Returns:
+ A dictionary for the updated logging config.
+ """
+ updated_config = copy.deepcopy(default_config)
+ if isinstance(config, dict):
+ update_config(updated_config, config)
+ config_handlers = updated_config['loggers']['limn_mobile']['handlers']
+ if log_file_path:
+ updated_config['handlers']['log_to_file']['filename'] = log_file_path
+ if 'log_to_file' not in config_handlers:
+ config_handlers.append('log_to_file')
+ elif 'log_to_file' in config_handlers:
+ config_handlers.remove('log_to_file')
+
+ logstash_host, logstash_port = parse_logstash_endpoint(logstash_endpoint)
+ if logstash_host and logstash_port:
+ updated_config['handlers']['logstash'].update({
+ 'host': logstash_host,
+ 'port': logstash_port
+ })
+ if 'logstash' not in config_handlers:
+ config_handlers.append('logstash')
+ elif 'logstash' in config_handlers:
+ config_handlers.remove('logstash')
+ return updated_config
+
+
+def get_logger(config={}, log_file_path=None, logstash_endpoint=None):
+ """
+ Get a logger instance for logging.
+ """
+ config = copy.deepcopy(config)
+ logging_config = update_logging_config(
+ DEFAULT_LOGGING_CONFIG, config, log_file_path, logstash_endpoint)
+ logging.config.dictConfig(logging_config)
+ logger = logging.getLogger('limn_mobile')
+ return logger
diff --git a/requirements.txt b/requirements.txt
index 30f9421..4bd1893 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
Jinja2==2.6
-MySQL-python==1.2.4
+MySQL-python==1.2.5
PyYAML==3.10
+python-dateutil==2.3
+python-logstash==0.4.2
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000..f3c7e8e
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1 @@
+nose
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/test_logger.py b/tests/test_logger.py
new file mode 100644
index 0000000..d1b8128
--- /dev/null
+++ b/tests/test_logger.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+import unittest
+import logging
+import copy
+from logger import (
+ update_config, update_logging_config, get_logger,
+ parse_logstash_endpoint)
+
+
+class TestUpdateConfig(unittest.TestCase):
+
+ def test_success(self):
+ SOURCE_CONFIG = {
+ 'a': 1,
+ 'b': {
+ 'b1': ['blah']
+ },
+ 'c': ['foo']
+ }
+
+ CONFIG = {
+ 'a': 2,
+ 'b': {
+ 'b1': 'foobar',
+ 'b2': 'whatever'
+ },
+ 'c': [1, 2]
+ }
+ UPDATED_CONFIG = {
+ 'a': 2,
+ 'b': {
+ 'b1': ['blah', 'foobar']
+ },
+ 'c': [1, 2]
+ }
+ update_config(SOURCE_CONFIG, CONFIG)
+ self.assertEqual(SOURCE_CONFIG, UPDATED_CONFIG)
+
+
+class TestUpdateLoggingConfig(unittest.TestCase):
+
+ def test_success(self):
+ from logger import DEFAULT_LOGGING_CONFIG
+ UPDATED_LOGGING = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
+ UPDATED_LOGGING['loggers']['limn_mobile']['handlers'] = [
+ 'console', 'logstash', 'log_to_file']
+ UPDATED_LOGGING['handlers']['log_to_file']['filename'] = 'test.log'
+ UPDATED_LOGGING['handlers']['logstash'].update({
+ 'host': 'foo',
+ 'port': 8000
+ })
+ updated_logging_config = update_logging_config(
+ DEFAULT_LOGGING_CONFIG, {
+ 'loggers': {
+ 'limn_mobile': {
+ 'handlers': ['console', 'logstash']
+ }
+ }
+ }, 'test.log', 'foo:8000')
+ self.assertEqual(updated_logging_config, UPDATED_LOGGING)
+
+
+class TestParseLogstashEndpoint(unittest.TestCase):
+
+ def test_success(self):
+ host, port = parse_logstash_endpoint('foo:8000')
+ self.assertEqual(host, 'foo')
+ self.assertEqual(port, 8000)
+
+ def test_error(self):
+ host, port = parse_logstash_endpoint('foo:bar')
+ self.assertIsNone(host)
+ self.assertIsNone(port)
+ host, port = parse_logstash_endpoint('foo')
+ self.assertIsNone(host)
+ self.assertIsNone(port)
+ host, port = parse_logstash_endpoint(None)
+ self.assertIsNone(host)
+ self.assertIsNone(port)
+
+
+class TestGetLogger(unittest.TestCase):
+
+ def test_success(self):
+ logger = get_logger(
+ config={
+ 'loggers': {
+ 'limn_mobile': {
+ 'handlers': ['console', 'logstash']
+ }
+ }
+ },
+ log_file_path='test.log')
+ self.assertTrue(isinstance(logger, logging.Logger))
+ self.assertEqual(len(logger.handlers), 2)
--
To view, visit https://gerrit.wikimedia.org/r/180828
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia030b599180189b05409e1085941bd01c33f7fbb
Gerrit-PatchSet: 5
Gerrit-Project: analytics/limn-mobile-data
Gerrit-Branch: master
Gerrit-Owner: Rtnpro <[email protected]>
Gerrit-Reviewer: Bmansurov <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Rtnpro <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits