DCausse has submitted this change and it was merged.
( https://gerrit.wikimedia.org/r/362329 )
Change subject: Setup python logging
......................................................................
Setup python logging
Initializes python logging in the shell scripts so we can get useful
debugging information. This is particularly useful when debugging kafka
to see what the library is doing.
This duplicates a bit of code across the three scripts. Some refactoring
may be needed to give them a single entry point, or some shared entry
point code that each one calls, but for now this probably works fine.
Change-Id: I2e4200f6e12e77e4ffb230280de3eefe1f9b3f20
---
M mjolnir/cli/data_pipeline.py
M mjolnir/cli/kafka_daemon.py
M mjolnir/cli/training_pipeline.py
3 files changed, 45 insertions(+), 0 deletions(-)
Approvals:
DCausse: Verified; Looks good to me, approved
diff --git a/mjolnir/cli/data_pipeline.py b/mjolnir/cli/data_pipeline.py
index 2c9c333..45a395d 100644
--- a/mjolnir/cli/data_pipeline.py
+++ b/mjolnir/cli/data_pipeline.py
@@ -11,6 +11,7 @@
"""
import argparse
+import logging
import mjolnir.dbn
import mjolnir.metrics
import mjolnir.norm_query
@@ -182,6 +183,12 @@
+ ' form to bootstrap access. Query normalization will still use
the '
+ ' --search-cluster option')
parser.add_argument(
+ '-v', '--verbose', dest='verbose', default=False, action='store_true',
+ help='Increase logging to INFO')
+ parser.add_argument(
+ '-vv', '--very-verbose', dest='very_verbose', default=False,
action='store_true',
+ help='Increase logging to DEBUG')
+ parser.add_argument(
'wikis', metavar='wiki', type=str, nargs='+',
help='A wiki to generate features and labels for')
@@ -191,6 +198,14 @@
if __name__ == "__main__":
args = parse_arguments()
+ if args['very_verbose']:
+ logging.basicConfig(level=logging.DEBUG)
+ elif args['verbose']:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig()
+ del args['verbose']
+ del args['very_verbose']
sc = SparkContext(appName="MLR: data collection pipeline")
# spark info logging is incredibly spammy. Use warn to have some hope of
# human decipherable output
diff --git a/mjolnir/cli/kafka_daemon.py b/mjolnir/cli/kafka_daemon.py
index b54869c..da61b07 100644
--- a/mjolnir/cli/kafka_daemon.py
+++ b/mjolnir/cli/kafka_daemon.py
@@ -5,6 +5,7 @@
"""
import argparse
+import logging
import mjolnir.kafka.daemon
@@ -17,10 +18,24 @@
'-w', '--num-workers', dest='n_workers', type=int, default=5,
help='Number of workers to issue elasticsearch queries in parallel. '
+ 'Defaults to 5.')
+ parser.add_argument(
+ '-v', '--verbose', dest='verbose', default=False, action='store_true',
+ help='Increase logging to INFO')
+ parser.add_argument(
+ '-vv', '--very-verbose', dest='very_verbose', default=False,
action='store_true',
+ help='Increase logging to DEBUG')
args = parser.parse_args()
return dict(vars(args))
if __name__ == '__main__':
args = parse_arguments()
+ if args['very_verbose']:
+ logging.basicConfig(level=logging.DEBUG)
+ elif args['verbose']:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig()
+ del args['verbose']
+ del args['very_verbose']
mjolnir.kafka.daemon.Daemon(**args).run()
diff --git a/mjolnir/cli/training_pipeline.py b/mjolnir/cli/training_pipeline.py
index 3cb1af2..dab8301 100644
--- a/mjolnir/cli/training_pipeline.py
+++ b/mjolnir/cli/training_pipeline.py
@@ -10,6 +10,7 @@
"""
import argparse
+import logging
import mjolnir.training.xgboost
import os
import pickle
@@ -110,6 +111,12 @@
+ 'trees used in the final result. Default uses 100 trees rather '
+ 'than dynamically choosing based on max_depth. (Default: None)')
parser.add_argument(
+ '-v', '--verbose', dest='verbose', default=False, action='store_true',
+ help='Increase logging to INFO')
+ parser.add_argument(
+ '-vv', '--very-verbose', dest='very_verbose', default=False,
action='store_true',
+ help='Increase logging to DEBUG')
+ parser.add_argument(
'wikis', metavar='wiki', type=str, nargs='+',
help='A wiki to perform model training for.')
@@ -121,6 +128,14 @@
if __name__ == "__main__":
args = parse_arguments()
+ if args['very_verbose']:
+ logging.basicConfig(level=logging.DEBUG)
+ elif args['verbose']:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig()
+ del args['verbose']
+ del args['very_verbose']
# TODO: Set spark configuration? Some can't actually be set here though,
so best might be to set all of it
# on the command line for consistency.
sc = SparkContext(appName="MLR: training pipeline")
--
To view, visit https://gerrit.wikimedia.org/r/362329
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I2e4200f6e12e77e4ffb230280de3eefe1f9b3f20
Gerrit-PatchSet: 3
Gerrit-Project: search/MjoLniR
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits