This is an automated email from the ASF dual-hosted git repository.
asalamon74 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/oozie.git
The following commit(s) were added to refs/heads/master by this push:
new 08ddfb7 OOZIE-3249 [tools] Instrumentation log parser (andras.piros via asalamon74)
08ddfb7 is described below
commit 08ddfb768e26282b2d5d24d600b4ea049f2a9cce
Author: Andras Salamon <[email protected]>
AuthorDate: Fri Apr 5 09:04:15 2019 +0200
OOZIE-3249 [tools] Instrumentation log parser (andras.piros via asalamon74)
---
release-log.txt | 1 +
source-headers.txt | 19 ++++
tools/src/main/bin/instrumentation-log-parser.py | 132 +++++++++++++++++++++++
3 files changed, 152 insertions(+)
diff --git a/release-log.txt b/release-log.txt
index 146c8ac..bbde580 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
-- Oozie 5.2.0 release (trunk - unreleased)
+OOZIE-3249 [tools] Instrumentation log parser (andras.piros via asalamon74)
OOZIE-3460 Remove pom files from Oozie sharelibs (nobigo via asalamon74)
OOZIE-3450 Investigate and clean git sharelib (matijhs via asalamon74)
OOZIE-3443 Migrate from joda time to java.time (matijhs via asalamon74)
diff --git a/source-headers.txt b/source-headers.txt
index 0999ca4..e8df318 100644
--- a/source-headers.txt
+++ b/source-headers.txt
@@ -63,3 +63,22 @@ File type: JAVA JS
* limitations under the License.
*/
----------------------------------
+File type: PYTHON
+
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+----------------------------------
diff --git a/tools/src/main/bin/instrumentation-log-parser.py b/tools/src/main/bin/instrumentation-log-parser.py
new file mode 100755
index 0000000..3c44a86
--- /dev/null
+++ b/tools/src/main/bin/instrumentation-log-parser.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import sys
+import getopt
+import re
+from datetime import datetime
+
+
+def main(argv):
+ inputfile = ''
+ outputfile = ''
+ parameters = []
+
+ try:
+ opts, args = getopt.getopt(argv, "hi:o:p:",["input=", "output=", "parameters="])
+ except getopt.GetoptError:
+ usage()
+ sys.exit(2)
+ for opt, arg in opts:
+ if opt in ("-h", "--help"):
+ usage()
+ sys.exit()
+ elif opt in ("-i", "--input"):
+ inputfile = arg
+ elif opt in ("-o", "--output"):
+ outputfile = arg
+ elif opt in ("-p", "--parameters"):
+ parameters = arg.split(',')
+
+ if inputfile == '' or outputfile == '' or parameters == []:
+ error('Arguments not set correctly.')
+ usage()
+ sys.exit(1)
+
+ print 'Input file is: ', inputfile
+ print 'Output file is: ', outputfile
+ print 'Paremeters are: ', parameters
+
+ parse_instrumentation_log(inputfile, outputfile, parameters)
+
+
+def usage():
+ print 'USAGE: python instrumentation-log-parser.py -i <inputfile> -o <outputfile> -p <path/to/json/value1,path/to/json/value2>'
+
+
+def info(message):
+ log('INFO', message)
+
+
+def debug(message):
+ log('DEBUG', message)
+
+
+def error(message):
+ log('ERROR', message)
+
+
+def warn(message):
+ log('WARN', message)
+
+
+def log(level, message):
+ print '[%s] [%s] %s' % (datetime.now(), level, message)
+
+
+def parse_instrumentation_log(inputfile, outputfile, parameters):
+ info('Parsing instrumentation log. [inputfile=%s;outputfile=%s;parameters=%s]' % (inputfile, outputfile, parameters))
+
+ with open(inputfile) as i:
+ content = i.readlines()
+
+ debug('Input file has %d lines' % (len(content)))
+
+ header = re.compile('^([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}).*$')
+ json_begin = re.compile('^\{$')
+ json_end = re.compile('^\}$')
+
+ json_content = ''
+ timestamp = ''
+ output_line_count = 0
+
+ with open(outputfile, 'w+') as o:
+ o.write('Timestamp;%s\n' % (';'.join(parameters)))
+ input_line_count = 0
+ for line in content:
+ if header.match(line) != None:
+ timestamp = header.match(line).group(1)
+ elif json_begin.match(line):
+ json_content = json_content + line
+ elif json_end.match(line):
+ json_content = json_content + line
+ try:
+ json_object = json.loads(json_content)
+ output_line = timestamp
+ for parameter in parameters:
+ json_fragment = json_object
+ for parameter_key in parameter.split('/'):
+ json_fragment = json_fragment[parameter_key]
+ output_line = output_line + ';' + str(json_fragment)
+ output_line_count = output_line_count + 1
+ o.write(output_line + '\n')
+ except ValueError:
+ warn('Unparseable JSON input at line %d' % input_line_count)
+ json_content = ''
+ else:
+ json_content = json_content + line
+ input_line_count = input_line_count + 1
+ if input_line_count % 1000000 == 0:
+ debug('%d input lines processed, %d output lines written' % (input_line_count, output_line_count))
+
+ info('Parsing instrumentation log finished. In total %d input lines processed, %d output lines written'
+ % (input_line_count, output_line_count))
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])