Repository: tez
Updated Branches:
  refs/heads/master 765afd236 -> a02a5ea9d
TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers (jeagles)

Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a02a5ea9
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a02a5ea9
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a02a5ea9

Branch: refs/heads/master
Commit: a02a5ea9dd8af4e47114ef4145d8e6b75db6c119
Parents: 765afd2
Author: Jonathan Eagles <[email protected]>
Authored: Thu Apr 30 16:16:10 2015 -0500
Committer: Jonathan Eagles <[email protected]>
Committed: Thu Apr 30 16:16:10 2015 -0500

----------------------------------------------------------------------
 CHANGES.txt | 1 +
 tez-tools/swimlanes/amlogparser.py | 20 ++++++++++----------
 tez-tools/swimlanes/swimlane.py | 6 ++----
 3 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index aa72320..5a5c21f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -155,6 +155,7 @@ Release 0.6.1: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers
   TEZ-2256. Avoid use of BufferTooSmallException to signal end of buffer in UnorderedPartitionedKVWriter
   TEZ-2380. Disable fall back to reading from timeline if timeline disabled.
   TEZ-2226. Disable writing history to timeline if domain creation fails.

http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/amlogparser.py
----------------------------------------------------------------------
diff --git a/tez-tools/swimlanes/amlogparser.py b/tez-tools/swimlanes/amlogparser.py
index 02f4892..8ab8e29 100644
--- a/tez-tools/swimlanes/amlogparser.py
+++ b/tez-tools/swimlanes/amlogparser.py
@@ -17,8 +17,7 @@
 # under the License.
# -import os,sys,re,math,os.path -from collections import defaultdict +import sys,re from itertools import groupby from bz2 import BZ2File from gzip import GzipFile as GZFile @@ -142,10 +141,15 @@ class Attempt(object): def __init__(self, pair): start = first(filter(lambda a: a.event == "TASK_ATTEMPT_STARTED", pair)) finish = first(filter(lambda a: a.event == "TASK_ATTEMPT_FINISHED", pair)) + if start is None or finish is None: + print [start, finish]; self.raw = finish - self.dag = finish.dag self.kvs = csv_kv(start.args) - self.kvs.update(csv_kv(finish.args)) + if finish is not None: + self.dag = finish.dag + self.kvs.update(csv_kv(finish.args)) + self.finish = (int)(self.kvs["finishTime"]) + self.duration = (int)(self.kvs["timeTaken"]) self.name = self.kvs["taskAttemptId"] self.task = self.name[:self.name.rfind("_")].replace("attempt","task") (_, _, amid, dagid, vertexid, taskid, attemptid) = self.name.split("_") @@ -153,8 +157,6 @@ class Attempt(object): self.attemptnum = int(attemptid) self.vertex = self.kvs["vertexName"] self.start = (int)(self.kvs["startTime"]) - self.finish = (int)(self.kvs["finishTime"]) - self.duration = (int)(self.kvs["timeTaken"]) self.container = self.kvs["containerId"] self.node = self.kvs["nodeId"] def __repr__(self): @@ -243,6 +245,7 @@ class AMLog(object): def parse(self, l): if(l.find("[HISTORY]") != -1): m = self.MAIN_RE.match(l) + print(m); ts = m.group("ts") dag = m.group("dag") event = m.group("event") @@ -250,14 +253,11 @@ class AMLog(object): return AMRawEvent(ts, dag, event, args) def main(argv): - f = argv[0] tree = AMLog(argv[0]).structure() # AM -> dag -> vertex -> task -> attempt # AM -> container - containers = set(tree.containers.keys()) - timeto = lambda a: (a - tree.zero) for d in tree.dags: - for a in d.attempts(): + for a in d.attempts(): print [a.vertex, a.name, a.container, a.start, a.finish] if __name__ == "__main__": http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/swimlane.py 
---------------------------------------------------------------------- diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py index b739b1e..dc8dc6f 100644 --- a/tez-tools/swimlanes/swimlane.py +++ b/tez-tools/swimlanes/swimlane.py @@ -17,10 +17,9 @@ # under the License. # -import os,sys,re,math,os.path +import sys,math,os.path import StringIO from amlogparser import AMLog -import random from getopt import getopt class ColourManager(object): @@ -133,7 +132,7 @@ def main(argv): svg = SVGHelper(x+2*marginRight+256, y+2*marginTop) a = marginTop svg.text(x/2, 32, log.name, style="font-size: 32px; text-anchor: middle") - containerMap = dict(zip(list(lanes), xrange(8192))) + containerMap = dict(zip(list(lanes), xrange(len(lanes)))) svg.text(marginRight - 16, marginTop - 32, "Container ID", "text-anchor:end; font-size: 16px;") # draw a grid for l in lanes: @@ -193,7 +192,6 @@ def main(argv): percentX = finishes[int(len(finishes)*fraction)] svg.line(marginRight+xdomain(percentX), marginTop, marginRight+xdomain(percentX), y+marginTop, style="stroke: red") svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle") - prefix = lambda a: (a.find(".") == -1 and a) or (a[:a.find(".")]) out.write(svg.flush()) out.close()
