Repository: tez Updated Branches: refs/heads/master 28cd991b8 -> eadbfec44
TEZ-2699. Internalize strings in ATF parser (bikas) Project: http://git-wip-us.apache.org/repos/asf/tez/repo Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/eadbfec4 Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/eadbfec4 Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/eadbfec4 Branch: refs/heads/master Commit: eadbfec4456b5edc318a5c94f90a2a78e172ebf4 Parents: 28cd991 Author: Bikas Saha <[email protected]> Authored: Thu Aug 6 13:40:37 2015 -0700 Committer: Bikas Saha <[email protected]> Committed: Thu Aug 6 13:40:37 2015 -0700 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../tez/history/parser/datamodel/DagInfo.java | 7 +++--- .../parser/datamodel/TaskAttemptInfo.java | 24 +++++++++----------- .../tez/history/parser/datamodel/TaskInfo.java | 9 +++++--- .../history/parser/datamodel/VertexInfo.java | 14 +++++++++--- tez-tools/analyzers/job-analyzer/pom.xml | 7 +++++- 6 files changed, 39 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 5d3c4f4..b37eb9e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -13,6 +13,7 @@ INCOMPATIBLE CHANGES TEZ-2633. Allow VertexManagerPlugins to receive and report based on attempts instead of tasks TEZ-2650. Timing details on Vertex state changes + TEZ-2699. Internalize strings in ATF parser ALL CHANGES: TEZ-2663. SessionNotRunning exceptions are wrapped in a ServiceException from a dying AM. http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java ---------------------------------------------------------------------- diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java index fe596f0..5ea94d6 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java @@ -32,6 +32,7 @@ import org.apache.commons.collections.BidiMap; import org.apache.commons.collections.bidimap.DualHashBidiMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.event.VertexState; import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONException; @@ -91,7 +92,7 @@ public class DagInfo extends BaseInfo { Preconditions.checkArgument(jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_DAG_ID)); - dagId = jsonObject.getString(Constants.ENTITY); + dagId = StringInterner.weakIntern(jsonObject.getString(Constants.ENTITY)); //Parse additional Info JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); @@ -102,7 +103,7 @@ public class DagInfo extends BaseInfo { diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); failedTasks = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS); JSONObject dagPlan = otherInfoNode.optJSONObject(Constants.DAG_PLAN); - name = (dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null; + name = StringInterner.weakIntern((dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null); if (dagPlan != null) { JSONArray vertices = dagPlan.optJSONArray(Constants.VERTICES); if (vertices != null) { @@ -114,7 +115,7 @@ public class DagInfo extends BaseInfo { } else { numVertices = 0; } - status = otherInfoNode.optString(Constants.STATUS); + status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); //parse name id mapping JSONObject vertexIDMappingJson = otherInfoNode.optJSONObject(Constants.VERTEX_NAME_ID_MAPPING); http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java ---------------------------------------------------------------------- diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java index 8f7ec23..916df95 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java @@ -21,6 +21,7 @@ package org.apache.tez.history.parser.datamodel; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; +import org.apache.hadoop.util.StringInterner; import org.apache.tez.common.ATSConstants; import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.common.counters.TaskCounter; @@ -41,7 +42,7 @@ public class TaskAttemptInfo extends BaseInfo { private final long startTime; private final long endTime; private final String diagnostics; - private final String successfulAttemptId; + private final long scheduledTime; private final String containerId; private final String nodeId; @@ -62,26 +63,27 @@ public class TaskAttemptInfo extends BaseInfo { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_TASK_ATTEMPT_ID)); - taskAttemptId = jsonObject.optString(Constants.ENTITY); + taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); //Parse additional Info final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); startTime = otherInfoNode.optLong(Constants.START_TIME); endTime = otherInfoNode.optLong(Constants.FINISH_TIME); diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); - successfulAttemptId = otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID); scheduledTime = otherInfoNode.optLong(Constants.SCHEDULED_TIME); - schedulingCausalTA = otherInfoNode.optString(Constants.SCHEDULING_CAUSAL_ATTEMPT); + schedulingCausalTA = StringInterner.weakIntern( + otherInfoNode.optString(Constants.SCHEDULING_CAUSAL_ATTEMPT)); - containerId = otherInfoNode.optString(Constants.CONTAINER_ID); + containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID)); String id = otherInfoNode.optString(Constants.NODE_ID); - nodeId = (id != null) ? (id.split(":")[0]) : ""; + nodeId = StringInterner.weakIntern((id != null) ? (id.split(":")[0]) : ""); logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL); - status = otherInfoNode.optString(Constants.STATUS); + status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); container = new Container(containerId, nodeId); lastDataEventTime = otherInfoNode.optLong(ATSConstants.LAST_DATA_EVENT_TIME); - lastDataEventSourceTA = otherInfoNode.optString(ATSConstants.LAST_DATA_EVENT_SOURCE_TA); + lastDataEventSourceTA = StringInterner.weakIntern( + otherInfoNode.optString(ATSConstants.LAST_DATA_EVENT_SOURCE_TA)); } void setTaskInfo(TaskInfo taskInfo) { @@ -132,6 +134,7 @@ public class TaskAttemptInfo extends BaseInfo { return schedulingCausalTA; } + @Override public final String getDiagnostics() { return diagnostics; @@ -186,10 +189,6 @@ public class TaskAttemptInfo extends BaseInfo { return taskAttemptId; } - public final String getSuccessfulAttemptId() { - return successfulAttemptId; - } - public final String getNodeId() { return nodeId; } @@ -261,7 +260,6 @@ public class TaskAttemptInfo extends BaseInfo { sb.append("timeTaken=").append(getTimeTaken()).append(", "); sb.append("events=").append(getEvents()).append(", "); sb.append("diagnostics=").append(getDiagnostics()).append(", "); - sb.append("successfulAttempId=").append(getSuccessfulAttemptId()).append(", "); sb.append("container=").append(getContainer()).append(", "); sb.append("nodeId=").append(getNodeId()).append(", "); sb.append("logURL=").append(getLogURL()).append(", "); http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java ---------------------------------------------------------------------- diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java index 9705b73..5e63efa 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java @@ -27,6 +27,8 @@ import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.Ordering; + +import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.oldrecords.TaskAttemptState; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -63,16 +65,17 @@ public class TaskInfo extends BaseInfo { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_TASK_ID)); - taskId = jsonObject.optString(Constants.ENTITY); + taskId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); //Parse additional Info final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); startTime = otherInfoNode.optLong(Constants.START_TIME); endTime = otherInfoNode.optLong(Constants.FINISH_TIME); diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); - successfulAttemptId = otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID); + successfulAttemptId = StringInterner.weakIntern( + otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID)); scheduledTime = otherInfoNode.optLong(Constants.SCHEDULED_TIME); - status = otherInfoNode.optString(Constants.STATUS); + status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); } @Override http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java ---------------------------------------------------------------------- diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java index 554f94b..d2dac7d 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java @@ -27,6 +27,8 @@ import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.Ordering; + +import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.oldrecords.TaskState; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -43,6 +45,7 @@ import static org.apache.hadoop.classification.InterfaceStability.Evolving; @Evolving public class VertexInfo extends BaseInfo { + private final String vertexId; private final String vertexName; private final long finishTime; private final long initTime; @@ -80,6 +83,7 @@ public class VertexInfo extends BaseInfo { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_VERTEX_ID)); + vertexId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY_TYPE)); taskInfoMap = Maps.newHashMap(); inEdgeList = Lists.newLinkedList(); @@ -104,9 +108,9 @@ public class VertexInfo extends BaseInfo { killedTasks = otherInfoNode.optInt(Constants.NUM_KILLED_TASKS); numFailedTaskAttempts = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS_ATTEMPTS); - vertexName = otherInfoNode.optString(Constants.VERTEX_NAME); - processorClass = otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME); - status = otherInfoNode.optString(Constants.STATUS); + vertexName = StringInterner.weakIntern(otherInfoNode.optString(Constants.VERTEX_NAME)); + processorClass = StringInterner.weakIntern(otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME)); + status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); } public static VertexInfo create(JSONObject vertexInfoObject) throws @@ -217,6 +221,10 @@ public class VertexInfo extends BaseInfo { public final String getVertexName() { return vertexName; } + + public final String getVertexId() { + return vertexId; + } //Quite possible that getFinishTime is not yet recorded for failed vertices (or killed vertices) //Start time of vertex infers that the dependencies are done and AM has inited it. http://git-wip-us.apache.org/repos/asf/tez/blob/eadbfec4/tez-tools/analyzers/job-analyzer/pom.xml ---------------------------------------------------------------------- diff --git a/tez-tools/analyzers/job-analyzer/pom.xml b/tez-tools/analyzers/job-analyzer/pom.xml index 21ee6a2..fe28b14 100644 --- a/tez-tools/analyzers/job-analyzer/pom.xml +++ b/tez-tools/analyzers/job-analyzer/pom.xml @@ -26,6 +26,10 @@ <dependencies> <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </dependency> + <dependency> <groupId>io.dropwizard.metrics</groupId> <artifactId>metrics-core</artifactId> </dependency> @@ -43,5 +47,6 @@ <artifactId>apache-rat-plugin</artifactId> </plugin> </plugins> - </build> + </build> + </project>
