Repository: incubator-slider Updated Branches: refs/heads/develop f0cd53ef1 -> 9644fd341
SLIDER-1187 Create app diagnostics resource with placeholder for containers (live/dead) Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/9644fd34 Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/9644fd34 Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/9644fd34 Branch: refs/heads/develop Commit: 9644fd34145174892c88d29cce4581f8a7c59e77 Parents: f0cd53e Author: Gour Saha <gourks...@apache.org> Authored: Tue Jan 24 17:33:54 2017 -0800 Committer: Gour Saha <gourks...@apache.org> Committed: Tue Jan 24 17:34:22 2017 -0800 ---------------------------------------------------------------------- .../apache/slider/api/ClusterDescription.java | 6 + .../java/org/apache/slider/api/ClusterNode.java | 7 + .../org/apache/slider/api/proto/Messages.java | 517 +++++++++++++++---- .../slider/api/proto/RestTypeMarshalling.java | 11 +- .../api/types/ApplicationDiagnostics.java | 111 ++++ .../slider/api/types/ContainerInformation.java | 83 +++ .../org/apache/slider/client/SliderClient.java | 35 +- .../common/params/ActionDiagnosticArgs.java | 4 + .../apache/slider/providers/ProviderRole.java | 2 +- .../server/appmaster/SliderAppMaster.java | 29 +- .../slider/server/appmaster/state/AppState.java | 132 ++++- .../server/appmaster/state/RoleInstance.java | 11 + .../src/main/proto/SliderClusterMessages.proto | 2 + .../apache/slider/client/TestDiagnostics.groovy | 149 +++++- 14 files changed, 988 insertions(+), 111 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java index 
f8e5e7c..f355eb4 100644 --- a/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java +++ b/slider-core/src/main/java/org/apache/slider/api/ClusterDescription.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.slider.api.types.ApplicationLivenessInformation; +import org.apache.slider.api.types.ApplicationDiagnostics; import org.apache.slider.common.tools.SliderUtils; import org.apache.slider.core.exceptions.BadConfigException; import org.apache.slider.providers.SliderProviderFactory; @@ -214,6 +215,11 @@ public class ClusterDescription implements Cloneable { public ApplicationLivenessInformation liveness; /** + * Application diagnostics information + */ + public ApplicationDiagnostics appDiagnostics = new ApplicationDiagnostics(); + + /** * Creator. */ public ClusterDescription() { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/ClusterNode.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/ClusterNode.java b/slider-core/src/main/java/org/apache/slider/api/ClusterNode.java index 8b0a563..ffce8f8 100644 --- a/slider-core/src/main/java/org/apache/slider/api/ClusterNode.java +++ b/slider-core/src/main/java/org/apache/slider/api/ClusterNode.java @@ -97,6 +97,11 @@ public final class ClusterNode implements Cloneable { public String[] output; /** + * Absolute link to the container log + */ + public String logLink; + + /** * Any environment details */ public String[] environment; @@ -134,6 +139,7 @@ public final class ClusterNode implements Cloneable { } } append(builder, "diagnostics", diagnostics); + append(builder, "logLink", logLink); return builder.toString(); } @@ -193,6 +199,7 @@ public final class ClusterNode implements Cloneable { arr = new String[outputCount]; node.output = 
message.getOutputList().toArray(arr); } + node.logLink = message.getLogLink(); node.role = message.getRole(); node.roleId = message.getRoleId(); node.state = message.getState(); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/proto/Messages.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/proto/Messages.java b/slider-core/src/main/java/org/apache/slider/api/proto/Messages.java index 373d64d..9f7334f 100644 --- a/slider-core/src/main/java/org/apache/slider/api/proto/Messages.java +++ b/slider-core/src/main/java/org/apache/slider/api/proto/Messages.java @@ -215,6 +215,21 @@ public final class Messages { */ com.google.protobuf.ByteString getAppVersionBytes(); + + // optional string logLink = 17; + /** + * <code>optional string logLink = 17;</code> + */ + boolean hasLogLink(); + /** + * <code>optional string logLink = 17;</code> + */ + java.lang.String getLogLink(); + /** + * <code>optional string logLink = 17;</code> + */ + com.google.protobuf.ByteString + getLogLinkBytes(); } /** * Protobuf type {@code org.apache.slider.api.RoleInstanceState} @@ -348,6 +363,11 @@ public final class Messages { appVersion_ = input.readBytes(); break; } + case 138: { + bitField0_ |= 0x00002000; + logLink_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -851,6 +871,49 @@ public final class Messages { } } + // optional string logLink = 17; + public static final int LOGLINK_FIELD_NUMBER = 17; + private java.lang.Object logLink_; + /** + * <code>optional string logLink = 17;</code> + */ + public boolean hasLogLink() { + return ((bitField0_ & 0x00002000) == 0x00002000); + } + /** + * <code>optional string logLink = 17;</code> + */ + public java.lang.String getLogLink() { + java.lang.Object ref = logLink_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + 
} else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + logLink_ = s; + } + return s; + } + } + /** + * <code>optional string logLink = 17;</code> + */ + public com.google.protobuf.ByteString + getLogLinkBytes() { + java.lang.Object ref = logLink_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + logLink_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + private void initFields() { name_ = ""; role_ = ""; @@ -867,6 +930,7 @@ public final class Messages { host_ = ""; hostURL_ = ""; appVersion_ = ""; + logLink_ = ""; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -961,6 +1025,9 @@ public final class Messages { if (((bitField0_ & 0x00001000) == 0x00001000)) { output.writeBytes(16, getAppVersionBytes()); } + if (((bitField0_ & 0x00002000) == 0x00002000)) { + output.writeBytes(17, getLogLinkBytes()); + } getUnknownFields().writeTo(output); } @@ -1040,6 +1107,10 @@ public final class Messages { size += com.google.protobuf.CodedOutputStream .computeBytesSize(16, getAppVersionBytes()); } + if (((bitField0_ & 0x00002000) == 0x00002000)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(17, getLogLinkBytes()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -1132,6 +1203,11 @@ public final class Messages { result = result && getAppVersion() .equals(other.getAppVersion()); } + result = result && (hasLogLink() == other.hasLogLink()); + if (hasLogLink()) { + result = result && getLogLink() + .equals(other.getLogLink()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -1205,6 +1281,10 @@ public final class Messages { hash = (37 * hash) + APPVERSION_FIELD_NUMBER; hash = (53 * hash) + 
getAppVersion().hashCode(); } + if (hasLogLink()) { + hash = (37 * hash) + LOGLINK_FIELD_NUMBER; + hash = (53 * hash) + getLogLink().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -1344,6 +1424,8 @@ public final class Messages { bitField0_ = (bitField0_ & ~0x00002000); appVersion_ = ""; bitField0_ = (bitField0_ & ~0x00004000); + logLink_ = ""; + bitField0_ = (bitField0_ & ~0x00008000); return this; } @@ -1436,6 +1518,10 @@ public final class Messages { to_bitField0_ |= 0x00001000; } result.appVersion_ = appVersion_; + if (((from_bitField0_ & 0x00008000) == 0x00008000)) { + to_bitField0_ |= 0x00002000; + } + result.logLink_ = logLink_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -1525,6 +1611,11 @@ public final class Messages { appVersion_ = other.appVersion_; onChanged(); } + if (other.hasLogLink()) { + bitField0_ |= 0x00008000; + logLink_ = other.logLink_; + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -2490,6 +2581,80 @@ public final class Messages { return this; } + // optional string logLink = 17; + private java.lang.Object logLink_ = ""; + /** + * <code>optional string logLink = 17;</code> + */ + public boolean hasLogLink() { + return ((bitField0_ & 0x00008000) == 0x00008000); + } + /** + * <code>optional string logLink = 17;</code> + */ + public java.lang.String getLogLink() { + java.lang.Object ref = logLink_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + logLink_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * <code>optional string logLink = 17;</code> + */ + public com.google.protobuf.ByteString + getLogLinkBytes() { + java.lang.Object ref = logLink_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + logLink_ = b; + return b; + } 
else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * <code>optional string logLink = 17;</code> + */ + public Builder setLogLink( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00008000; + logLink_ = value; + onChanged(); + return this; + } + /** + * <code>optional string logLink = 17;</code> + */ + public Builder clearLogLink() { + bitField0_ = (bitField0_ & ~0x00008000); + logLink_ = getDefaultInstance().getLogLink(); + onChanged(); + return this; + } + /** + * <code>optional string logLink = 17;</code> + */ + public Builder setLogLinkBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00008000; + logLink_ = value; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:org.apache.slider.api.RoleInstanceState) } @@ -17400,6 +17565,21 @@ public final class Messages { */ com.google.protobuf.ByteString getAppVersionBytes(); + + // optional string logLink = 14; + /** + * <code>optional string logLink = 14;</code> + */ + boolean hasLogLink(); + /** + * <code>optional string logLink = 14;</code> + */ + java.lang.String getLogLink(); + /** + * <code>optional string logLink = 14;</code> + */ + com.google.protobuf.ByteString + getLogLinkBytes(); } /** * Protobuf type {@code org.apache.slider.api.ContainerInformationProto} @@ -17525,6 +17705,11 @@ public final class Messages { appVersion_ = input.readBytes(); break; } + case 114: { + bitField0_ |= 0x00001000; + logLink_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -17979,6 +18164,49 @@ public final class Messages { } } + // optional string logLink = 14; + public static final int LOGLINK_FIELD_NUMBER = 14; + private java.lang.Object logLink_; + /** + * <code>optional string logLink = 14;</code> + */ + public boolean hasLogLink() { + return ((bitField0_ & 0x00001000) == 0x00001000); + } 
+ /** + * <code>optional string logLink = 14;</code> + */ + public java.lang.String getLogLink() { + java.lang.Object ref = logLink_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + logLink_ = s; + } + return s; + } + } + /** + * <code>optional string logLink = 14;</code> + */ + public com.google.protobuf.ByteString + getLogLinkBytes() { + java.lang.Object ref = logLink_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + logLink_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + private void initFields() { containerId_ = ""; component_ = ""; @@ -17993,6 +18221,7 @@ public final class Messages { hostURL_ = ""; placement_ = ""; appVersion_ = ""; + logLink_ = ""; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -18045,6 +18274,9 @@ public final class Messages { if (((bitField0_ & 0x00000800) == 0x00000800)) { output.writeBytes(13, getAppVersionBytes()); } + if (((bitField0_ & 0x00001000) == 0x00001000)) { + output.writeBytes(14, getLogLinkBytes()); + } getUnknownFields().writeTo(output); } @@ -18111,6 +18343,10 @@ public final class Messages { size += com.google.protobuf.CodedOutputStream .computeBytesSize(13, getAppVersionBytes()); } + if (((bitField0_ & 0x00001000) == 0x00001000)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(14, getLogLinkBytes()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -18196,6 +18432,11 @@ public final class Messages { result = result && getAppVersion() .equals(other.getAppVersion()); } + result = result && (hasLogLink() == other.hasLogLink()); + if (hasLogLink()) { + result = result && getLogLink() + 
.equals(other.getLogLink()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -18261,6 +18502,10 @@ public final class Messages { hash = (37 * hash) + APPVERSION_FIELD_NUMBER; hash = (53 * hash) + getAppVersion().hashCode(); } + if (hasLogLink()) { + hash = (37 * hash) + LOGLINK_FIELD_NUMBER; + hash = (53 * hash) + getLogLink().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -18401,6 +18646,8 @@ public final class Messages { bitField0_ = (bitField0_ & ~0x00000800); appVersion_ = ""; bitField0_ = (bitField0_ & ~0x00001000); + logLink_ = ""; + bitField0_ = (bitField0_ & ~0x00002000); return this; } @@ -18483,6 +18730,10 @@ public final class Messages { to_bitField0_ |= 0x00000800; } result.appVersion_ = appVersion_; + if (((from_bitField0_ & 0x00002000) == 0x00002000)) { + to_bitField0_ |= 0x00001000; + } + result.logLink_ = logLink_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -18559,6 +18810,11 @@ public final class Messages { appVersion_ = other.appVersion_; onChanged(); } + if (other.hasLogLink()) { + bitField0_ |= 0x00002000; + logLink_ = other.logLink_; + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -19362,6 +19618,80 @@ public final class Messages { return this; } + // optional string logLink = 14; + private java.lang.Object logLink_ = ""; + /** + * <code>optional string logLink = 14;</code> + */ + public boolean hasLogLink() { + return ((bitField0_ & 0x00002000) == 0x00002000); + } + /** + * <code>optional string logLink = 14;</code> + */ + public java.lang.String getLogLink() { + java.lang.Object ref = logLink_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + logLink_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * <code>optional string logLink = 14;</code> + */ + public 
com.google.protobuf.ByteString + getLogLinkBytes() { + java.lang.Object ref = logLink_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + logLink_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * <code>optional string logLink = 14;</code> + */ + public Builder setLogLink( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00002000; + logLink_ = value; + onChanged(); + return this; + } + /** + * <code>optional string logLink = 14;</code> + */ + public Builder clearLogLink() { + bitField0_ = (bitField0_ & ~0x00002000); + logLink_ = getDefaultInstance().getLogLink(); + onChanged(); + return this; + } + /** + * <code>optional string logLink = 14;</code> + */ + public Builder setLogLinkBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00002000; + logLink_ = value; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:org.apache.slider.api.ContainerInformationProto) } @@ -34057,103 +34387,104 @@ public final class Messages { static { java.lang.String[] descriptorData = { "\n\033SliderClusterMessages.proto\022\025org.apach" + - "e.slider.api\"\227\002\n\021RoleInstanceState\022\014\n\004na" + + "e.slider.api\"\250\002\n\021RoleInstanceState\022\014\n\004na" + "me\030\001 \002(\t\022\014\n\004role\030\002 \001(\t\022\r\n\005state\030\004 \002(\r\022\020\n" + "\010exitCode\030\005 \002(\r\022\017\n\007command\030\006 \001(\t\022\023\n\013diag" + "nostics\030\007 \001(\t\022\016\n\006output\030\010 \003(\t\022\023\n\013environ" + "ment\030\t \003(\t\022\016\n\006roleId\030\n \002(\r\022\020\n\010released\030\013" + " \002(\010\022\022\n\ncreateTime\030\014 \002(\003\022\021\n\tstartTime\030\r " + "\002(\003\022\014\n\004host\030\016 \002(\t\022\017\n\007hostURL\030\017 
\002(\t\022\022\n\nap" + - "pVersion\030\020 \001(\t\"*\n\027StopClusterRequestProt" + - "o\022\017\n\007message\030\001 \002(\t\"\032\n\030StopClusterRespons", - "eProto\"V\n\035UpgradeContainersRequestProto\022" + - "\017\n\007message\030\001 \002(\t\022\021\n\tcontainer\030\002 \003(\t\022\021\n\tc" + - "omponent\030\003 \003(\t\" \n\036UpgradeContainersRespo" + - "nseProto\".\n\027FlexClusterRequestProto\022\023\n\013c" + - "lusterSpec\030\001 \002(\t\",\n\030FlexClusterResponseP" + - "roto\022\020\n\010response\030\001 \002(\010\"\"\n GetJSONCluster" + - "StatusRequestProto\"8\n!GetJSONClusterStat" + - "usResponseProto\022\023\n\013clusterSpec\030\001 \002(\t\"/\n\037" + - "ListNodeUUIDsByRoleRequestProto\022\014\n\004role\030" + - "\001 \002(\t\"0\n ListNodeUUIDsByRoleResponseProt", - "o\022\014\n\004uuid\030\001 \003(\t\"#\n\023GetNodeRequestProto\022\014" + - "\n\004uuid\030\001 \002(\t\"U\n\024GetNodeResponseProto\022=\n\013" + - "clusterNode\030\001 \002(\0132(.org.apache.slider.ap" + - "i.RoleInstanceState\"+\n\033GetClusterNodesRe" + - "questProto\022\014\n\004uuid\030\001 \003(\t\"]\n\034GetClusterNo" + - "desResponseProto\022=\n\013clusterNode\030\001 \003(\0132(." 
+ - "org.apache.slider.api.RoleInstanceState\"" + - " \n\020EchoRequestProto\022\014\n\004text\030\001 \002(\t\"!\n\021Ech" + - "oResponseProto\022\014\n\004text\030\001 \002(\t\"\'\n\031KillCont" + - "ainerRequestProto\022\n\n\002id\030\001 \002(\t\"-\n\032KillCon", - "tainerResponseProto\022\017\n\007success\030\001 \002(\010\"D\n\025" + - "AMSuicideRequestProto\022\014\n\004text\030\001 \002(\t\022\016\n\006s" + - "ignal\030\002 \002(\005\022\r\n\005delay\030\003 \002(\005\"\030\n\026AMSuicideR" + - "esponseProto\"#\n!GetInstanceDefinitionReq" + - "uestProto\"^\n\"GetInstanceDefinitionRespon" + - "seProto\022\020\n\010internal\030\001 \002(\t\022\021\n\tresources\030\002" + - " \002(\t\022\023\n\013application\030\003 \002(\t\"`\n#Application" + - "LivenessInformationProto\022\034\n\024allRequestsS" + - "atisfied\030\001 \001(\010\022\033\n\023requestsOutstanding\030\002 " + - "\001(\005\"\256\003\n\031ComponentInformationProto\022\014\n\004nam", - "e\030\001 \001(\t\022\020\n\010priority\030\002 \001(\005\022\017\n\007desired\030\003 \001" + - "(\005\022\016\n\006actual\030\004 \001(\005\022\021\n\treleasing\030\005 \001(\005\022\021\n" + - "\trequested\030\006 \001(\005\022\016\n\006failed\030\007 \001(\005\022\017\n\007star" + - "ted\030\010 \001(\005\022\023\n\013startFailed\030\t \001(\005\022\021\n\tcomple" + - "ted\030\n \001(\005\022\026\n\016totalRequested\030\013 \001(\005\022\026\n\016fai" + - "lureMessage\030\014 \001(\t\022\027\n\017placementPolicy\030\r \001" + - "(\005\022\022\n\ncontainers\030\016 \003(\t\022\026\n\016failedRecently" + - "\030\017 \001(\005\022\022\n\nnodeFailed\030\020 \001(\005\022\021\n\tpreempted\030" + - "\021 \001(\005\022%\n\035pendingAntiAffineRequestCount\030\022" + - " \001(\005\022\036\n\026isAARequestOutstanding\030\023 \001(\010\"\210\002\n", - "\031ContainerInformationProto\022\023\n\013containerI" + - "d\030\001 \001(\t\022\021\n\tcomponent\030\002 \001(\t\022\020\n\010released\030\003" + - " 
\001(\010\022\r\n\005state\030\004 \001(\005\022\020\n\010exitCode\030\005 \001(\005\022\023\n" + - "\013diagnostics\030\006 \001(\t\022\022\n\ncreateTime\030\007 \001(\003\022\021" + - "\n\tstartTime\030\010 \001(\003\022\016\n\006output\030\t \003(\t\022\014\n\004hos" + - "t\030\n \001(\t\022\017\n\007hostURL\030\013 \001(\t\022\021\n\tplacement\030\014 " + - "\001(\t\022\022\n\nappVersion\030\r \001(\t\"N\n\024PingInformati" + - "onProto\022\014\n\004text\030\001 \001(\t\022\014\n\004verb\030\002 \001(\t\022\014\n\004b" + - "ody\030\003 \001(\t\022\014\n\004time\030\004 \001(\003\"\343\001\n\031NodeEntryInf" + - "ormationProto\022\020\n\010priority\030\001 \002(\005\022\021\n\treque", - "sted\030\002 \002(\005\022\020\n\010starting\030\003 \002(\005\022\023\n\013startFai" + - "led\030\004 \002(\005\022\016\n\006failed\030\005 \002(\005\022\026\n\016failedRecen" + - "tly\030\006 \002(\005\022\021\n\tpreempted\030\007 \002(\005\022\014\n\004live\030\010 \002" + - "(\005\022\021\n\treleasing\030\t \002(\005\022\020\n\010lastUsed\030\n \002(\003\022" + - "\014\n\004name\030\013 \002(\t\"\334\001\n\024NodeInformationProto\022\020" + - "\n\010hostname\030\001 \002(\t\022\r\n\005state\030\002 \002(\t\022\023\n\013httpA" + - "ddress\030\003 \002(\t\022\020\n\010rackName\030\004 \002(\t\022\016\n\006labels" + - "\030\005 \002(\t\022\024\n\014healthReport\030\006 \002(\t\022\023\n\013lastUpda" + - "ted\030\007 \002(\003\022A\n\007entries\030\010 \003(\01320.org.apache." 
+ - "slider.api.NodeEntryInformationProto\"\026\n\024", - "GetModelRequestProto\"\035\n\033GetModelDesiredR" + - "equestProto\"$\n\"GetModelDesiredAppconfReq" + - "uestProto\"&\n$GetModelDesiredResourcesReq" + - "uestProto\"%\n#GetModelResolvedAppconfRequ" + - "estProto\"\'\n%GetModelResolvedResourcesReq" + - "uestProto\"#\n!GetModelLiveResourcesReques" + - "tProto\"\037\n\035GetLiveContainersRequestProto\"" + - "u\n\036GetLiveContainersResponseProto\022\r\n\005nam" + - "es\030\001 \003(\t\022D\n\ncontainers\030\002 \003(\01320.org.apach" + - "e.slider.api.ContainerInformationProto\"3", - "\n\034GetLiveContainerRequestProto\022\023\n\013contai" + - "nerId\030\001 \002(\t\"\037\n\035GetLiveComponentsRequestP" + - "roto\"u\n\036GetLiveComponentsResponseProto\022\r" + - "\n\005names\030\001 \003(\t\022D\n\ncomponents\030\002 \003(\01320.org." + - "apache.slider.api.ComponentInformationPr" + - "oto\",\n\034GetLiveComponentRequestProto\022\014\n\004n" + - "ame\030\001 \002(\t\"$\n\"GetApplicationLivenessReque" + - "stProto\"\023\n\021EmptyPayloadProto\" \n\020WrappedJ" + - "sonProto\022\014\n\004json\030\001 \002(\t\"h\n\037GetCertificate" + - "StoreRequestProto\022\020\n\010hostname\030\001 \001(\t\022\023\n\013r", - "equesterId\030\002 \002(\t\022\020\n\010password\030\003 \002(\t\022\014\n\004ty" + - "pe\030\004 \002(\t\"1\n GetCertificateStoreResponseP" + - "roto\022\r\n\005store\030\001 \002(\014\"\032\n\030GetLiveNodesReque" + - "stProto\"W\n\031GetLiveNodesResponseProto\022:\n\005" + - "nodes\030\001 \003(\0132+.org.apache.slider.api.Node" + - "InformationProto\"\'\n\027GetLiveNodeRequestPr" + - "oto\022\014\n\004name\030\001 \002(\tB-\n\033org.apache.slider.a" + - "pi.protoB\010Messages\210\001\001\240\001\001" + "pVersion\030\020 \001(\t\022\017\n\007logLink\030\021 \001(\t\"*\n\027StopC" + + "lusterRequestProto\022\017\n\007message\030\001 \002(\t\"\032\n\030S", + "topClusterResponseProto\"V\n\035UpgradeContai" + + 
"nersRequestProto\022\017\n\007message\030\001 \002(\t\022\021\n\tcon" + + "tainer\030\002 \003(\t\022\021\n\tcomponent\030\003 \003(\t\" \n\036Upgra" + + "deContainersResponseProto\".\n\027FlexCluster" + + "RequestProto\022\023\n\013clusterSpec\030\001 \002(\t\",\n\030Fle" + + "xClusterResponseProto\022\020\n\010response\030\001 \002(\010\"" + + "\"\n GetJSONClusterStatusRequestProto\"8\n!G" + + "etJSONClusterStatusResponseProto\022\023\n\013clus" + + "terSpec\030\001 \002(\t\"/\n\037ListNodeUUIDsByRoleRequ" + + "estProto\022\014\n\004role\030\001 \002(\t\"0\n ListNodeUUIDsB", + "yRoleResponseProto\022\014\n\004uuid\030\001 \003(\t\"#\n\023GetN" + + "odeRequestProto\022\014\n\004uuid\030\001 \002(\t\"U\n\024GetNode" + + "ResponseProto\022=\n\013clusterNode\030\001 \002(\0132(.org" + + ".apache.slider.api.RoleInstanceState\"+\n\033" + + "GetClusterNodesRequestProto\022\014\n\004uuid\030\001 \003(" + + "\t\"]\n\034GetClusterNodesResponseProto\022=\n\013clu" + + "sterNode\030\001 \003(\0132(.org.apache.slider.api.R" + + "oleInstanceState\" \n\020EchoRequestProto\022\014\n\004" + + "text\030\001 \002(\t\"!\n\021EchoResponseProto\022\014\n\004text\030" + + "\001 \002(\t\"\'\n\031KillContainerRequestProto\022\n\n\002id", + "\030\001 \002(\t\"-\n\032KillContainerResponseProto\022\017\n\007" + + "success\030\001 \002(\010\"D\n\025AMSuicideRequestProto\022\014" + + "\n\004text\030\001 \002(\t\022\016\n\006signal\030\002 \002(\005\022\r\n\005delay\030\003 " + + "\002(\005\"\030\n\026AMSuicideResponseProto\"#\n!GetInst" + + "anceDefinitionRequestProto\"^\n\"GetInstanc" + + "eDefinitionResponseProto\022\020\n\010internal\030\001 \002" + + "(\t\022\021\n\tresources\030\002 \002(\t\022\023\n\013application\030\003 \002" + + "(\t\"`\n#ApplicationLivenessInformationProt" + + "o\022\034\n\024allRequestsSatisfied\030\001 \001(\010\022\033\n\023reque" + + "stsOutstanding\030\002 \001(\005\"\256\003\n\031ComponentInform", + "ationProto\022\014\n\004name\030\001 
\001(\t\022\020\n\010priority\030\002 \001" + + "(\005\022\017\n\007desired\030\003 \001(\005\022\016\n\006actual\030\004 \001(\005\022\021\n\tr" + + "eleasing\030\005 \001(\005\022\021\n\trequested\030\006 \001(\005\022\016\n\006fai" + + "led\030\007 \001(\005\022\017\n\007started\030\010 \001(\005\022\023\n\013startFaile" + + "d\030\t \001(\005\022\021\n\tcompleted\030\n \001(\005\022\026\n\016totalReque" + + "sted\030\013 \001(\005\022\026\n\016failureMessage\030\014 \001(\t\022\027\n\017pl" + + "acementPolicy\030\r \001(\005\022\022\n\ncontainers\030\016 \003(\t\022" + + "\026\n\016failedRecently\030\017 \001(\005\022\022\n\nnodeFailed\030\020 " + + "\001(\005\022\021\n\tpreempted\030\021 \001(\005\022%\n\035pendingAntiAff" + + "ineRequestCount\030\022 \001(\005\022\036\n\026isAARequestOuts", + "tanding\030\023 \001(\010\"\231\002\n\031ContainerInformationPr" + + "oto\022\023\n\013containerId\030\001 \001(\t\022\021\n\tcomponent\030\002 " + + "\001(\t\022\020\n\010released\030\003 \001(\010\022\r\n\005state\030\004 \001(\005\022\020\n\010" + + "exitCode\030\005 \001(\005\022\023\n\013diagnostics\030\006 \001(\t\022\022\n\nc" + + "reateTime\030\007 \001(\003\022\021\n\tstartTime\030\010 \001(\003\022\016\n\006ou" + + "tput\030\t \003(\t\022\014\n\004host\030\n \001(\t\022\017\n\007hostURL\030\013 \001(" + + "\t\022\021\n\tplacement\030\014 \001(\t\022\022\n\nappVersion\030\r \001(\t" + + "\022\017\n\007logLink\030\016 \001(\t\"N\n\024PingInformationProt" + + "o\022\014\n\004text\030\001 \001(\t\022\014\n\004verb\030\002 \001(\t\022\014\n\004body\030\003 " + + "\001(\t\022\014\n\004time\030\004 \001(\003\"\343\001\n\031NodeEntryInformati", + "onProto\022\020\n\010priority\030\001 \002(\005\022\021\n\trequested\030\002" + + " \002(\005\022\020\n\010starting\030\003 \002(\005\022\023\n\013startFailed\030\004 " + + "\002(\005\022\016\n\006failed\030\005 \002(\005\022\026\n\016failedRecently\030\006 " + + "\002(\005\022\021\n\tpreempted\030\007 
\002(\005\022\014\n\004live\030\010 \002(\005\022\021\n\t" + + "releasing\030\t \002(\005\022\020\n\010lastUsed\030\n \002(\003\022\014\n\004nam" + + "e\030\013 \002(\t\"\334\001\n\024NodeInformationProto\022\020\n\010host" + + "name\030\001 \002(\t\022\r\n\005state\030\002 \002(\t\022\023\n\013httpAddress" + + "\030\003 \002(\t\022\020\n\010rackName\030\004 \002(\t\022\016\n\006labels\030\005 \002(\t" + + "\022\024\n\014healthReport\030\006 \002(\t\022\023\n\013lastUpdated\030\007 " + + "\002(\003\022A\n\007entries\030\010 \003(\01320.org.apache.slider", + ".api.NodeEntryInformationProto\"\026\n\024GetMod" + + "elRequestProto\"\035\n\033GetModelDesiredRequest" + + "Proto\"$\n\"GetModelDesiredAppconfRequestPr" + + "oto\"&\n$GetModelDesiredResourcesRequestPr" + + "oto\"%\n#GetModelResolvedAppconfRequestPro" + + "to\"\'\n%GetModelResolvedResourcesRequestPr" + + "oto\"#\n!GetModelLiveResourcesRequestProto" + + "\"\037\n\035GetLiveContainersRequestProto\"u\n\036Get" + + "LiveContainersResponseProto\022\r\n\005names\030\001 \003" + + "(\t\022D\n\ncontainers\030\002 \003(\01320.org.apache.slid", + "er.api.ContainerInformationProto\"3\n\034GetL" + + "iveContainerRequestProto\022\023\n\013containerId\030" + + "\001 \002(\t\"\037\n\035GetLiveComponentsRequestProto\"u" + + "\n\036GetLiveComponentsResponseProto\022\r\n\005name" + + "s\030\001 \003(\t\022D\n\ncomponents\030\002 \003(\01320.org.apache" + + ".slider.api.ComponentInformationProto\",\n" + + "\034GetLiveComponentRequestProto\022\014\n\004name\030\001 " + + "\002(\t\"$\n\"GetApplicationLivenessRequestProt" + + "o\"\023\n\021EmptyPayloadProto\" \n\020WrappedJsonPro" + + "to\022\014\n\004json\030\001 \002(\t\"h\n\037GetCertificateStoreR", + "equestProto\022\020\n\010hostname\030\001 \001(\t\022\023\n\013request" + + "erId\030\002 \002(\t\022\020\n\010password\030\003 \002(\t\022\014\n\004type\030\004 \002" + + "(\t\"1\n GetCertificateStoreResponseProto\022\r" + + "\n\005store\030\001 
\002(\014\"\032\n\030GetLiveNodesRequestProt" + + "o\"W\n\031GetLiveNodesResponseProto\022:\n\005nodes\030" + + "\001 \003(\0132+.org.apache.slider.api.NodeInform" + + "ationProto\"\'\n\027GetLiveNodeRequestProto\022\014\n" + + "\004name\030\001 \002(\tB-\n\033org.apache.slider.api.pro" + + "toB\010Messages\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -34165,7 +34496,7 @@ public final class Messages { internal_static_org_apache_slider_api_RoleInstanceState_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_slider_api_RoleInstanceState_descriptor, - new java.lang.String[] { "Name", "Role", "State", "ExitCode", "Command", "Diagnostics", "Output", "Environment", "RoleId", "Released", "CreateTime", "StartTime", "Host", "HostURL", "AppVersion", }); + new java.lang.String[] { "Name", "Role", "State", "ExitCode", "Command", "Diagnostics", "Output", "Environment", "RoleId", "Released", "CreateTime", "StartTime", "Host", "HostURL", "AppVersion", "LogLink", }); internal_static_org_apache_slider_api_StopClusterRequestProto_descriptor = getDescriptor().getMessageTypes().get(1); internal_static_org_apache_slider_api_StopClusterRequestProto_fieldAccessorTable = new @@ -34315,7 +34646,7 @@ public final class Messages { internal_static_org_apache_slider_api_ContainerInformationProto_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_slider_api_ContainerInformationProto_descriptor, - new java.lang.String[] { "ContainerId", "Component", "Released", "State", "ExitCode", "Diagnostics", "CreateTime", "StartTime", "Output", "Host", "HostURL", "Placement", "AppVersion", }); + new java.lang.String[] { "ContainerId", "Component", "Released", "State", "ExitCode", "Diagnostics", "CreateTime", "StartTime", "Output", "Host", 
"HostURL", "Placement", "AppVersion", "LogLink", }); internal_static_org_apache_slider_api_PingInformationProto_descriptor = getDescriptor().getMessageTypes().get(26); internal_static_org_apache_slider_api_PingInformationProto_fieldAccessorTable = new http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java index 17fd965..50e168e 100644 --- a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java +++ b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java @@ -33,12 +33,10 @@ import org.apache.slider.core.persist.ConfTreeSerDeser; import org.apache.slider.server.services.security.SecurityStore; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -235,6 +233,9 @@ public class RestTypeMarshalling { if (wire.hasDiagnostics()) { info.diagnostics = wire.getDiagnostics(); } + if (wire.hasLogLink()) { + info.logLink = wire.getLogLink(); + } if (wire.hasHost()) { info.host = wire.getHost(); } @@ -294,6 +295,12 @@ public class RestTypeMarshalling { } builder.setStartTime(info.startTime); builder.setState(info.state); + if (info.logLink != null) { + builder.setLogLink(info.logLink); + } + if (info.exitCode != null) { + builder.setExitCode(info.exitCode); + } return builder.build(); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java 
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
new file mode 100644
index 0000000..2b563e2
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.slider.api.types;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.codehaus.jackson.JsonGenerationException;
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.annotate.JsonIgnore;
+import org.codehaus.jackson.annotate.JsonIgnoreProperties;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.SerializationConfig;
+import org.codehaus.jackson.map.annotate.JsonSerialize;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Application-level diagnostics: the final status and message of the
+ * application together with the information recorded for its containers.
+ * Instances are converted to and from JSON with Jackson.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
+public class ApplicationDiagnostics {
+  private static final Logger logger = LoggerFactory
+      .getLogger(ApplicationDiagnostics.class);
+
+  /** Recorded containers keyed by container id; excluded from JSON. */
+  @JsonIgnore
+  private Map<String, ContainerInformation> containersMap = new HashMap<>();
+  private FinalApplicationStatus finalStatus;
+  private String finalMessage;
+  private Set<ContainerInformation> containers = new HashSet<>();
+
+  /**
+   * @return a read-only view of the recorded containers
+   */
+  public Collection<ContainerInformation> getContainers() {
+    return Collections.unmodifiableCollection(containers);
+  }
+
+  /**
+   * Replace the recorded containers and rebuild the id index from them. The
+   * index is never written to JSON, so this setter is what allows a
+   * deserialized instance to answer {@link #getContainer(String)}.
+   *
+   * @param newContainers containers to record; null is treated as empty
+   */
+  public void setContainers(Collection<ContainerInformation> newContainers) {
+    // Copy first: the argument may be a view of our own set.
+    Set<ContainerInformation> copy = new HashSet<>();
+    if (newContainers != null) {
+      copy.addAll(newContainers);
+    }
+    containersMap.clear();
+    containers.clear();
+    for (ContainerInformation container : copy) {
+      addContainer(container);
+    }
+  }
+
+  /**
+   * Look up a recorded container.
+   *
+   * @param containerId id of the container
+   * @return the recorded information, or null if the id is unknown
+   */
+  public ContainerInformation getContainer(String containerId) {
+    return containersMap.get(containerId);
+  }
+
+  /**
+   * Record a container, replacing any entry already held for the same
+   * container id. A null argument is ignored.
+   *
+   * @param container container information to record
+   */
+  public void addContainer(ContainerInformation container) {
+    if (container == null) {
+      return;
+    }
+    containersMap.put(container.containerId, container);
+    // Set.add() keeps an existing equal element, so evict it first; otherwise
+    // the serialized set and the id index could hold different objects.
+    containers.remove(container);
+    containers.add(container);
+  }
+
+  public FinalApplicationStatus getFinalStatus() {
+    return finalStatus;
+  }
+
+  public void setFinalStatus(FinalApplicationStatus finalStatus) {
+    this.finalStatus = finalStatus;
+  }
+
+  public String getFinalMessage() {
+    return finalMessage;
+  }
+
+  public void setFinalMessage(String finalMessage) {
+    this.finalMessage = finalMessage;
+  }
+
+  /**
+   * @return the indented JSON form, or {@link Object#toString()} if the
+   *         conversion fails
+   */
+  @Override
+  public String toString() {
+    try {
+      return toJsonString();
+    } catch (Exception e) {
+      logger.debug("Failed to convert ApplicationDiagnostics to JSON ", e);
+      return super.toString();
+    }
+  }
+
+  /**
+   * Serialize this instance to indented JSON.
+   *
+   * @return the JSON text
+   * @throws IOException if serialization fails
+   */
+  public String toJsonString()
+      throws IOException, JsonGenerationException, JsonMappingException {
+    ObjectMapper mapper = new ObjectMapper();
+    mapper.configure(SerializationConfig.Feature.INDENT_OUTPUT, true);
+    return mapper.writeValueAsString(this);
+  }
+
+  /**
+   * Parse an instance from JSON text.
+   *
+   * @param json JSON text to parse
+   * @return the parsed instance
+   * @throws IOException if parsing fails; the failure is logged and rethrown
+   */
+  public static ApplicationDiagnostics fromJson(String json)
+      throws IOException, JsonParseException, JsonMappingException {
+    ObjectMapper mapper = new ObjectMapper();
+    try {
+      return mapper.readValue(json, ApplicationDiagnostics.class);
+    } catch (IOException e) {
+      logger.error("Exception while parsing json : " + e + "\n" + json, e);
+      throw e;
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/api/types/ContainerInformation.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/types/ContainerInformation.java b/slider-core/src/main/java/org/apache/slider/api/types/ContainerInformation.java index 6991340..0f40498 100644 --- a/slider-core/src/main/java/org/apache/slider/api/types/ContainerInformation.java +++ b/slider-core/src/main/java/org/apache/slider/api/types/ContainerInformation.java @@ -47,6 +47,89 @@ public class ContainerInformation { * or the log cannot be picked up */ public String[] output; + public String logLink; + + public String getContainerId() { + return containerId; + } + + public String getComponent() { + return component; + } + + public String getAppVersion() { + return appVersion; + } + + public Boolean getReleased() { + return released; + } + + public int getState() { + return state; + } + + public Integer getExitCode() { + return exitCode; + } + + public String getDiagnostics() 
{ + return diagnostics; + } + + public long getCreateTime() { + return createTime; + } + + public long getStartTime() { + return startTime; + } + + public String getHost() { + return host; + } + + public String getHostURL() { + return hostURL; + } + + public String getPlacement() { + return placement; + } + + public String[] getOutput() { + return output; + } + + public String getLogLink() { + return logLink; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((containerId == null) ? 0 : containerId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ContainerInformation other = (ContainerInformation) obj; + if (containerId == null) { + if (other.containerId != null) + return false; + } else if (!containerId.equals(other.containerId)) + return false; + return true; + } @Override public String toString() { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/client/SliderClient.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java index 471110b..1ba32bb 100644 --- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java +++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java @@ -71,6 +71,7 @@ import org.apache.slider.api.SliderClusterProtocol; import org.apache.slider.api.StateValues; import org.apache.slider.api.proto.Messages; import org.apache.slider.api.types.ContainerInformation; +import org.apache.slider.api.types.ApplicationDiagnostics; import org.apache.slider.api.types.NodeInformationList; import org.apache.slider.api.types.SliderInstanceDescription; import 
org.apache.slider.client.ipc.SliderApplicationIpcClient; @@ -151,7 +152,6 @@ import org.apache.slider.core.registry.YarnAppListClient; import org.apache.slider.core.registry.docstore.ConfigFormat; import org.apache.slider.core.registry.docstore.PublishedConfigSet; import org.apache.slider.core.registry.docstore.PublishedConfiguration; -import org.apache.slider.core.registry.docstore.PublishedConfigurationOutputter; import org.apache.slider.core.registry.docstore.PublishedExports; import org.apache.slider.core.registry.docstore.PublishedExportsOutputter; import org.apache.slider.core.registry.docstore.PublishedExportsSet; @@ -3142,6 +3142,8 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe validateClusterName(clustername); String outfile = statusArgs.getOutput(); ClusterDescription status = getClusterDescription(clustername); + // no need to print diagnostics in status command + status.appDiagnostics = null; String text = status.toJsonString(); if (outfile == null) { log.info(text); @@ -3768,6 +3770,8 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe actionDiagnosticAll(diagnosticArgs); } else if (diagnosticArgs.level) { actionDiagnosticIntelligent(diagnosticArgs); + } else if (diagnosticArgs.containers) { + printDiagnosticContainers(actionDiagnosticContainers(diagnosticArgs)); } else { // it's an unknown option log.info(CommonArgs.usage(serviceArgs, ACTION_DIAGNOSTICS)); @@ -3780,6 +3784,35 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe return EXIT_SUCCESS; } + public ApplicationDiagnostics actionDiagnosticContainers( + ActionDiagnosticArgs diagnosticArgs) + throws YarnException, IOException, URISyntaxException { + String clusterName = diagnosticArgs.name; + requireArgumentSet(Arguments.ARG_NAME, clusterName); + return getApplicationDiagnostics(clusterName); + } + + private void printDiagnosticContainers(ApplicationDiagnostics appDiagnostics) { + if 
(appDiagnostics == null + || CollectionUtils.isEmpty(appDiagnostics.getContainers())) { + log.info("No application container diagnostics found yet"); + return; + } + log.info("Application container diagnostics:{}{}", + System.getProperty("line.separator"), appDiagnostics); + } + + private ApplicationDiagnostics getApplicationDiagnostics(String clusterName) + throws YarnException, IOException { + SliderClusterOperations clusterOperations = createClusterOperations( + clusterName); + // cluster not found exceptions will be thrown upstream + ClusterDescription clusterDescription = clusterOperations + .getClusterDescription(); + log.info("Slider AppMaster is accessible"); + return clusterDescription.appDiagnostics; + } + private void actionDiagnosticIntelligent(ActionDiagnosticArgs diagnosticArgs) throws YarnException, IOException, URISyntaxException { // not using member variable clustername because we want to place http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/common/params/ActionDiagnosticArgs.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/common/params/ActionDiagnosticArgs.java b/slider-core/src/main/java/org/apache/slider/common/params/ActionDiagnosticArgs.java index c891873..4e72cd9 100644 --- a/slider-core/src/main/java/org/apache/slider/common/params/ActionDiagnosticArgs.java +++ b/slider-core/src/main/java/org/apache/slider/common/params/ActionDiagnosticArgs.java @@ -62,6 +62,10 @@ public class ActionDiagnosticArgs extends AbstractActionArgs { description = "diagnose each slider configuration one by one") public boolean level; + @Parameter(names = {ARG_CONTAINERS}, + description = "container info diagnostics (including old/dead ones)") + public boolean containers; + /** * Get the min #of params expected * @return the min number of params in the {@link #parameters} field 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/providers/ProviderRole.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/providers/ProviderRole.java b/slider-core/src/main/java/org/apache/slider/providers/ProviderRole.java index 761ac0f..4f6be52 100644 --- a/slider-core/src/main/java/org/apache/slider/providers/ProviderRole.java +++ b/slider-core/src/main/java/org/apache/slider/providers/ProviderRole.java @@ -132,4 +132,4 @@ public final class ProviderRole { sb.append('}'); return sb.toString(); } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index 1a27374..8232225 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -89,8 +90,11 @@ import org.apache.slider.api.ClusterDescription; import org.apache.slider.api.InternalKeys; import org.apache.slider.api.ResourceKeys; import org.apache.slider.api.RoleKeys; +import 
org.apache.slider.api.StateValues; import org.apache.slider.api.StatusKeys; import org.apache.slider.api.proto.SliderClusterAPI; +import org.apache.slider.api.types.ApplicationDiagnostics; +import org.apache.slider.api.types.ContainerInformation; import org.apache.slider.client.SliderYarnClientImpl; import org.apache.slider.common.SliderExitCodes; import org.apache.slider.common.SliderKeys; @@ -1573,7 +1577,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService FinalApplicationStatus appStatus; log.info("Triggering shutdown of the AM: {}", stopAction); - String appMessage = stopAction.getMessage(); + String finalMessage = stopAction.getMessage(); //stop the daemon & grab its exit code int exitCode = stopAction.getExitCode(); Exception exception = stopAction.getEx(); @@ -1605,6 +1609,12 @@ public class SliderAppMaster extends AbstractSliderLaunchedService // signal to the RM log.info("Application completed. Signalling finish to RM"); + // Serialize the app diagnostics to app message for rich detailed + // diagnostics + ApplicationDiagnostics appDiagnostics = getApplicationDiagnostics(); + appDiagnostics.setFinalStatus(appStatus); + appDiagnostics.setFinalMessage(finalMessage); + String appMessage = appDiagnostics.toString(); try { log.info("Unregistering AM status={} message={}", appStatus, appMessage); asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null); @@ -2281,26 +2291,27 @@ public class SliderAppMaster extends AbstractSliderLaunchedService @Override // NMClientAsync.CallbackHandler public void onStartContainerError(ContainerId containerId, Throwable t) { - LOG_YARN.error("Failed to start Container {}", containerId, t); + LOG_YARN.error("Failed to start Container " + containerId, t); appState.onNodeManagerContainerStartFailed(containerId, t); } @Override // NMClientAsync.CallbackHandler public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) { - LOG_YARN.debug("Container Status: id={}, 
status={}", containerId, + LOG_YARN.info("Container Status: id={}, status={}", containerId, containerStatus); + appState.onContainerStatusReceived(containerId, containerStatus); } @Override // NMClientAsync.CallbackHandler public void onGetContainerStatusError( ContainerId containerId, Throwable t) { - LOG_YARN.error("Failed to query the status of Container {}", containerId); + LOG_YARN.error("Failed to query the status of Container " + containerId, t); } @Override // NMClientAsync.CallbackHandler public void onStopContainerError(ContainerId containerId, Throwable t) { - LOG_YARN.warn("Failed to stop Container {}", containerId); + LOG_YARN.error("Failed to stop Container " + containerId, t); } public AggregateConf getInstanceDefinition() { @@ -2314,6 +2325,14 @@ public class SliderAppMaster extends AbstractSliderLaunchedService return appState.getClusterStatus(); } + /** + * This is app level diagnostics with info for each and every container + * allocated for this application during its entire lifetime. 
+ */ + public ApplicationDiagnostics getApplicationDiagnostics() { + return getClusterDescription().appDiagnostics; + } + public ProviderService getProviderService() { return providerService; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java index 19980aa..c5fd38c 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java @@ -25,7 +25,9 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -42,9 +44,12 @@ import org.apache.slider.api.ClusterDescriptionOperations; import org.apache.slider.api.ClusterNode; import org.apache.slider.api.InternalKeys; import org.apache.slider.api.ResourceKeys; +import org.apache.slider.api.StateValues; import org.apache.slider.api.StatusKeys; import org.apache.slider.api.types.ApplicationLivenessInformation; import org.apache.slider.api.types.ComponentInformation; +import org.apache.slider.api.types.ContainerInformation; +import org.apache.slider.api.types.ApplicationDiagnostics; import org.apache.slider.api.types.RoleStatistics; import org.apache.slider.common.SliderExitCodes; import 
org.apache.slider.common.SliderKeys; @@ -285,7 +290,7 @@ public class AppState { private int failureThreshold = 10; private int nodeFailureThreshold = 3; - private String logServerURL = ""; + private static String logServerURL = ""; /** * Selector of containers to release; application wide. @@ -1427,6 +1432,18 @@ public class AppState { "Unknown role for node " + node); } getLiveContainers().put(node.getContainerId(), node); + + // Store container info for diagnostics + log.info("Initial diagnostics entry of container {}", container.getId()); + ContainerInformation ci = node.serialize(); + // Add the live container log link + if (ci != null) { + String logLink = AppState.getLiveLogsURLForContainer(container); + node.logLink = logLink; + ci.logLink = logLink; + } + getApplicationDiagnostics().addContainer(ci); + //tell role history roleHistory.onContainerStarted(container); } @@ -1492,18 +1509,23 @@ public class AppState { */ public synchronized void onNodeManagerContainerStartFailed(ContainerId containerId, Throwable thrown) { + String text; + if (null != thrown) { + text = SliderUtils.stringify(thrown); + } else { + text = "container start failure"; + } + // store container diagnostics on start error + storeContainerDiagnostics(containerId.toString(), + ContainerExitStatus.ABORTED, text, StateValues.STATE_INCOMPLETE, + getCompletedLogLink(containerId)); + removeOwnedContainer(containerId); incFailedCountainerCount(); incStartFailedCountainerCount(); RoleInstance instance = getStartingContainers().remove(containerId); if (null != instance) { RoleStatus roleStatus = lookupRoleStatus(instance.roleId); - String text; - if (null != thrown) { - text = SliderUtils.stringify(thrown); - } else { - text = "container start failure"; - } instance.diagnostics = text; roleStatus.noteFailed(true, text, ContainerOutcome.Failed); getFailedContainers().put(containerId, instance); @@ -1608,6 +1630,12 @@ public class AppState { NodeCompletionResult result = new 
NodeCompletionResult(); RoleInstance roleInstance; + // store container diagnostics on completion + storeContainerDiagnostics(containerId.toString(), status.getExitStatus(), + status.getDiagnostics(), + getContainerStateForDiagnostics(status.getState()), + getCompletedLogLink(containerId)); + int exitStatus = status.getExitStatus(); result.exitStatus = exitStatus; if (containersBeingReleased.containsKey(containerId)) { @@ -1728,7 +1756,7 @@ public class AppState { * @param c container * @return the URL or "" if it cannot be determined */ - protected String getLogsURLForContainer(Container c) { + public static String getLogsURLForContainer(Container c) { if (c==null) { return null; } @@ -1743,9 +1771,28 @@ public class AppState { completedLogsUrl = url + "/" + c.getNodeId() + "/" + c.getId() + "/ctx/" + user; } + log.info("Completed log link = {}", completedLogsUrl); return completedLogsUrl; } + public static String getLiveLogsURLForContainer(Container c) { + if (c == null) { + return null; + } + String user = null; + try { + user = SliderUtils.getCurrentUser().getShortUserName(); + } catch (IOException ignored) { + } + String liveLogsUrl = ""; + if (user != null) { + liveLogsUrl = "http://" + c.getNodeHttpAddress() + "/node/containerlogs/" + + c.getId() + "/" + user; + } + log.info("Live log link = {}", liveLogsUrl); + return liveLogsUrl; + } + /** * Return the percentage done that Slider is to have YARN display in its * Web UI @@ -2283,6 +2330,13 @@ public class AppState { ContainerId id = possible.getId(); if (!instance.released) { String url = getLogsURLForContainer(possible); + // Add the completed container log link (overwrites log link for live + // container) + ContainerInformation ci = getApplicationDiagnostics() + .getContainer(id.toString()); + if (ci != null) { + ci.logLink = url; + } log.info("Releasing container. 
Log: " + url); try { containerReleaseSubmitted(possible); @@ -2384,6 +2438,14 @@ public class AppState { } } + public void onContainerStatusReceived(ContainerId containerId, + ContainerStatus containerStatus) { + // store container diagnostics on status update + storeContainerDiagnostics(containerId.toString(), + containerStatus.getExitStatus(), containerStatus.getDiagnostics(), + getContainerStateForDiagnostics(containerStatus.getState()), null); + } + /** * Get diagnostics info about containers */ @@ -2464,6 +2526,60 @@ public class AppState { innerOnNodeManagerContainerStarted(cid); } + public ApplicationDiagnostics getApplicationDiagnostics() { + return clusterStatus.appDiagnostics; + } + + /** + * Store container diagnostics if container info is available. If diagnostics + * information for this container already existed, it will be overwritten. + * + * @param containerId id of the container + * @param exitCode exit code reason (of type {@link ContainerExitStatus}) + * @param diagnostics any textual message + * @param state final state of container (of type {@link StateValues}) + * @param logLink jobhistory link for a finished container or nodemanager link + * for a running one + */ + public void storeContainerDiagnostics(String containerId, int exitCode, + String diagnostics, int state, String logLink) { + ContainerInformation containerInfo = getApplicationDiagnostics() + .getContainer(containerId); + if (containerInfo != null) { + containerInfo.exitCode = exitCode; + containerInfo.diagnostics = diagnostics; + containerInfo.state = state; + if (logLink != null) { + containerInfo.logLink = logLink; + } + } + } + + private int getContainerStateForDiagnostics(ContainerState state) { + if (state == null) { + return StateValues.STATE_INCOMPLETE; + } + switch (state) { + case NEW: + return StateValues.STATE_CREATED; + case RUNNING: + return StateValues.STATE_LIVE; + case COMPLETE: + return StateValues.STATE_STOPPED; + default: + return StateValues.STATE_INCOMPLETE; 
+ } + } + + private String getCompletedLogLink(ContainerId containerId) { + RoleInstance roleInstance = getLiveContainers().get(containerId); + String logLink = null; + if (roleInstance != null) { + logLink = AppState.getLogsURLForContainer(roleInstance.container); + } + return logLink; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("AppState{"); http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java index 30cfec9..015a8bf 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleInstance.java @@ -95,6 +95,11 @@ public final class RoleInstance implements Cloneable { public String[] output; /** + * Absolute path to log - if available + */ + public String logLink; + + /** * Any environment details */ public String[] environment; @@ -162,6 +167,7 @@ public final class RoleInstance implements Cloneable { sb.append(", command='").append(command).append('\''); sb.append(", diagnostics='").append(diagnostics).append('\''); sb.append(", output=").append(Arrays.toString(output)); + sb.append(", logLink=").append(logLink); sb.append(", environment=").append(Arrays.toString(environment)); sb.append('}'); return sb.toString(); @@ -197,6 +203,9 @@ public final class RoleInstance implements Cloneable { if (output != null) { builder.addAllOutput(Arrays.asList(output)); } + if (logLink != null) { + builder.setLogLink(logLink); + } if (role != null) { builder.setRole(role); } @@ -241,6 +250,7 @@ public final class RoleInstance implements Cloneable { if (output != null) { 
node.output = Arrays.copyOf(output, output.length); } + node.logLink = logLink; node.released = released; node.role = role; node.roleId = roleId; @@ -318,6 +328,7 @@ public final class RoleInstance implements Cloneable { if (output != null) { info.output = output; } + info.logLink = logLink; return info; } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/main/proto/SliderClusterMessages.proto ---------------------------------------------------------------------- diff --git a/slider-core/src/main/proto/SliderClusterMessages.proto b/slider-core/src/main/proto/SliderClusterMessages.proto index b8bdc59..40da257 100644 --- a/slider-core/src/main/proto/SliderClusterMessages.proto +++ b/slider-core/src/main/proto/SliderClusterMessages.proto @@ -44,6 +44,7 @@ message RoleInstanceState { required string host = 14; required string hostURL = 15; optional string appVersion = 16; + optional string logLink = 17; } /** @@ -275,6 +276,7 @@ message ContainerInformationProto { optional string hostURL = 11; optional string placement = 12; optional string appVersion = 13; + optional string logLink = 14; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9644fd34/slider-core/src/test/groovy/org/apache/slider/client/TestDiagnostics.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/client/TestDiagnostics.groovy b/slider-core/src/test/groovy/org/apache/slider/client/TestDiagnostics.groovy index 81c4daf..59e6d54 100644 --- a/slider-core/src/test/groovy/org/apache/slider/client/TestDiagnostics.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/client/TestDiagnostics.groovy @@ -20,21 +20,34 @@ package org.apache.slider.client import groovy.util.logging.Slf4j import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.FileUtil +import org.apache.hadoop.fs.Path +import 
org.apache.hadoop.fs.RawLocalFileSystem import org.apache.hadoop.security.KerberosDiags +import org.apache.hadoop.yarn.api.records.ApplicationReport +import org.apache.hadoop.yarn.api.records.YarnApplicationState import org.apache.hadoop.yarn.conf.YarnConfiguration import static org.apache.slider.common.Constants.SUN_SECURITY_KRB5_DEBUG +import org.apache.slider.agent.AgentMiniClusterTestBase +import org.apache.slider.common.SliderKeys import org.apache.slider.common.params.ActionDiagnosticArgs import org.apache.slider.common.params.Arguments import org.apache.slider.common.params.ClientArgs import org.apache.slider.common.params.SliderActions +import org.apache.slider.common.tools.SliderFileSystem import org.apache.slider.common.tools.SliderUtils +import org.apache.slider.core.exceptions.SliderException import org.apache.slider.core.main.ServiceLauncher +import org.apache.slider.providers.agent.AgentKeys import org.apache.slider.test.YarnZKMiniClusterTestBase import org.junit.Test @Slf4j -class TestDiagnostics extends YarnZKMiniClusterTestBase { +class TestDiagnostics extends AgentMiniClusterTestBase { + private static SliderFileSystem testFileSystem + private static String APP_NAME = "HBASE" + private static String APP_VERSION = "1.0.0" @Test public void testClientDiags() throws Throwable { @@ -118,4 +131,138 @@ class TestDiagnostics extends YarnZKMiniClusterTestBase { def output = sw.toString() assert output.contains(s) } + + @Test + public void testContainerDiagsNoAppContainer() throws Throwable { + super.setup() + describe("Create a live cluster then run the container diagnostics command") + createMiniCluster("testactiondiag", configuration, 1, true) + String clustername = "testdiagclusternoappcontainers" + + //launch the cluster + describe("Run an app with AM only and no app containers") + ServiceLauncher<SliderClient> launcher = createStandaloneAM( + clustername, + true, + false) + + SliderClient sliderClient = launcher.service + ApplicationReport report = 
waitForClusterLive(sliderClient) + + //now run the diagnostics action through the client API + describe("Now running diagnostics command") + ActionDiagnosticArgs diagArgs = new ActionDiagnosticArgs() + diagArgs.name = clustername + diagArgs.containers = true + int status = sliderClient.actionDiagnostic(diagArgs) + assert 0 == status + + //now exec the diagnostics command via the command-line launcher + ServiceLauncher diagLauncher = launchClientAgainstMiniMR( + //config includes RM binding info + new YarnConfiguration(miniCluster.config), + //varargs list of command line params + [ + SliderActions.ACTION_DIAGNOSTICS, + Arguments.ARG_NAME, + clustername, + Arguments.ARG_CONTAINERS, + Arguments.ARG_MANAGER, RMAddr, + ] + + ) + assert diagLauncher.serviceExitCode == 0 + + } + + /** Installs the default app definition as a package, starts a standalone AM on a mini cluster, runs the diagnostics action with the containers argument, then checks that the cluster is enumerated as RUNNING before freezing it. */ @Test + public void testContainerDiagsWithAppPackage() throws Throwable { + super.setup() + /* wrap a raw local filesystem as testFileSystem; TestSliderClient.initHadoopBinding() assigns this same object to its sliderFileSystem */ FileSystem fileSystem = new RawLocalFileSystem() + YarnConfiguration configuration = SliderUtils.createConfiguration() + fileSystem.setConf(configuration) + testFileSystem = new SliderFileSystem(fileSystem, configuration) + describe("Create a live cluster then run the container diagnostics command") + createMiniCluster("testactiondiag", configuration, 1, true) + + YarnConfiguration yarnConfig = new YarnConfiguration(configuration) + String clustername = "testdiagclusterwithappcontainers" + // get the default application.def file and install it as a package + String appDefPath = agentDefOptions.getAt(AgentKeys.APP_DEF) + File appDefFile = new File(new URI(appDefPath)) + YarnConfiguration conf = SliderUtils.createConfiguration() + /* the install action is launched with TestSliderClient, presumably so that it writes through testFileSystem; confirm against SliderClient */ ServiceLauncher<SliderClient> launcher = launch(TestSliderClient, + conf, + [ + ClientArgs.ACTION_PACKAGE, + ClientArgs.ARG_INSTALL, + ClientArgs.ARG_NAME, + APP_NAME, + ClientArgs.ARG_PACKAGE, + appDefFile.absolutePath, + ClientArgs.ARG_VERSION, + APP_VERSION, + ClientArgs.ARG_REPLACE_PKG + ]) + /* expected location: the package directory built from APP_NAME and APP_VERSION, plus the app definition's file name */ Path installedPath = new Path(testFileSystem.buildPackageDirPath(APP_NAME, + APP_VERSION), appDefFile.getName()) + File installedPackage = 
new File(installedPath.toUri().path) + /* the package file must exist at that path before the app is started */ assert installedPackage.exists() + describe("Installed app package to - " + installedPackage.toURI() + .toString()) + // overwrite the application.def property with the new installed path + agentDefOptions.putAt(AgentKeys.APP_DEF, installedPackage.toURI() + .toString()) + // add the app version + agentDefOptions.putAt(SliderKeys.APP_VERSION, APP_VERSION) + // start the app and AM + describe("Starting the app") + launcher = createStandaloneAM(clustername, true, false) + SliderClient sliderClient = launcher.service + addToTeardown(sliderClient) + waitForClusterLive(sliderClient) + + describe("Now running diagnostics command") + launcher = launchClientAgainstMiniMR( + //config includes RM binding info + yarnConfig, + //varargs list of command line params + [SliderActions.ACTION_DIAGNOSTICS, + Arguments.ARG_NAME, + clustername, + Arguments.ARG_CONTAINERS + ] + ) + + assert launcher.serviceExitCode == 0 + /* the client that ran diagnostics must list this cluster, with matching name and path and an application report in state RUNNING */ def client = launcher.service + def instances = client.enumSliderInstances(false, null, null) + assert instances.size() > 0 + def enumeratedInstance = instances[clustername] + assert enumeratedInstance != null + assert enumeratedInstance.applicationReport != null + assert enumeratedInstance.applicationReport.name == + clustername + assert enumeratedInstance.name == clustername + assert enumeratedInstance.path.toString().endsWith("/" + clustername) + assert enumeratedInstance.applicationReport.yarnApplicationState == + YarnApplicationState.RUNNING + /* repeat the lookup through the client that started the AM, passing RUNNING for both state arguments */ instances = sliderClient.enumSliderInstances(true, + YarnApplicationState.RUNNING, YarnApplicationState.RUNNING) + assert instances[clustername] + + clusterActionFreeze(sliderClient, clustername, "stopping the cluster") + waitForAppToFinish(sliderClient) + } + + /** SliderClient subclass used for the package install; its initHadoopBinding() only assigns testFileSystem to sliderFileSystem and makes no super call. */ static class TestSliderClient extends SliderClient { + public TestSliderClient() { + super() + } + + @Override + protected void initHadoopBinding() throws IOException, SliderException { + sliderFileSystem = testFileSystem + } + 
} }