[ https://issues.apache.org/jira/browse/SUBMARINE-542?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17141692#comment-17141692 ]
Wangda Tan commented on SUBMARINE-542:
--------------------------------------
Last 100 lines of the Submarine server log:
{code:java}
K8s submitter: parse Job object failed by Conflict
K8s submitter: parse Job object failed by Conflict
io.kubernetes.client.ApiException: Conflict at
io.kubernetes.client.ApiClient.handleResponse(ApiClient.java:882) at
io.kubernetes.client.ApiClient.execute(ApiClient.java:798) at
io.kubernetes.client.apis.CustomObjectsApi.createNamespacedCustomObjectWithHttpInfo(CustomObjectsApi.java:349)
at
io.kubernetes.client.apis.CustomObjectsApi.createNamespacedCustomObject(CustomObjectsApi.java:330)
at
org.apache.submarine.server.submitter.k8s.K8sSubmitter.createExperiment(K8sSubmitter.java:102)
at
org.apache.submarine.server.experiment.ExperimentManager.createExperiment(ExperimentManager.java:88)
at
org.apache.submarine.server.rest.ExperimentRestApi.createExperiment(ExperimentRestApi.java:86)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498) at
org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory.lambda$static$0(ResourceMethodInvocationHandlerFactory.java:76)
at
org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:148)
at
org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:191)
at
org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$ResponseOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:200)
at
org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:103)
at
org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:493)
at
org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:415)
at
org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:104)
at org.glassfish.jersey.server.ServerRuntime$1.run(ServerRuntime.java:277) at
org.glassfish.jersey.internal.Errors$1.call(Errors.java:272) at
org.glassfish.jersey.internal.Errors$1.call(Errors.java:268) at
org.glassfish.jersey.internal.Errors.process(Errors.java:316) at
org.glassfish.jersey.internal.Errors.process(Errors.java:298) at
org.glassfish.jersey.internal.Errors.process(Errors.java:268) at
org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:289)
at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:256)
at
org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:703)
at
org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:416) at
org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:370) at
org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:389)
at
org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:342)
at
org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:229)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:873) at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:542) at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1700)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1345)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480)
at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1667)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:61) at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:505) at
org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:370) at
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:267) at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103) at
org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117) at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:698)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:804)
at java.lang.Thread.run(Thread.java:748)
[INFO ] 2020-06-22 12:21:45,934 method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168) Upstream response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"5719","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"True","type":"Running"}],"replicaStatuses":{"PS":{"active":1.0},"Worker":{"active":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[WARN
] 2020-06-22 12:21:45,936
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.Experiment Dict
Annotation![INFO ] 2020-06-22 12:22:04,638
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"5719","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"True","type":"Running"}],"replicaStatuses":{"PS":{"active":1.0},"Worker":{"active":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[WARN
] 2020-06-22 12:22:04,640
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.Experiment Dict
Annotation![INFO ] 2020-06-22 12:22:04,651
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"5719","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"True","type":"Running"}],"replicaStatuses":{"PS":{"active":1.0},"Worker":{"active":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[ERROR]
2020-06-22 12:22:04,656
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.getExperimentLog(K8sSubmitter.java:225)Error
when listing pod for experiment:mnist-1[WARN ] 2020-06-22 12:22:04,656
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.ExperimentLog Dict
Annotation![INFO ] 2020-06-22 12:23:22,476
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"5719","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"True","type":"Running"}],"replicaStatuses":{"PS":{"active":1.0},"Worker":{"active":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[WARN
] 2020-06-22 12:23:22,478
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.Experiment Dict
Annotation![INFO ] 2020-06-22 12:23:22,492
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"5719","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"True","type":"Running"}],"replicaStatuses":{"PS":{"active":1.0},"Worker":{"active":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[ERROR]
2020-06-22 12:23:22,501
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.getExperimentLog(K8sSubmitter.java:225)Error
when listing pod for experiment:mnist-1[WARN ] 2020-06-22 12:23:22,501
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.ExperimentLog Dict
Annotation![INFO ] 2020-06-22 12:26:48,742
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"6086","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"completionTime":"2020-06-22T04:24:08Z","conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"False","type":"Running"},{"lastTransitionTime":"2020-06-22T04:24:08Z","lastUpdateTime":"2020-06-22T04:24:08Z","message":"TFJob
mnist-1 successfully
completed.","reason":"TFJobSucceeded","status":"True","type":"Succeeded"}],"replicaStatuses":{"PS":{"succeeded":1.0},"Worker":{"succeeded":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[WARN
] 2020-06-22 12:26:48,744
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.Experiment Dict
Annotation![INFO ] 2020-06-22 12:26:48,754
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.parseResponseObject(K8sSubmitter.java:168)Upstream
response JSON:
{"apiVersion":"kubeflow.org/v1","kind":"TFJob","metadata":{"creationTimestamp":"2020-06-22T04:21:38Z","generation":1.0,"name":"mnist-1","namespace":"default","resourceVersion":"6086","selfLink":"/apis/kubeflow.org/v1/namespaces/default/tfjobs/mnist-1","uid":"ddb2d259-b43f-11ea-b167-0242ac120002"},"spec":{"tfReplicaSpecs":{"Ps":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}},"Worker":{"replicas":1.0,"restartPolicy":"OnFailure","template":{"spec":{"containers":[{"command":["python","/var/tf_mnist/mnist_with_summaries.py","--log_dir\u003d/train/log","--learning_rate\u003d0.01","--batch_size\u003d150"],"env":[{"name":"ENV1","value":"ENV1"}],"image":"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0","name":"tensorflow","resources":{"limits":{"cpu":"1","memory":"1024M"}}}]}}}}},"status":{"completionTime":"2020-06-22T04:24:08Z","conditions":[{"lastTransitionTime":"2020-06-22T04:21:38Z","lastUpdateTime":"2020-06-22T04:21:38Z","message":"TFJob
mnist-1 is
created.","reason":"TFJobCreated","status":"True","type":"Created"},{"lastTransitionTime":"2020-06-22T04:21:40Z","lastUpdateTime":"2020-06-22T04:21:40Z","message":"TFJob
mnist-1 is
running.","reason":"TFJobRunning","status":"False","type":"Running"},{"lastTransitionTime":"2020-06-22T04:24:08Z","lastUpdateTime":"2020-06-22T04:24:08Z","message":"TFJob
mnist-1 successfully
completed.","reason":"TFJobSucceeded","status":"True","type":"Succeeded"}],"replicaStatuses":{"PS":{"succeeded":1.0},"Worker":{"succeeded":1.0}},"startTime":"2020-06-22T04:21:38Z"}}[ERROR]
2020-06-22 12:26:48,758
method:org.apache.submarine.server.submitter.k8s.K8sSubmitter.getExperimentLog(K8sSubmitter.java:225)Error
when listing pod for experiment:mnist-1[WARN ] 2020-06-22 12:26:48,758
method:org.apache.submarine.server.response.DictAnnotation.parseDictAnnotation(DictAnnotation.java:180)Unsupported
parse class org.apache.submarine.server.api.experiment.ExperimentLog Dict
Annotation! {code}
> [SDK] get_log error when experiment is not started
> --------------------------------------------------
>
> Key: SUBMARINE-542
> URL: https://issues.apache.org/jira/browse/SUBMARINE-542
> Project: Apache Submarine
> Issue Type: Bug
> Components: SDK
> Reporter: Kevin Su
> Assignee: Kevin Su
> Priority: Blocker
>
> >>> submarine_client.get_log(id)
> {code:java}
> IndexError Traceback (most recent call last)
> <ipython-input-13-8bf55eff97d1> in <module>
> ----> 1 submarine_client.get_log(id)
>
> ~/opt/anaconda3/envs/python3-7/lib/python3.7/site-packages/submarine/experiment/api/experiment_client.py in get_log(self, id, master)
> 124
> 125 if master is True:
> --> 126 log_contents = [log_contents[0]]
> 127
> 128 for log_content in log_contents:
>
> IndexError: list index out of range{code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]