[ 
https://issues.apache.org/jira/browse/YARN-8960?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16684788#comment-16684788
 ] 

Zac Zhou commented on YARN-8960:
--------------------------------

As discussion offline, we can use the same kerberos keytab parameter for both 
service and user.

Two parameters --keytab, --principal are added to the submarine job.

We can submit a submarine job like this:

./yarn jar 
/home/hadoop/hadoop-current/share/hadoop/yarn/hadoop-yarn-submarine-3.2.0-SNAPSHOT.jar
 job run \
 --env DOCKER_JAVA_HOME=/opt/java \
 --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 --name distributed-tf-gpu \
 --env YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK=calico-network \
 --worker_docker_image 0.0.0.0:5000/gpu-cuda9.0-tf1.8.0-with-models \
 --input_path hdfs://mldev/tmp/cifar-10-data \
 --checkpoint_path hdfs://mldev/user/hadoop/tf-distributed-checkpoint \
 --num_ps 1 \
 --ps_resources memory=4G,vcores=2,gpu=0 \
 --ps_launch_cmd "python /test/cifar10_estimator/cifar10_main.py 
--data-dir=hdfs://mldev/tmp/cifar-10-data 
--job-dir=hdfs://mldev/tmp/cifar-10-jobdir --num-gpus=0" \
 --ps_docker_image 0.0.0.0:5000/dockerfile-cpu-tf1.8.0-with-models \
 --worker_resources memory=4G,vcores=2,gpu=1 --verbose \
 --num_workers 2 \
 --worker_launch_cmd "python /test/cifar10_estimator/cifar10_main.py 
--data-dir=hdfs://mldev/tmp/cifar-10-data 
--job-dir=hdfs://mldev/tmp/cifar-10-jobdir --train-steps=500 
--eval-batch-size=16 --train-batch-size=16 --sync --num-gpus=1" \
 --keytab /tmp/keytabs/hadoop.keytab \
 --principal hadoop/ad...@corp.com

 

> [Submarine] Can't get submarine service status using the command of "yarn app 
> -status" under security environment
> -----------------------------------------------------------------------------------------------------------------
>
>                 Key: YARN-8960
>                 URL: https://issues.apache.org/jira/browse/YARN-8960
>             Project: Hadoop YARN
>          Issue Type: Sub-task
>            Reporter: Zac Zhou
>            Assignee: Zac Zhou
>            Priority: Major
>         Attachments: YARN-8960.001.patch, YARN-8960.002.patch, 
> YARN-8960.003.patch
>
>
> After submitting a submarine job, we tried to get service status using the 
> following command:
> yarn app -status ${service_name}
> But we got the following error:
> HTTP error code : 500
>  
> The stack in resourcemanager log is :
> ERROR org.apache.hadoop.yarn.service.webapp.ApiServer: Get service failed: {}
> java.lang.reflect.UndeclaredThrowableException
>  at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1748)
>  at 
> org.apache.hadoop.yarn.service.webapp.ApiServer.getServiceFromClient(ApiServer.java:800)
>  at 
> org.apache.hadoop.yarn.service.webapp.ApiServer.getService(ApiServer.java:186)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>  at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>  at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  at java.lang.reflect.Method.invoke(Method.java:498)
>  at 
> com.sun.jersey.spi.container.JavaMethodInvokerFactory$1.invoke(JavaMethodInvokerFactory.java:60)
>  at 
> com.sun.jersey.server.impl.model.method.dispatch.AbstractResourceMethodDispatchProvider$ResponseOutInvoker
> ._dispatch(AbstractResourceMethodDispatchProvider.java:205)
>  at 
> com.sun.jersey.server.impl.model.method.dispatch.ResourceJavaMethodDispatcher.dispatch(ResourceJavaMethodD
> ispatcher.java:75)
>  at 
> com.sun.jersey.server.impl.uri.rules.HttpMethodRule.accept(HttpMethodRule.java:302)
>  at 
> com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147)
>  at 
> com.sun.jersey.server.impl.uri.rules.ResourceClassRule.accept(ResourceClassRule.java:108)
>  at 
> com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147)
>  at 
> com.sun.jersey.server.impl.uri.rules.RootResourceClassesRule.accept(RootResourceClassesRule.java:84)
>  at 
> com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1542)
>  at 
> com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1473)
>  at 
> com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1419)
>  at 
> com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1409)
>  at 
> com.sun.jersey.spi.container.servlet.WebComponent.service(WebComponent.java:409)
>  at 
> com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:558)
>  at 
> com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:733)
>  at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
>  at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:848)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1772)
>  at 
> com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:89)
>  at 
> com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:941)
>  at 
> com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:875)
>  at 
> org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebAppFilter.doFilter(RMWebAppFilter.java:179)
>  at 
> com.sun.jersey.spi.container.servlet.ServletContainer.doFilter(ServletContainer.java:829)
>  at 
> com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:82)
>  at 
> com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:119)
>  at com.google.inject.servlet.GuiceFilter$1.call(GuiceFilter.java:133)
>  at com.google.inject.servlet.GuiceFilter$1.call(GuiceFilter.java:130)
>  at com.google.inject.servlet.GuiceFilter$Context.call(GuiceFilter.java:203)
>  at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:130)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759)
>  at 
> org.apache.hadoop.security.http.XFrameOptionsFilter.doFilter(XFrameOptionsFilter.java:57)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759)
>  at 
> org.apache.hadoop.security.authentication.server.AuthenticationFilter.doFilter(AuthenticationFilter.java:6
> 44)
>  at 
> org.apache.hadoop.security.authentication.server.AuthenticationFilter.doFilter(AuthenticationFilter.java:5
> 92)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759)
>  at 
> org.apache.hadoop.http.HttpServer2$QuotingInputFilter.doFilter(HttpServer2.java:1610)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759)
>  at org.apache.hadoop.http.NoCacheFilter.doFilter(NoCacheFilter.java:45)
>  at 
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759)
>  at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582)
>  at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>  at 
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
>  at 
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
>  at 
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
>  at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512)
>  at 
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
>  at 
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
>  at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>  at 
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119)
>  at 
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
>  at org.eclipse.jetty.server.Server.handle(Server.java:534)
>  at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320)
>  at 
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)
>  at 
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283)
>  at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:108)
>  at 
> org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
>  at 
> org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.j
> ava:303)
>  at 
> org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148
> )
>  at 
> org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136)
>  at 
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671)
>  at 
> org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589)
>  at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hadoop.yarn.exceptions.YarnException: No principal 
> specified in the persisted service definitio
> n, fail to connect to AM.
>  at 
> org.apache.hadoop.yarn.service.client.ServiceClient.createAMProxy(ServiceClient.java:1500)
>  at 
> org.apache.hadoop.yarn.service.client.ServiceClient.getStatus(ServiceClient.java:1376)
>  at 
> org.apache.hadoop.yarn.service.webapp.ApiServer.lambda$getServiceFromClient$4(ApiServer.java:804)
>  at java.security.AccessController.doPrivileged(Native Method)
>  at javax.security.auth.Subject.doAs(Subject.java:422)
>  at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
>  ... 68 more



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org

Reply via email to