[ 
https://issues.apache.org/jira/browse/SPARK-47114?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

melin resolved SPARK-47114.
---------------------------
    Resolution: Resolved

> In the spark driver pod. Failed to access the krb5 file
> -------------------------------------------------------
>
>                 Key: SPARK-47114
>                 URL: https://issues.apache.org/jira/browse/SPARK-47114
>             Project: Spark
>          Issue Type: New Feature
>          Components: Kubernetes
>    Affects Versions: 3.4.1
>            Reporter: melin
>            Priority: Major
>
> Spark runs in Kubernetes and accesses an external HDFS cluster (Kerberos); the pod 
> error logs are:
> {code:java}
> Caused by: java.lang.IllegalArgumentException: KrbException: krb5.conf 
> loading failed{code}
> This error generally occurs when the krb5 file cannot be found.
> [~yao] [~Qin Yao] 
> {code:java}
> ./bin/spark-submit \
>     --master k8s://https://172.18.5.44:6443 \
>     --deploy-mode cluster \
>     --name spark-pi \
>     --class org.apache.spark.examples.SparkPi \
>     --conf spark.executor.instances=1 \
>     --conf spark.kubernetes.submission.waitAppCompletion=true \
>     --conf spark.kubernetes.driver.pod.name=spark-xxxxxxx \
>     --conf spark.kubernetes.executor.podNamePrefix=spark-executor-xxxxxxx \
>     --conf spark.kubernetes.driver.label.profile=production \
>     --conf spark.kubernetes.executor.label.profile=production \
>     --conf spark.kubernetes.namespace=superior \
>     --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
>     --conf 
> spark.kubernetes.container.image=registry.cn-hangzhou.aliyuncs.com/melin1204/spark-jobserver:3.4.0
>  \
>     --conf 
> spark.kubernetes.file.upload.path=hdfs://cdh1:8020/user/superior/kubernetes/ \
>     --conf spark.kubernetes.container.image.pullPolicy=Always \
>     --conf spark.kubernetes.container.image.pullSecrets=docker-reg-demos \
>     --conf spark.kubernetes.kerberos.krb5.path=/etc/krb5.conf  \
>     --conf spark.kerberos.principal=superior/ad...@datacyber.com  \
>     --conf spark.kerberos.keytab=/root/superior.keytab  \
>     
> file:///root/spark-3.4.2-bin-hadoop3/examples/jars/spark-examples_2.12-3.4.2.jar
>   5{code}
> {code:java}
> (base) [root@cdh1 ~]# kubectl logs spark-xxxxxxx -n superior
> Exception in thread "main" java.lang.IllegalArgumentException: Can't get 
> Kerberos realm
>         at 
> org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:71)
>         at 
> org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:315)
>         at 
> org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:300)
>         at 
> org.apache.hadoop.security.UserGroupInformation.isAuthenticationMethodEnabled(UserGroupInformation.java:395)
>         at 
> org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled(UserGroupInformation.java:389)
>         at 
> org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(UserGroupInformation.java:1119)
>         at 
> org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:385)
>         at 
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
>         at 
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192)
>         at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215)
>         at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
>         at 
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1111)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1120)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.IllegalArgumentException: KrbException: krb5.conf 
> loading failed
>         at 
> java.security.jgss/javax.security.auth.kerberos.KerberosPrincipal.<init>(Unknown
>  Source)
>         at 
> org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:120)
>         at 
> org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:69)
>         ... 13 more
> (base) [root@cdh1 ~]# kubectl describe pod spark-xxxxxxx -n superior
> Name:             spark-xxxxxxx
> Namespace:        superior
> Priority:         0
> Service Account:  spark
> Node:             cdh2/172.18.5.45
> Start Time:       Wed, 21 Feb 2024 15:48:08 +0800
> Labels:           profile=production
>                   spark-app-name=spark-pi
>                   spark-app-selector=spark-728e24e49f9040fa86b04c521463020b
>                   spark-role=driver
>                   spark-version=3.4.2
> Annotations:      <none>
> Status:           Failed
> IP:               10.244.1.4
> IPs:
>   IP:  10.244.1.4
> Containers:
>   spark-kubernetes-driver:
>     Container ID:  
> containerd://cceaf13b70cc5f21a639e71cb8663989ec73e122380844624d4bfac3946bae15
>     Image:         spark:3.4.1
>     Image ID:      
> docker.io/library/spark@sha256:69fb485a0bcad88f9a2bf066e1b5d555f818126dc9df5a0b7e6a3b6d364bc694
>     Ports:         7078/TCP, 7079/TCP, 4040/TCP
>     Host Ports:    0/TCP, 0/TCP, 0/TCP
>     Args:
>       driver
>       --properties-file
>       /opt/spark/conf/spark.properties
>       --class
>       org.apache.spark.examples.SparkPi
>       spark-internal
>       5
>     State:          Terminated
>       Reason:       Error
>       Exit Code:    1
>       Started:      Wed, 21 Feb 2024 15:49:54 +0800
>       Finished:     Wed, 21 Feb 2024 15:49:56 +0800
>     Ready:          False
>     Restart Count:  0
>     Limits:
>       memory:  1408Mi
>     Requests:
>       cpu:     1
>       memory:  1408Mi
>     Environment:
>       SPARK_USER:                 superior
>       SPARK_APPLICATION_ID:       spark-728e24e49f9040fa86b04c521463020b
>       SPARK_DRIVER_BIND_ADDRESS:   (v1:status.podIP)
>       HADOOP_CONF_DIR:            /opt/hadoop/conf
>       SPARK_LOCAL_DIRS:           
> /var/data/spark-5e734880-8e00-4349-a88e-e6062ecee6f8
>       SPARK_CONF_DIR:             /opt/spark/conf
>     Mounts:
>       /etc/krb5.conf from krb5-file (rw,path="krb5.conf")
>       /mnt/secrets/kerberos-keytab from kerberos-keytab (rw)
>       /opt/hadoop/conf from hadoop-properties (rw)
>       /opt/spark/conf from spark-conf-volume-driver (rw)
>       /var/data/spark-5e734880-8e00-4349-a88e-e6062ecee6f8 from 
> spark-local-dir-1 (rw)
>       /var/run/secrets/kubernetes.io/serviceaccount from 
> kube-api-access-mn8dm (ro)
> Conditions:
>   Type              Status
>   Initialized       True 
>   Ready             False 
>   ContainersReady   False 
>   PodScheduled      True 
> Volumes:
>   hadoop-properties:
>     Type:      ConfigMap (a volume populated by a ConfigMap)
>     Name:      spark-pi-ea209a8dcaa2d678-hadoop-config
>     Optional:  false
>   krb5-file:
>     Type:      ConfigMap (a volume populated by a ConfigMap)
>     Name:      spark-pi-ea209a8dcaa2d678-krb5-file
>     Optional:  false
>   kerberos-keytab:
>     Type:        Secret (a volume populated by a Secret)
>     SecretName:  spark-pi-ea209a8dcaa2d678-kerberos-keytab
>     Optional:    false
>   spark-local-dir-1:
>     Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
>     Medium:     
>     SizeLimit:  <unset>
>   spark-conf-volume-driver:
>     Type:      ConfigMap (a volume populated by a ConfigMap)
>     Name:      spark-drv-0a84c78dcaa2de11-conf-map
>     Optional:  false
>   kube-api-access-mn8dm:
>     Type:                    Projected (a volume that contains injected data 
> from multiple sources)
>     TokenExpirationSeconds:  3607
>     ConfigMapName:           kube-root-ca.crt
>     ConfigMapOptional:       <nil>
>     DownwardAPI:             true
> QoS Class:                   Burstable
> Node-Selectors:              <none>
> Tolerations:                 node.kubernetes.io/not-ready:NoExecute op=Exists 
> for 300s
>                              node.kubernetes.io/unreachable:NoExecute 
> op=Exists for 300s
> Events:
>   Type     Reason       Age    From               Message
>   ----     ------       ----   ----               -------
>   Normal   Scheduled    2m46s  default-scheduler  Successfully assigned 
> superior/spark-xxxxxxx to cdh2
>   Warning  FailedMount  2m46s  kubelet            MountVolume.SetUp failed 
> for volume "krb5-file" : configmap "spark-pi-ea209a8dcaa2d678-krb5-file" not 
> found
>   Warning  FailedMount  2m46s  kubelet            MountVolume.SetUp failed 
> for volume "hadoop-properties" : configmap 
> "spark-pi-ea209a8dcaa2d678-hadoop-config" not found
>   Warning  FailedMount  2m46s  kubelet            MountVolume.SetUp failed 
> for volume "kerberos-keytab" : secret 
> "spark-pi-ea209a8dcaa2d678-kerberos-keytab" not found
>   Warning  FailedMount  2m46s  kubelet            MountVolume.SetUp failed 
> for volume "spark-conf-volume-driver" : configmap 
> "spark-drv-0a84c78dcaa2de11-conf-map" not found
>   Normal   Pulling      2m45s  kubelet            Pulling image "spark:3.4.1"
>   Normal   Pulled       60s    kubelet            Successfully pulled image 
> "spark:3.4.1" in 1m44.871s (1m44.871s including waiting)
>   Normal   Created      60s    kubelet            Created container 
> spark-kubernetes-driver
>   Normal   Started      60s    kubelet            Started container 
> spark-kubernetes-driver{code}
>  
>  cm: spark-pi-ea209a8dcaa2d678-kerberos-keytab does not exist
> {code:java}
> (base) [root@cdh1 ~]# kubectl get cm -n superior
> NAME                                      DATA   AGE
> kube-root-ca.crt                          1      161m
> spark-drv-0a84c78dcaa2de11-conf-map       2      8m43s
> spark-pi-ea209a8dcaa2d678-hadoop-config   11     8m43s
> spark-pi-ea209a8dcaa2d678-krb5-file       1      8m43s {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to