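[Note: the original non-ASCII text of this message was lost to an encoding error; this sentence is reconstructed from the surviving terms and should be confirmed against the original post.] I am running flink 1.7.2 in YARN Session mode. The Hadoop version is 2.7.3 and the hdfs namenode is configured for ha. The following appear to have been set: the environment variables HADOOP_HOME, YARN_CONF_DIR, HADOOP_CONF_DIR and HADOOP_CLASSPATH, and the fs.hdfs.hadoopconf option in flink_conf.yaml.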
2020-04-10 19:12:02,908 INFO org.apache.flink.runtime.jobmaster.JobMaster - Connecting to ResourceManager akka.tcp://flink@trusfortpoc1:23584/user/resourcemanager(00000000000000000000000000000000) 2020-04-10 19:12:02,909 INFO org.apache.flink.runtime.jobmaster.slotpool.SlotPool - Cannot serve slot request, no ResourceManager connected. Adding as pending request [SlotRequestId{0feacbb4fe16c8c7a70249f1396565d0}] 2020-04-10 19:12:02,911 INFO org.apache.flink.runtime.jobmaster.JobMaster - Resolved ResourceManager address, beginning registration 2020-04-10 19:12:02,911 INFO org.apache.flink.runtime.jobmaster.JobMaster - Registration at ResourceManager attempt 1 (timeout=100ms) 2020-04-10 19:12:02,912 INFO org.apache.flink.runtime.jobmaster.slotpool.SlotPool - Cannot serve slot request, no ResourceManager connected. Adding as pending request [SlotRequestId{35ad2384e9cd0efd30b43f5302db24b6}] 2020-04-10 19:12:02,913 INFO org.apache.flink.yarn.YarnResourceManager - Registering job manager 00000000000000000000000000000...@akka.tcp://flink@trusfortpoc1:23584/user/jobmanager_0 for job 24691b33c18d7ad73b1f52edb3d68ae4. 2020-04-10 19:12:02,917 INFO org.apache.flink.yarn.YarnResourceManager - Registered job manager 00000000000000000000000000000...@akka.tcp://flink@trusfortpoc1:23584/user/jobmanager_0 for job 24691b33c18d7ad73b1f52edb3d68ae4. 2020-04-10 19:12:02,919 INFO org.apache.flink.runtime.jobmaster.JobMaster - JobManager successfully registered at ResourceManager, leader id: 00000000000000000000000000000000. 2020-04-10 19:12:02,919 INFO org.apache.flink.runtime.jobmaster.slotpool.SlotPool - Requesting new slot [SlotRequestId{35ad2384e9cd0efd30b43f5302db24b6}] and profile ResourceProfile{cpuCores=-1.0, heapMemoryInMB=-1, directMemoryInMB=0, nativeMemoryInMB=0, networkMemoryInMB=0} from resource manager. 
2020-04-10 19:12:02,920 INFO org.apache.flink.yarn.YarnResourceManager - Request slot with profile ResourceProfile{cpuCores=-1.0, heapMemoryInMB=-1, directMemoryInMB=0, nativeMemoryInMB=0, networkMemoryInMB=0} for job 24691b33c18d7ad73b1f52edb3d68ae4 with allocation id AllocationID{5a12237c7f2bd8b1cc760ddcbab5a1c0}. 2020-04-10 19:12:02,921 INFO org.apache.flink.runtime.jobmaster.slotpool.SlotPool - Requesting new slot [SlotRequestId{0feacbb4fe16c8c7a70249f1396565d0}] and profile ResourceProfile{cpuCores=-1.0, heapMemoryInMB=-1, directMemoryInMB=0, nativeMemoryInMB=0, networkMemoryInMB=0} from resource manager. 2020-04-10 19:12:02,924 INFO org.apache.flink.yarn.YarnResourceManager - Requesting new TaskExecutor container with resources <memory:4096, vCores:6>. Number pending requests 1. 2020-04-10 19:12:02,926 INFO org.apache.flink.yarn.YarnResourceManager - Request slot with profile ResourceProfile{cpuCores=-1.0, heapMemoryInMB=-1, directMemoryInMB=0, nativeMemoryInMB=0, networkMemoryInMB=0} for job 24691b33c18d7ad73b1f52edb3d68ae4 with allocation id AllocationID{37dd666a18040bf63ffbf2e022b2ea9b}. 2020-04-10 19:12:06,531 INFO org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl - Received new token for : trusfortpoc3:35206 2020-04-10 19:12:06,543 INFO org.apache.flink.yarn.YarnResourceManager - Received new container: container_1586426824930_0006_01_000002 - Remaining pending container requests: 1 2020-04-10 19:12:06,543 INFO org.apache.flink.yarn.YarnResourceManager - Removing container request Capability[<memory:4096, vCores:6>]Priority[1]. Pending container requests 0. 2020-04-10 19:12:06,568 ERROR org.apache.flink.yarn.YarnResourceManager - Could not start TaskManager in container container_1586426824930_0006_01_000002. 
java.lang.IllegalArgumentException: java.net.UnknownHostException: hdfsClusterForML at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:378) at org.apache.hadoop.hdfs.NameNodeProxies.createNonHAProxy(NameNodeProxies.java:320) at org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:176) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:687) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:628) at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:149) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:93) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2701) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2683) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:372) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) at org.apache.flink.yarn.Utils.createTaskExecutorContext(Utils.java:453) at org.apache.flink.yarn.YarnResourceManager.createTaskExecutorLaunchContext(YarnResourceManager.java:555) at org.apache.flink.yarn.YarnResourceManager.lambda$onContainersAllocated$1(YarnResourceManager.java:390) at org.apache.flink.yarn.YarnResourceManager$$Lambda$183/1182651376.run(Unknown Source) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:332) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:158) at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:70) at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.onReceive(AkkaRpcActor.java:142) at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.onReceive(FencedAkkaRpcActor.java:40) at akka.actor.UntypedActor$$anonfun$receive$1.applyOrElse(UntypedActor.scala:165) at akka.actor.Actor$class.aroundReceive(Actor.scala:502) at akka.actor.UntypedActor.aroundReceive(UntypedActor.scala:95) at 
akka.actor.ActorCell.receiveMessage(ActorCell.scala:526) at akka.actor.ActorCell.invoke(ActorCell.scala:495) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:257) at akka.dispatch.Mailbox.run(Mailbox.scala:224) at akka.dispatch.Mailbox.exec(Mailbox.scala:234) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: java.net.UnknownHostException: hdfsClusterForML ... 33 more ????hdfsClusterForML??namenode ha ??nameservice??????????????????hdfs-site.xml???????????? ??????????Hadoop??????????????????flink ??conf??????????????????????????YarnResourceManager???????????????????????? public YarnResourceManager( RpcService rpcService, String resourceManagerEndpointId, ResourceID resourceId, Configuration flinkConfig, Map<String, String> env, HighAvailabilityServices highAvailabilityServices, HeartbeatServices heartbeatServices, SlotManager slotManager, MetricRegistry metricRegistry, JobLeaderIdService jobLeaderIdService, ClusterInformation clusterInformation, FatalErrorHandler fatalErrorHandler, @Nullable String webInterfaceUrl, JobManagerMetricGroup jobManagerMetricGroup) { super( rpcService, resourceManagerEndpointId, resourceId, highAvailabilityServices, heartbeatServices, slotManager, metricRegistry, jobLeaderIdService, clusterInformation, fatalErrorHandler, jobManagerMetricGroup); this.flinkConfig = flinkConfig; this.yarnConfig = new YarnConfiguration(HadoopUtils.getHadoopConfiguration(flinkConfig)); ????????????????????????????????????????????????????????????????????????