[ https://issues.apache.org/jira/browse/YARN-10060?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zhoukang updated YARN-10060: ---------------------------- Attachment: YARN-10060-001.patch > HistoryServer may recover too slowly because JobHistory init is too slow when there > are too many jobs > ----------------------------------------------------------------------------------------------- > > Key: YARN-10060 > URL: https://issues.apache.org/jira/browse/YARN-10060 > Project: Hadoop YARN > Issue Type: Improvement > Components: yarn > Reporter: zhoukang > Assignee: zhoukang > Priority: Major > Attachments: YARN-10060-001.patch > > > As the log below shows, it took about 7 minutes (from 20:01:47 to 20:08:29) after "Initializing Existing Jobs..." before startup continued and the server began listening on the service port > {code:java} > 2019-12-24,20:01:37,272 INFO org.apache.zookeeper.ClientCnxn: EventThread > shut down > 2019-12-24,20:01:47,354 INFO > org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager: Initializing Existing > Jobs... > 2019-12-24,20:08:29,589 INFO org.apache.zookeeper.ClientCnxn: Opening socket > connection to server xxx. Will not attempt to authenticate using SASL > (unknown error) > 2019-12-24,20:08:29,589 INFO org.apache.zookeeper.ClientCnxn: Socket > connection established to xxx, initiating session > 2019-12-24,20:08:29,590 INFO org.apache.zookeeper.ClientCnxn: Session > establishment complete on server xxx, sessionid = 0x66d1a13e596ddc9, > negotiated timeout = 5000 > 2019-12-24,20:08:29,593 INFO org.apache.zookeeper.ZooKeeper: Session: > 0x66d1a13e596ddc9 closed > 2019-12-24,20:08:29,593 INFO org.apache.zookeeper.ClientCnxn: EventThread > shut down > 2019-12-24,20:08:29,655 INFO > org.apache.hadoop.mapreduce.v2.hs.CachedHistoryStorage: CachedHistoryStorage > Init > 2019-12-24,20:08:29,681 INFO org.apache.hadoop.ipc.CallQueueManager: Using > callQueue class java.util.concurrent.LinkedBlockingQueue > 2019-12-24,20:08:29,715 INFO org.apache.hadoop.ipc.CallQueueManager: Using > callQueue class java.util.concurrent.LinkedBlockingQueue > 2019-12-24,20:08:29,800 INFO org.apache.hadoop.metrics2.impl.MetricsConfig: > loaded properties from hadoop-metrics2.properties > 
2019-12-24,20:08:29,943 INFO > org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period > at 10 second(s). > 2019-12-24,20:08:29,943 INFO > org.apache.hadoop.metrics2.impl.MetricsSystemImpl: JobHistoryServer metrics > system started > 2019-12-24,20:08:29,950 INFO > org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager: > Updating the current master key for generating delegation tokens > 2019-12-24,20:08:29,951 INFO > org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager: > Starting expired delegation token remover thread, > tokenRemoverScanInterval=60 min(s) > 2019-12-24,20:08:29,952 INFO > org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager: > Updating the current master key for generating delegation tokens > 2019-12-24,20:08:30,015 INFO org.apache.hadoop.http.HttpRequestLog: Http > request log for http.requests.jobhistory is not defined > 2019-12-24,20:08:30,025 INFO org.apache.hadoop.http.HttpServer2: Added global > filter 'safety' (class=org.apache.hadoop.http.HttpServer2$QuotingInputFilter) > 2019-12-24,20:08:30,027 INFO org.apache.hadoop.http.HttpServer2: Added filter > static_user_filter > (class=org.apache.hadoop.http.lib.StaticUserWebFilter$StaticUserFilter) to > context jobhistory > 2019-12-24,20:08:30,027 INFO org.apache.hadoop.http.HttpServer2: Added filter > static_user_filter > (class=org.apache.hadoop.http.lib.StaticUserWebFilter$StaticUserFilter) to > context static > 2019-12-24,20:08:30,030 INFO org.apache.hadoop.http.HttpServer2: adding path > spec: /jobhistory/* > 2019-12-24,20:08:30,030 INFO org.apache.hadoop.http.HttpServer2: adding path > spec: /ws/* > 2019-12-24,20:08:30,057 INFO org.apache.hadoop.http.HttpServer2: Jetty bound > to port 20901 > 2019-12-24,20:08:30,939 INFO org.apache.hadoop.yarn.webapp.WebApps: Web app > /jobhistory started at 20901 > 2019-12-24,20:08:31,177 INFO org.apache.hadoop.yarn.webapp.WebApps: > Registered webapp 
guice modules > 2019-12-24,20:08:31,187 INFO org.apache.hadoop.ipc.CallQueueManager: Using > callQueue class java.util.concurrent.LinkedBlockingQueue > 2019-12-24,20:08:31,187 INFO org.apache.hadoop.ipc.CallQueueManager: Using > callQueue class java.util.concurrent.LinkedBlockingQueue > 2019-12-24,20:08:31,189 INFO > org.apache.hadoop.yarn.factories.impl.pb.RpcServerFactoryPBImpl: Adding > protocol org.apache.hadoop.mapreduce.v2.api.HSClientProtocolPB to the server > 2019-12-24,20:08:31,216 INFO > org.apache.hadoop.mapreduce.v2.hs.HistoryClientService: Instantiated > HistoryClientService at xxx > 2019-12-24,20:08:31,344 INFO > org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService: > aggregated log deletion started. > 2019-12-24,20:08:31,690 INFO org.apache.zookeeper.ZooKeeper: Initiating > client connection, connectString=xxx sessionTimeout=5000 watcher=org > {code} > The gap after "Initializing Existing Jobs..." in the log above corresponds to the synchronous hsManager.initExisting() call in the serviceInit method below, which runs before the history storage is created and initialized: > {code:java} > protected void serviceInit(Configuration conf) throws Exception { > LOG.info("JobHistory Init"); > this.conf = conf; > this.appID = ApplicationId.newInstance(0, 0); > this.appAttemptID = RecordFactoryProvider.getRecordFactory(conf) > .newRecordInstance(ApplicationAttemptId.class); > moveThreadInterval = conf.getLong( > JHAdminConfig.MR_HISTORY_MOVE_INTERVAL_MS, > JHAdminConfig.DEFAULT_MR_HISTORY_MOVE_INTERVAL_MS); > hsManager = createHistoryFileManager(); > hsManager.init(conf); > try { > hsManager.initExisting(); > } catch (IOException e) { > throw new YarnRuntimeException("Failed to initialize existing > directories", e); > } > storage = createHistoryStorage(); > > if (storage instanceof Service) { > ((Service) storage).init(conf); > } > storage.setHistoryFileManager(hsManager); > super.serviceInit(conf); > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: 
yarn-issues-h...@hadoop.apache.org