[ 
https://issues.apache.org/jira/browse/YARN-10060?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

zhoukang updated YARN-10060:
----------------------------
    Attachment: YARN-10060.001.patch

> HistoryServer may recover too slowly because JobHistory init is too slow 
> when there are too many jobs
> -----------------------------------------------------------------------------------------------
>
>                 Key: YARN-10060
>                 URL: https://issues.apache.org/jira/browse/YARN-10060
>             Project: Hadoop YARN
>          Issue Type: Improvement
>          Components: yarn
>            Reporter: zhoukang
>            Assignee: zhoukang
>            Priority: Major
>         Attachments: YARN-10060.001.patch
>
>
> As the log below shows, it took >7 min before the server started listening on the service port
> {code:java}
> 2019-12-24,20:01:37,272 INFO org.apache.zookeeper.ClientCnxn: EventThread 
> shut down
> 2019-12-24,20:01:47,354 INFO 
> org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager: Initializing Existing 
> Jobs...
> 2019-12-24,20:08:29,589 INFO org.apache.zookeeper.ClientCnxn: Opening socket 
> connection to server xxx. Will not attempt to authenticate using SASL 
> (unknown error)
> 2019-12-24,20:08:29,589 INFO org.apache.zookeeper.ClientCnxn: Socket 
> connection established to xxx, initiating session
> 2019-12-24,20:08:29,590 INFO org.apache.zookeeper.ClientCnxn: Session 
> establishment complete on server xxx, sessionid = 0x66d1a13e596ddc9, 
> negotiated timeout = 5000
> 2019-12-24,20:08:29,593 INFO org.apache.zookeeper.ZooKeeper: Session: 
> 0x66d1a13e596ddc9 closed
> 2019-12-24,20:08:29,593 INFO org.apache.zookeeper.ClientCnxn: EventThread 
> shut down
> 2019-12-24,20:08:29,655 INFO 
> org.apache.hadoop.mapreduce.v2.hs.CachedHistoryStorage: CachedHistoryStorage 
> Init
> 2019-12-24,20:08:29,681 INFO org.apache.hadoop.ipc.CallQueueManager: Using 
> callQueue class java.util.concurrent.LinkedBlockingQueue
> 2019-12-24,20:08:29,715 INFO org.apache.hadoop.ipc.CallQueueManager: Using 
> callQueue class java.util.concurrent.LinkedBlockingQueue
> 2019-12-24,20:08:29,800 INFO org.apache.hadoop.metrics2.impl.MetricsConfig: 
> loaded properties from hadoop-metrics2.properties
> 2019-12-24,20:08:29,943 INFO 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period 
> at 10 second(s).
> 2019-12-24,20:08:29,943 INFO 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl: JobHistoryServer metrics 
> system started
> 2019-12-24,20:08:29,950 INFO 
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager:
>  Updating the current master key for generating delegation tokens
> 2019-12-24,20:08:29,951 INFO 
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager:
>  Starting expired delegation token remover thread, 
> tokenRemoverScanInterval=60 min(s)
> 2019-12-24,20:08:29,952 INFO 
> org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager:
>  Updating the current master key for generating delegation tokens
> 2019-12-24,20:08:30,015 INFO org.apache.hadoop.http.HttpRequestLog: Http 
> request log for http.requests.jobhistory is not defined
> 2019-12-24,20:08:30,025 INFO org.apache.hadoop.http.HttpServer2: Added global 
> filter 'safety' (class=org.apache.hadoop.http.HttpServer2$QuotingInputFilter)
> 2019-12-24,20:08:30,027 INFO org.apache.hadoop.http.HttpServer2: Added filter 
> static_user_filter 
> (class=org.apache.hadoop.http.lib.StaticUserWebFilter$StaticUserFilter) to 
> context jobhistory
> 2019-12-24,20:08:30,027 INFO org.apache.hadoop.http.HttpServer2: Added filter 
> static_user_filter 
> (class=org.apache.hadoop.http.lib.StaticUserWebFilter$StaticUserFilter) to 
> context static
> 2019-12-24,20:08:30,030 INFO org.apache.hadoop.http.HttpServer2: adding path 
> spec: /jobhistory/*
> 2019-12-24,20:08:30,030 INFO org.apache.hadoop.http.HttpServer2: adding path 
> spec: /ws/*
> 2019-12-24,20:08:30,057 INFO org.apache.hadoop.http.HttpServer2: Jetty bound 
> to port 20901
> 2019-12-24,20:08:30,939 INFO org.apache.hadoop.yarn.webapp.WebApps: Web app 
> /jobhistory started at 20901
> 2019-12-24,20:08:31,177 INFO org.apache.hadoop.yarn.webapp.WebApps: 
> Registered webapp guice modules
> 2019-12-24,20:08:31,187 INFO org.apache.hadoop.ipc.CallQueueManager: Using 
> callQueue class java.util.concurrent.LinkedBlockingQueue
> 2019-12-24,20:08:31,187 INFO org.apache.hadoop.ipc.CallQueueManager: Using 
> callQueue class java.util.concurrent.LinkedBlockingQueue
> 2019-12-24,20:08:31,189 INFO 
> org.apache.hadoop.yarn.factories.impl.pb.RpcServerFactoryPBImpl: Adding 
> protocol org.apache.hadoop.mapreduce.v2.api.HSClientProtocolPB to the server
> 2019-12-24,20:08:31,216 INFO 
> org.apache.hadoop.mapreduce.v2.hs.HistoryClientService: Instantiated 
> HistoryClientService at xxx
> 2019-12-24,20:08:31,344 INFO 
> org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService: 
> aggregated log deletion started.
> 2019-12-24,20:08:31,690 INFO org.apache.zookeeper.ZooKeeper: Initiating 
> client connection, connectString=xxx sessionTimeout=5000 watcher=org
> {code}
> {code:java}
> protected void serviceInit(Configuration conf) throws Exception {
>     LOG.info("JobHistory Init");
>     this.conf = conf;
>     this.appID = ApplicationId.newInstance(0, 0);
>     this.appAttemptID = RecordFactoryProvider.getRecordFactory(conf)
>         .newRecordInstance(ApplicationAttemptId.class);
>     moveThreadInterval = conf.getLong(
>         JHAdminConfig.MR_HISTORY_MOVE_INTERVAL_MS,
>         JHAdminConfig.DEFAULT_MR_HISTORY_MOVE_INTERVAL_MS);
>     hsManager = createHistoryFileManager();
>     hsManager.init(conf);
>     try {
>       hsManager.initExisting();
>     } catch (IOException e) {
>       throw new YarnRuntimeException("Failed to initialize existing 
> directories", e);
>     }
>     storage = createHistoryStorage();
>     
>     if (storage instanceof Service) {
>       ((Service) storage).init(conf);
>     }
>     storage.setHistoryFileManager(hsManager);
>     super.serviceInit(conf);
>   }
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org

Reply via email to