sv2000 commented on a change in pull request #2909: [GOBBLIN-1071] Retry task initialization URL: https://github.com/apache/incubator-gobblin/pull/2909#discussion_r388396992
########## File path: gobblin-cluster/src/main/java/org/apache/gobblin/cluster/GobblinHelixTask.java ########## @@ -97,28 +105,35 @@ public GobblinHelixTask(TaskRunnerSuiteBase.Builder builder, builder.getAppWorkPath(), this.jobId); - Config dynamicConfig = builder.getDynamicConfig() - .withValue(GobblinClusterConfigurationKeys.TASK_RUNNER_HOST_NAME_KEY, ConfigValueFactory.fromAnyRef(builder.getHostName())) - .withValue(GobblinClusterConfigurationKeys.CONTAINER_ID_KEY, ConfigValueFactory.fromAnyRef(builder.getContainerId())) - .withValue(GobblinClusterConfigurationKeys.HELIX_INSTANCE_NAME_KEY, ConfigValueFactory.fromAnyRef(builder.getInstanceName())) - .withValue(GobblinClusterConfigurationKeys.HELIX_JOB_ID_KEY, ConfigValueFactory.fromAnyRef(this.helixJobId)) - .withValue(GobblinClusterConfigurationKeys.HELIX_TASK_ID_KEY, ConfigValueFactory.fromAnyRef(this.helixTaskId)); - Integer partitionNum = getPartitionForHelixTask(taskDriver); - if (partitionNum == null) { throw new IllegalStateException(String.format("Task %s, job %s on instance %s has no partition assigned", this.helixTaskId, builder.getInstanceName(), this.helixJobId)); } - dynamicConfig = dynamicConfig.withValue(GobblinClusterConfigurationKeys.HELIX_PARTITION_ID_KEY, ConfigValueFactory.fromAnyRef(partitionNum)); - this.task = new SingleTask(this.jobId, - this.workUnitFilePath, - jobStateFilePath, - builder.getFs(), - taskAttemptBuilder, - stateStores, - dynamicConfig); + final Config taskLevelConfig = builder.getConfig() + .withValue(GobblinClusterConfigurationKeys.TASK_RUNNER_HOST_NAME_KEY, ConfigValueFactory.fromAnyRef(builder.getHostName())) + .withValue(GobblinClusterConfigurationKeys.CONTAINER_ID_KEY, ConfigValueFactory.fromAnyRef(builder.getContainerId())) + .withValue(GobblinClusterConfigurationKeys.HELIX_INSTANCE_NAME_KEY, ConfigValueFactory.fromAnyRef(builder.getInstanceName())) + .withValue(GobblinClusterConfigurationKeys.HELIX_JOB_ID_KEY, ConfigValueFactory.fromAnyRef(this.helixJobId)) + 
.withValue(GobblinClusterConfigurationKeys.HELIX_TASK_ID_KEY, ConfigValueFactory.fromAnyRef(this.helixTaskId)) + .withValue(GobblinClusterConfigurationKeys.HELIX_PARTITION_ID_KEY, ConfigValueFactory.fromAnyRef(partitionNum)); + + Retryer<SingleTask> retryer = RetryerFactory.newInstance(taskLevelConfig); Review comment: Check the retryer defaults. I think 5ms for a retry attempt may be too aggressive, particularly if we are connecting to state stores as part of the initialization. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services