LakshSingla commented on code in PR #14172:
URL: https://github.com/apache/druid/pull/14172#discussion_r1181392138
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java:
##########
@@ -80,21 +90,14 @@ public boolean equals(Object o)
return false;
}
TaskStartTimeoutFault that = (TaskStartTimeoutFault) o;
- return numTasks == that.numTasks && timeout == that.timeout;
+ return numTasksNotStarted == that.numTasksNotStarted && totalTasks ==
that.totalTasks && timeout == that.timeout;
}
@Override
public int hashCode()
{
- return Objects.hash(super.hashCode(), numTasks, timeout);
+ return Objects.hash(super.hashCode(), numTasksNotStarted, totalTasks,
timeout);
}
- @Override
Review Comment:
Any reason why this change is required?
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java:
##########
@@ -32,33 +32,43 @@ public class TaskStartTimeoutFault extends BaseMSQFault
{
static final String CODE = "TaskStartTimeout";
- private final int numTasks;
+ private final int numTasksNotStarted;
+ private final int totalTasks;
private final long timeout;
@JsonCreator
public TaskStartTimeoutFault(
- @JsonProperty("numTasks") int numTasks,
+ @JsonProperty("numTasksNotStarted") int numTasksNotStarted,
+ @JsonProperty("totalTasks") int totalTasks,
@JsonProperty("timeout") long timeout
)
{
super(
CODE,
- "Unable to launch [%d] worker tasks within [%,d] seconds. "
+ "Unable to launch [%d] workers out of the total [%d] worker tasks
within [%,d] seconds of the last successful worker launch."
+ "There might be insufficient available slots to start all worker
tasks simultaneously. "
+ "Try lowering '%s' in your query context to a number that fits
within your available task capacity, "
+ "or try increasing capacity.",
- numTasks,
+ numTasksNotStarted,
+ totalTasks,
TimeUnit.MILLISECONDS.toSeconds(timeout),
MultiStageQueryContext.CTX_MAX_NUM_TASKS
);
- this.numTasks = numTasks;
+ this.numTasksNotStarted = numTasksNotStarted;
+ this.totalTasks = totalTasks;
this.timeout = timeout;
}
@JsonProperty
- public int getNumTasks()
+ public int getNumTasksNotStarted()
Review Comment:
I think a better name might e `getPendingTasks`
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java:
##########
@@ -706,11 +715,14 @@ public boolean didFail()
return status != null && status.getStatusCode().isFailure();
}
+ /**
+ * The timeout is checked from the recentFullyStartedWorkerTimeInMs. If
it's more than maxTaskStartDelayMillis return true.
Review Comment:
nit: It would be better if we use Ms or Millis as the convention in a single
class
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]