This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 2d563de8de9 By default dont allow index_hadoop tasks to run on a
cluster, forcing operators to acknowledge that they are using a deprecated
feature (#18239)
2d563de8de9 is described below
commit 2d563de8de9b03773e6960f716bac24addfd387f
Author: Lucas Capistrant <[email protected]>
AuthorDate: Fri Jul 18 11:04:12 2025 -0500
By default dont allow index_hadoop tasks to run on a cluster, forcing
operators to acknowledge that they are using a deprecated feature (#18239)
* By default dont allow index_hadoop tasks to run on a
cluster, forcing operators to acknowledge that they are using a deprecated feature
* update unclear recommendation from log
* Fixup codeql warning
* fix UT
---
docs/configuration/index.md | 1 +
.../org/apache/druid/indexer/HadoopIndexTask.java | 16 +++++++++
.../apache/druid/indexer/HadoopIndexTaskTest.java | 42 ++++++++++++++++++++++
.../druid/indexing/common/config/TaskConfig.java | 24 ++++++++++---
.../indexing/common/config/TaskConfigBuilder.java | 10 +++++-
5 files changed, 88 insertions(+), 5 deletions(-)
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index 61d9e8e8322..0976190cf99 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1414,6 +1414,7 @@ Additional Peon configs include:
|`druid.indexer.task.ignoreTimestampSpecForDruidInputSource`|If true, tasks
using the [Druid input source](../ingestion/input-sources.md) will ignore the
provided timestampSpec, and will use the `__time` column of the input
datasource. This option is provided for compatibility with ingestion specs
written before Druid 0.22.0.|false|
|`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to
store empty columns during ingestion. When set to true, Druid stores every
column specified in the
[`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use
the string-based schemaless ingestion and don't specify any dimensions to
ingest, you must also set
[`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for
Druid to store empty columns.<br/><br/>If you set `storeEmptyCo [...]
|`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task
to be used to store temporary files on disk. This config is generally intended
for internal usage. Attempts to set it are very likely to be overwritten by the
TaskRunner that executes the task, so be sure of what you expect to happen
before directly adjusting this configuration parameter. The config is
documented here primarily to provide an understanding of what it means if/when
someone sees that it has been s [...]
+|`druid.indexer.task.allowHadoopTaskExecution`|Conditional dictating if the
cluster allows `index_hadoop` tasks to be executed. `index_hadoop` is
deprecated, and defaulting to false will force cluster operators to acknowledge
the deprecation and consciously opt in to using index_hadoop with the
understanding that it will be removed in the future.|false|
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests
served by a task's chat handler. Set to 0 to disable limiting.|0|
If the Peon is running in remote mode, there must be an Overlord up and
running. Peons in remote mode can set the following configurations:
diff --git
a/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopIndexTask.java
b/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopIndexTask.java
index 6af5cfcfb1c..1fd9784a29a 100644
---
a/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopIndexTask.java
+++
b/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopIndexTask.java
@@ -295,6 +295,22 @@ public class HadoopIndexTask extends HadoopTask implements
ChatHandler
{
try {
taskConfig = toolbox.getConfig();
+ if (!taskConfig.isAllowHadoopTaskExecution()) {
+ errorMsg = StringUtils.format(
+ "Hadoop tasks are deprecated and will be removed in a future
release. "
+ + "Currently, they are not allowed to run on this cluster. If you
wish to run them despite deprecation, "
+ + "please set [%s] to true.",
+ TaskConfig.ALLOW_HADOOP_TASK_EXECUTION_KEY
+ );
+ log.error(errorMsg);
+ toolbox.getTaskReportFileWriter().write(getId(),
getTaskCompletionReports());
+ return TaskStatus.failure(getId(), errorMsg);
+ }
+ log.warn("Running deprecated index_hadoop task [%s]. "
+ + "Hadoop batch indexing is deprecated and will be removed in a
future release. "
+ + "Please plan your migration to one of Druid's supported
indexing patterns.",
+ getId()
+ );
if (chatHandlerProvider.isPresent()) {
log.info("Found chat handler of class[%s]",
chatHandlerProvider.get().getClass().getName());
chatHandlerProvider.get().register(getId(), this, false);
diff --git
a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopIndexTaskTest.java
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopIndexTaskTest.java
index d3ecea88de8..a2726da91d6 100644
---
a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopIndexTaskTest.java
+++
b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopIndexTaskTest.java
@@ -23,6 +23,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.indexer.granularity.UniformGranularitySpec;
+import org.apache.druid.indexing.common.TaskToolbox;
+import org.apache.druid.indexing.common.config.TaskConfigBuilder;
+import org.apache.druid.indexing.overlord.TestTaskToolboxFactory;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
@@ -41,6 +44,45 @@ public class HadoopIndexTaskTest
{
private final ObjectMapper jsonMapper = new DefaultObjectMapper();
+ @Test
+ public void testHadoopTaskWontRunWithDefaultTaskConfig()
+ {
+ final HadoopIndexTask task = new HadoopIndexTask(
+ null,
+ new HadoopIngestionSpec(
+ DataSchema.builder()
+ .withDataSource("foo")
+ .withGranularity(
+ new UniformGranularitySpec(
+ Granularities.DAY,
+ null,
+ ImmutableList.of(Intervals.of("2010-01-01/P1D"))
+ )
+ )
+ .withObjectMapper(jsonMapper)
+ .build(),
+ new HadoopIOConfig(ImmutableMap.of("paths", "bar"), null, null),
+ null
+ ),
+ null,
+ null,
+ "blah",
+ jsonMapper,
+ null,
+ AuthTestUtils.TEST_AUTHORIZER_MAPPER,
+ null,
+ new HadoopTaskConfig(null, null)
+ );
+
+ TestTaskToolboxFactory.Builder builder = new
TestTaskToolboxFactory.Builder().setConfig(new TaskConfigBuilder().build());
+ TaskToolbox toolbox = new TestTaskToolboxFactory(builder).build(task);
+
+ Assert.assertEquals("Hadoop tasks are deprecated and will be removed in a
future release. Currently, "
+ + "they are not allowed to run on this cluster. If you
wish to run them despite deprecation, "
+ + "please set
[druid.indexer.task.allowHadoopTaskExecution] to true.",
+ task.runTask(toolbox).getErrorMsg());
+ }
+
@Test
public void testCorrectInputSourceResources()
{
diff --git
a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
index 9c88144259f..dc7afcb4ab4 100644
---
a/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
+++
b/indexing-service/src/main/java/org/apache/druid/indexing/common/config/TaskConfig.java
@@ -41,6 +41,7 @@ import java.util.List;
*/
public class TaskConfig implements TaskDirectory
{
+ public static final String ALLOW_HADOOP_TASK_EXECUTION_KEY =
"druid.indexer.task.allowHadoopTaskExecution";
private static final Period DEFAULT_DIRECTORY_LOCK_TIMEOUT = new
Period("PT10M");
private static final Period DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT = new
Period("PT5M");
private static final boolean DEFAULT_STORE_EMPTY_COLUMNS = true;
@@ -76,6 +77,9 @@ public class TaskConfig implements TaskDirectory
@JsonProperty
private final long tmpStorageBytesPerTask;
+ @JsonProperty
+ private final boolean allowHadoopTaskExecution;
+
@JsonCreator
public TaskConfig(
@JsonProperty("baseDir") String baseDir,
@@ -87,7 +91,8 @@ public class TaskConfig implements TaskDirectory
@JsonProperty("ignoreTimestampSpecForDruidInputSource") boolean
ignoreTimestampSpecForDruidInputSource,
@JsonProperty("storeEmptyColumns") @Nullable Boolean storeEmptyColumns,
@JsonProperty("encapsulatedTask") boolean enableTaskLevelLogPush,
- @JsonProperty("tmpStorageBytesPerTask") @Nullable Long
tmpStorageBytesPerTask
+ @JsonProperty("tmpStorageBytesPerTask") @Nullable Long
tmpStorageBytesPerTask,
+ @JsonProperty("allowHadoopTaskExecution") boolean
allowHadoopTaskExecution
)
{
this.baseDir = Configs.valueOrDefault(baseDir,
System.getProperty("java.io.tmpdir"));
@@ -113,6 +118,7 @@ public class TaskConfig implements TaskDirectory
this.storeEmptyColumns = Configs.valueOrDefault(storeEmptyColumns,
DEFAULT_STORE_EMPTY_COLUMNS);
this.tmpStorageBytesPerTask =
Configs.valueOrDefault(tmpStorageBytesPerTask,
DEFAULT_TMP_STORAGE_BYTES_PER_TASK);
+ this.allowHadoopTaskExecution = allowHadoopTaskExecution;
}
private TaskConfig(
@@ -125,7 +131,8 @@ public class TaskConfig implements TaskDirectory
boolean ignoreTimestampSpecForDruidInputSource,
boolean storeEmptyColumns,
boolean encapsulatedTask,
- long tmpStorageBytesPerTask
+ long tmpStorageBytesPerTask,
+ boolean allowHadoopTaskExecution
)
{
this.baseDir = baseDir;
@@ -138,6 +145,7 @@ public class TaskConfig implements TaskDirectory
this.storeEmptyColumns = storeEmptyColumns;
this.encapsulatedTask = encapsulatedTask;
this.tmpStorageBytesPerTask = tmpStorageBytesPerTask;
+ this.allowHadoopTaskExecution = allowHadoopTaskExecution;
}
@JsonProperty
@@ -230,6 +238,12 @@ public class TaskConfig implements TaskDirectory
return tmpStorageBytesPerTask;
}
+ @JsonProperty
+ public boolean isAllowHadoopTaskExecution()
+ {
+ return allowHadoopTaskExecution;
+ }
+
private String defaultDir(@Nullable String configParameter, final String
defaultVal)
{
if (configParameter == null) {
@@ -251,7 +265,8 @@ public class TaskConfig implements TaskDirectory
ignoreTimestampSpecForDruidInputSource,
storeEmptyColumns,
encapsulatedTask,
- tmpStorageBytesPerTask
+ tmpStorageBytesPerTask,
+ allowHadoopTaskExecution
);
}
@@ -267,7 +282,8 @@ public class TaskConfig implements TaskDirectory
ignoreTimestampSpecForDruidInputSource,
storeEmptyColumns,
encapsulatedTask,
- tmpStorageBytesPerTask
+ tmpStorageBytesPerTask,
+ allowHadoopTaskExecution
);
}
}
diff --git
a/indexing-service/src/test/java/org/apache/druid/indexing/common/config/TaskConfigBuilder.java
b/indexing-service/src/test/java/org/apache/druid/indexing/common/config/TaskConfigBuilder.java
index ff8f86d8e7e..6ce4d044505 100644
---
a/indexing-service/src/test/java/org/apache/druid/indexing/common/config/TaskConfigBuilder.java
+++
b/indexing-service/src/test/java/org/apache/druid/indexing/common/config/TaskConfigBuilder.java
@@ -36,6 +36,7 @@ public class TaskConfigBuilder
private Boolean storeEmptyColumns;
private boolean enableTaskLevelLogPush;
private Long tmpStorageBytesPerTask;
+ private boolean allowHadoopTaskExecution;
public TaskConfigBuilder setBaseDir(String baseDir)
{
@@ -97,6 +98,12 @@ public class TaskConfigBuilder
return this;
}
+ public TaskConfigBuilder setAllowHadoopTaskExecution(boolean
allowHadoopTaskExecution)
+ {
+ this.allowHadoopTaskExecution = allowHadoopTaskExecution;
+ return this;
+ }
+
public TaskConfig build()
{
return new TaskConfig(
@@ -109,7 +116,8 @@ public class TaskConfigBuilder
ignoreTimestampSpecForDruidInputSource,
storeEmptyColumns,
enableTaskLevelLogPush,
- tmpStorageBytesPerTask
+ tmpStorageBytesPerTask,
+ allowHadoopTaskExecution
);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]