This is an automated email from the ASF dual-hosted git repository.
snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 555a4a35ef append UUID to segment name in SegmentGenerationAndPushTask (#10335)
555a4a35ef is described below
commit 555a4a35ef1cb9918f221e7b22ee3aed48d9886b
Author: Haitao Zhang <[email protected]>
AuthorDate: Mon Feb 27 19:47:08 2023 -0800
append UUID to segment name in SegmentGenerationAndPushTask (#10335)
---
.../SegmentGenerationAndPushTaskExecutor.java | 2 ++
.../SegmentGenerationAndPushTaskGenerator.java | 8 ++++++++
2 files changed, 10 insertions(+)
diff --git
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
index 6475d6f895..af49c6e2f1 100644
---
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
+++
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
@@ -321,6 +321,8 @@ public class SegmentGenerationAndPushTaskExecutor extends
BaseTaskExecutor {
segmentNameGeneratorSpec.setType(taskConfigs.get(BatchConfigProperties.SEGMENT_NAME_GENERATOR_TYPE));
segmentNameGeneratorSpec.setConfigs(IngestionConfigUtils.getConfigMapWithPrefix(taskConfigs,
BatchConfigProperties.SEGMENT_NAME_GENERATOR_PROP_PREFIX));
+
segmentNameGeneratorSpec.addConfig(SegmentGenerationTaskRunner.APPEND_UUID_TO_SEGMENT_NAME,
+
taskConfigs.getOrDefault(BatchConfigProperties.APPEND_UUID_TO_SEGMENT_NAME,
Boolean.toString(false)));
taskSpec.setSegmentNameGeneratorSpec(segmentNameGeneratorSpec);
taskSpec.setCustomProperty(BatchConfigProperties.INPUT_DATA_FILE_URI_KEY,
inputFileURI.toString());
diff --git
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
index e1bdd97cac..268f84456a 100644
---
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
+++
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
@@ -300,6 +300,14 @@ public class SegmentGenerationAndPushTaskGenerator extends
BaseTaskGenerator {
segmentName);
}
}
+ // The SEQUENCE_ID used to identify each segment is only unique to each round of task generation. Across multiple
+ // rounds of task generation, the SEQUENCE_ID can be the same.
+ // This may lead to segment name collisions, and existing segments will be overwritten. Once old segments are
+ // overwritten, the corresponding files become un-ingested, so the generator generates new tasks, which produce
+ // segments with the same names and overwrite the existing segments again... It becomes an endless loop.
+ // We append a UUID to the segment name to avoid segment name collisions across multiple rounds of task
+ // generation, which solves the problem.
+
singleFileGenerationTaskConfig.put(BatchConfigProperties.APPEND_UUID_TO_SEGMENT_NAME,
Boolean.toString(true));
if ((outputDirURI == null) || (pushMode == null)) {
singleFileGenerationTaskConfig.put(BatchConfigProperties.PUSH_MODE,
DEFAULT_SEGMENT_PUSH_TYPE.toString());
} else {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]