This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 555a4a35ef append UUID to segment name in SegmentGenerationAndPushTask (#10335)
555a4a35ef is described below

commit 555a4a35ef1cb9918f221e7b22ee3aed48d9886b
Author: Haitao Zhang <[email protected]>
AuthorDate: Mon Feb 27 19:47:08 2023 -0800

    append UUID to segment name in SegmentGenerationAndPushTask (#10335)
---
 .../SegmentGenerationAndPushTaskExecutor.java                     | 2 ++
 .../SegmentGenerationAndPushTaskGenerator.java                    | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
index 6475d6f895..af49c6e2f1 100644
--- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
+++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskExecutor.java
@@ -321,6 +321,8 @@ public class SegmentGenerationAndPushTaskExecutor extends 
BaseTaskExecutor {
       
segmentNameGeneratorSpec.setType(taskConfigs.get(BatchConfigProperties.SEGMENT_NAME_GENERATOR_TYPE));
       
segmentNameGeneratorSpec.setConfigs(IngestionConfigUtils.getConfigMapWithPrefix(taskConfigs,
           BatchConfigProperties.SEGMENT_NAME_GENERATOR_PROP_PREFIX));
+      
segmentNameGeneratorSpec.addConfig(SegmentGenerationTaskRunner.APPEND_UUID_TO_SEGMENT_NAME,
+          
taskConfigs.getOrDefault(BatchConfigProperties.APPEND_UUID_TO_SEGMENT_NAME, 
Boolean.toString(false)));
       taskSpec.setSegmentNameGeneratorSpec(segmentNameGeneratorSpec);
       
taskSpec.setCustomProperty(BatchConfigProperties.INPUT_DATA_FILE_URI_KEY, 
inputFileURI.toString());
 
diff --git 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
index e1bdd97cac..268f84456a 100644
--- 
a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
+++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/segmentgenerationandpush/SegmentGenerationAndPushTaskGenerator.java
@@ -300,6 +300,14 @@ public class SegmentGenerationAndPushTaskGenerator extends 
BaseTaskGenerator {
             segmentName);
       }
     }
+    // The SEQUENCE_ID used to identify each segment is only unique to each round of task generation. Across multiple
+    // rounds of task generation, the SEQUENCE_ID can be the same.
+    // This may lead to segment name collision, and existing segments will be overridden. Since old segments are
+    // overridden, corresponding files become un-ingested, the generator will generate new tasks, which generates
+    // segments with same names, and override existing segments again... It becomes an endless loop
+    // We add uuid to segment name to avoid segment collision across multiple rounds of task generation to solve the
+    // problem.
+    
singleFileGenerationTaskConfig.put(BatchConfigProperties.APPEND_UUID_TO_SEGMENT_NAME,
 Boolean.toString(true));
     if ((outputDirURI == null) || (pushMode == null)) {
       singleFileGenerationTaskConfig.put(BatchConfigProperties.PUSH_MODE, 
DEFAULT_SEGMENT_PUSH_TYPE.toString());
     } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to