jihoonson commented on a change in pull request #7048: Make 
IngestSegmentFirehoseFactory splittable for parallel ingestion
URL: https://github.com/apache/incubator-druid/pull/7048#discussion_r264418002
 
 

 ##########
 File path: 
indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
 ##########
 @@ -52,28 +56,38 @@
 import org.apache.druid.timeline.TimelineObjectHolder;
 import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.PartitionChunk;
+import org.apache.druid.timeline.partition.PartitionHolder;
 import org.joda.time.Duration;
 import org.joda.time.Interval;
 
 import javax.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import java.util.stream.Stream;
 
-public class IngestSegmentFirehoseFactory implements 
FirehoseFactory<InputRowParser>
+public class IngestSegmentFirehoseFactory implements 
FiniteFirehoseFactory<InputRowParser, List<WindowedSegmentId>>
 {
   private static final EmittingLogger log = new 
EmittingLogger(IngestSegmentFirehoseFactory.class);
+  private static final long DEFAULT_MAX_INPUT_SEGMENT_BYTES_PER_TASK = 150 * 
1024 * 1024;
   private final String dataSource;
   private final Interval interval;
+  private final List<WindowedSegmentId> segmentIds;
   private final DimFilter dimFilter;
   private final List<String> dimensions;
   private final List<String> metrics;
+  private final long maxInputSegmentBytesPerTask;
+  private List<InputSplit<List<WindowedSegmentId>>> splits;
 
 Review comment:
  nit: the code would be easier to read if the final and non-final fields 
were declared separately.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to