jsun98 commented on a change in pull request #6431: Add Kinesis Indexing 
Service to core Druid
URL: https://github.com/apache/incubator-druid/pull/6431#discussion_r241908832
 
 

 ##########
 File path: 
extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisor.java
 ##########
 @@ -0,0 +1,507 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.kinesis.supervisor;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.base.Optional;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.indexing.common.stats.RowIngestionMetersFactory;
+import org.apache.druid.indexing.common.task.Task;
+import org.apache.druid.indexing.common.task.TaskResource;
+import org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata;
+import org.apache.druid.indexing.kinesis.KinesisIOConfig;
+import org.apache.druid.indexing.kinesis.KinesisIndexTask;
+import org.apache.druid.indexing.kinesis.KinesisIndexTaskClientFactory;
+import org.apache.druid.indexing.kinesis.KinesisRecordSupplier;
+import org.apache.druid.indexing.kinesis.KinesisSequenceNumber;
+import org.apache.druid.indexing.kinesis.KinesisTuningConfig;
+import org.apache.druid.indexing.overlord.DataSourceMetadata;
+import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
+import org.apache.druid.indexing.overlord.TaskMaster;
+import org.apache.druid.indexing.overlord.TaskStorage;
+import 
org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata;
+import org.apache.druid.indexing.seekablestream.SeekableStreamIOConfig;
+import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask;
+import org.apache.druid.indexing.seekablestream.SeekableStreamPartitions;
+import org.apache.druid.indexing.seekablestream.SeekableStreamTuningConfig;
+import org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber;
+import org.apache.druid.indexing.seekablestream.common.RecordSupplier;
+import org.apache.druid.indexing.seekablestream.common.StreamPartition;
+import 
org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor;
+import 
org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorIOConfig;
+import 
org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisorReportPayload;
+import org.apache.druid.java.util.common.StringUtils;
+import org.joda.time.DateTime;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Supervisor responsible for managing the KinesisIndexTask for a single 
dataSource. At a high level, the class accepts a
+ * {@link KinesisSupervisorSpec} which includes the Kinesis stream and 
configuration as well as an ingestion spec which will
+ * be used to generate the indexing tasks. The run loop periodically refreshes 
its view of the Kinesis stream's partitions
+ * and the list of running indexing tasks and ensures that all partitions are 
being read from and that there are enough
+ * tasks to satisfy the desired number of replicas. As tasks complete, new 
tasks are queued to process the next range of
+ * Kinesis sequences.
+ * <p>
+ * the Kinesis supervisor does not yet support lag calculations
+ */
+public class KinesisSupervisor extends SeekableStreamSupervisor<String, String>
+{
+  private static final String NOT_SET = "-1";
+  private final KinesisSupervisorSpec spec;
+
  /**
   * Creates a supervisor that manages the Kinesis indexing tasks for the single
   * dataSource described by {@code spec}. All collaborators are forwarded to the
   * {@link SeekableStreamSupervisor} base class; only the spec is retained here
   * so the Kinesis-specific overrides can read the stream/tuning configuration.
   *
   * @param taskStorage                       storage for task state
   * @param taskMaster                        overlord task-management facade
   * @param indexerMetadataStorageCoordinator metadata store coordinator
   * @param taskClientFactory                 factory for clients that talk to the index tasks
   * @param mapper                            JSON mapper used for (de)serialization
   * @param spec                              supervisor spec (stream, ingestion schema, tuning)
   * @param rowIngestionMetersFactory         factory for per-task ingestion meters
   */
  public KinesisSupervisor(
      final TaskStorage taskStorage,
      final TaskMaster taskMaster,
      final IndexerMetadataStorageCoordinator indexerMetadataStorageCoordinator,
      final KinesisIndexTaskClientFactory taskClientFactory,
      final ObjectMapper mapper,
      final KinesisSupervisorSpec spec,
      final RowIngestionMetersFactory rowIngestionMetersFactory
  )
  {
    super(
        StringUtils.format("KinesisSupervisor-%s", spec.getDataSchema().getDataSource()),
        taskStorage,
        taskMaster,
        indexerMetadataStorageCoordinator,
        taskClientFactory,
        mapper,
        spec,
        rowIngestionMetersFactory,
        true // NOTE(review): meaning of this flag is not visible here — confirm against the SeekableStreamSupervisor constructor
    );

    this.spec = spec;
  }
+
+  @Override
+  protected SeekableStreamIOConfig createIoConfig(
+      int groupId,
+      Map<String, String> startPartitions,
+      Map<String, String> endPartitions,
+      String baseSequenceName,
+      DateTime minimumMessageTime,
+      DateTime maximumMessageTime,
+      Set<String> exclusiveStartSequenceNumberPartitions,
+      SeekableStreamSupervisorIOConfig ioConfigg
+  )
+  {
+    KinesisSupervisorIOConfig ioConfig = (KinesisSupervisorIOConfig) ioConfigg;
+    return new KinesisIOConfig(
+        groupId,
+        baseSequenceName,
+        new SeekableStreamPartitions<>(ioConfig.getStream(), startPartitions),
+        new SeekableStreamPartitions<>(ioConfig.getStream(), endPartitions),
+        true,
+        true, // should pause after reading otherwise the task may complete 
early which will confuse the supervisor
+        minimumMessageTime,
+        maximumMessageTime,
+        ioConfig.getEndpoint(),
+        ioConfig.getRecordsPerFetch(),
+        ioConfig.getFetchDelayMillis(),
+        ioConfig.getAwsAccessKeyId(),
+        ioConfig.getAwsSecretAccessKey(),
+        exclusiveStartSequenceNumberPartitions,
+        ioConfig.getAwsAssumedRoleArn(),
+        ioConfig.getAwsExternalId(),
+        ioConfig.isDeaggregate()
+    );
+  }
+
+  @Override
+  protected List<SeekableStreamIndexTask<String, String>> createIndexTasks(
+      int replicas,
+      String baseSequenceName,
+      ObjectMapper sortingMapper,
+      TreeMap<Integer, Map<String, String>> sequenceOffsets,
+      SeekableStreamIOConfig taskIoConfig,
+      SeekableStreamTuningConfig taskTuningConfig,
+      RowIngestionMetersFactory rowIngestionMetersFactory
+  ) throws JsonProcessingException, NoSuchMethodException, 
IllegalAccessException, ClassNotFoundException
+  {
+    final String checkpoints = sortingMapper.writerFor(new 
TypeReference<TreeMap<Integer, Map<String, String>>>()
+    {
+    }).writeValueAsString(sequenceOffsets);
+    final Map<String, Object> context = spec.getContext() == null
+                                        ? ImmutableMap.of(
+        "checkpoints",
+        checkpoints,
+        IS_INCREMENTAL_HANDOFF_SUPPORTED,
+        true
+    ) : ImmutableMap.<String, Object>builder()
+                                            .put("checkpoints", checkpoints)
+                                            
.put(IS_INCREMENTAL_HANDOFF_SUPPORTED, true)
+                                            .putAll(spec.getContext())
+                                            .build();
+    List<SeekableStreamIndexTask<String, String>> taskList = new ArrayList<>();
+    for (int i = 0; i < replicas; i++) {
+      String taskId = Joiner.on("_").join(baseSequenceName, getRandomId());
+      taskList.add(new KinesisIndexTask(
+          taskId,
+          new TaskResource(baseSequenceName, 1),
+          spec.getDataSchema(),
+          (KinesisTuningConfig) taskTuningConfig,
+          (KinesisIOConfig) taskIoConfig,
+          context,
+          null,
+          null,
+          rowIngestionMetersFactory
+      ));
+    }
+    return taskList;
+  }
+
+
+  @Override
+  protected RecordSupplier<String, String> setupRecordSupplier()
+      throws IllegalAccessException, NoSuchMethodException, 
ClassNotFoundException
+  {
+    KinesisSupervisorIOConfig ioConfig = spec.getIoConfig();
+    KinesisTuningConfig taskTuningConfig = spec.getTuningConfig();
+
+    return new KinesisRecordSupplier(
+        KinesisRecordSupplier.getAmazonKinesisClient(
+            ioConfig.getEndpoint(),
+            ioConfig.getAwsAccessKeyId(),
+            ioConfig.getAwsSecretAccessKey(),
+            ioConfig.getAwsAssumedRoleArn(),
+            ioConfig.getAwsExternalId()
+        ),
+        ioConfig.getRecordsPerFetch(),
+        ioConfig.getFetchDelayMillis(),
+        1,
+        ioConfig.isDeaggregate(),
+        taskTuningConfig.getRecordBufferSize(),
+        taskTuningConfig.getRecordBufferOfferTimeout(),
+        taskTuningConfig.getRecordBufferFullWait(),
+        taskTuningConfig.getFetchSequenceNumberTimeout(),
+        taskTuningConfig.getMaxRecordsPerPoll()
+    );
+
+  }
+
+
+  @Override
+  protected void scheduleReporting(ScheduledExecutorService reportingExec)
+  {
+    // Implement this for Kinesis which uses approximate time from latest 
instead of offset lag
+/*
 
 Review comment:
   opened an issue for this 
https://github.com/apache/incubator-druid/issues/6739

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to