wangxianghu commented on a change in pull request #1827:
URL: https://github.com/apache/hudi/pull/1827#discussion_r484940203



##########
File path: 
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java
##########
@@ -77,34 +81,44 @@
 import org.apache.parquet.hadoop.ParquetReader;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.schema.MessageType;
-import org.apache.spark.Partitioner;
+import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
-public class BootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>>
-    extends BaseCommitActionExecutor<T, HoodieBootstrapWriteMetadata> {
+public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload>
+    extends BaseCommitActionExecutor<T, JavaRDD<HoodieRecord<T>>, 
JavaRDD<HoodieKey>, JavaRDD<WriteStatus>, JavaPairRDD<HoodieKey, 
Option<Pair<String, String>>>, HoodieBootstrapWriteMetadata> {
 
-  private static final Logger LOG = 
LogManager.getLogger(BootstrapCommitActionExecutor.class);
+  private static final Logger LOG = 
LogManager.getLogger(SparkBootstrapCommitActionExecutor.class);
   protected String bootstrapSchema = null;
   private transient FileSystem bootstrapSourceFileSystem;
 
-  public BootstrapCommitActionExecutor(JavaSparkContext jsc, HoodieWriteConfig 
config, HoodieTable<?> table,
-      Option<Map<String, String>> extraMetadata) {
-    super(jsc, new HoodieWriteConfig.Builder().withProps(config.getProps())
-        .withAutoCommit(true).withWriteStatusClass(BootstrapWriteStatus.class)
-        .withBulkInsertParallelism(config.getBootstrapParallelism())
-        .build(), table, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, 
WriteOperationType.BOOTSTRAP,
+  public SparkBootstrapCommitActionExecutor(HoodieSparkEngineContext context,
+                                            HoodieWriteConfig config,
+                                            HoodieTable<T, 
JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>, 
JavaPairRDD<HoodieKey, Option<Pair<String, String>>>> table,
+                                            Option<Map<String, String>> 
extraMetadata) {
+    super(context, new HoodieWriteConfig.Builder().withProps(config.getProps())
+            
.withAutoCommit(true).withWriteStatusClass(BootstrapWriteStatus.class)
+            .withBulkInsertParallelism(config.getBootstrapParallelism())
+            .build(), table, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, 
WriteOperationType.BOOTSTRAP,
         extraMetadata);
     bootstrapSourceFileSystem = 
FSUtils.getFs(config.getBootstrapSourceBasePath(), hadoopConf);
   }
 
+  @Override
+  public HoodieWriteMetadata<JavaRDD<WriteStatus>> 
execute(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {

Review comment:
       > hmmm? why do we return null here
   
    `BootstrapCommitActionExecutor` does not actually need this method; it is 
inherited from its parent class.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to