bvaradar commented on a change in pull request #1004: [HUDI-328] Adding delete 
api to HoodieWriteClient
URL: https://github.com/apache/incubator-hudi/pull/1004#discussion_r346443167
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
 ##########
 @@ -327,22 +325,35 @@ public static SparkConf registerClasses(SparkConf conf) {
   }
 
   /**
-   * Deletes a bunch of keys from the Hoodie table, at the supplied commitTime
+   * Deletes a list of {@link HoodieKey}s from the Hoodie table, at the 
supplied commitTime. {@link HoodieKey}s will be
+   * deduped and non-existent keys will be removed before deleting.
+   *
+   * @param keys {@link JavaRDD} of {@link HoodieKey}s to be deleted
+   * @param commitTime Commit time handle
+   * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and 
counts
    */
   public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String 
commitTime) {
     HoodieTable<T> table = getTableAndInitCtx();
     try {
       // De-dupe/merge if needed
       JavaRDD<HoodieKey> dedupedKeys =
-          combineKeysOnCondition(config.shouldCombineBeforeUpsert(), keys, 
config.getUpsertShuffleParallelism());
+          config.shouldCombineBeforeDelete() ? deduplicateKeys(keys, 
config.getDeleteShuffleParallelism()) : keys;
 
-      JavaRDD<HoodieRecord<T>> dedupedRecords = 
generateHoodieRecordsToDeleteFromKeys(dedupedKeys);
+      JavaRDD<HoodieRecord<T>> dedupedRecords =
+          dedupedKeys.map(key -> new HoodieRecord(key, new 
EmptyHoodieRecordPayload()));
       indexTimer = metrics.getIndexCtx();
       // perform index lookup to get existing location of records
       JavaRDD<HoodieRecord<T>> taggedRecords = 
index.tagLocation(dedupedRecords, jsc, table);
-      metrics.updateIndexMetrics("lookup", metrics.getDurationInMs(indexTimer 
== null ? 0L : indexTimer.stop()));
-      indexTimer = null;
-      return upsertRecordsInternal(taggedRecords, commitTime, table, true);
+      // filter out non-existent keys/records
+      JavaRDD<HoodieRecord<T>> taggedValidRecords = 
taggedRecords.filter(record -> record.getCurrentLocation() != null);
+      if (!taggedValidRecords.isEmpty()) {
 
 Review comment:
   This is ok

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to