wangxianghu commented on a change in pull request #1827:
URL: https://github.com/apache/hudi/pull/1827#discussion_r484815402



##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndex.java
##########
@@ -21,94 +21,52 @@
 import org.apache.hudi.ApiMaturityLevel;
 import org.apache.hudi.PublicAPIClass;
 import org.apache.hudi.PublicAPIMethod;
-import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.HoodieEngineContext;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.ReflectionUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIndexException;
-import org.apache.hudi.index.bloom.HoodieBloomIndex;
-import org.apache.hudi.index.bloom.HoodieGlobalBloomIndex;
-import org.apache.hudi.index.hbase.HBaseIndex;
-import org.apache.hudi.index.simple.HoodieGlobalSimpleIndex;
-import org.apache.hudi.index.simple.HoodieSimpleIndex;
 import org.apache.hudi.table.HoodieTable;
 
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-
 import java.io.Serializable;
 
 /**
  * Base class for different types of indexes to determine the mapping from 
uuid.
  */
 @PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING)
-public abstract class HoodieIndex<T extends HoodieRecordPayload> implements 
Serializable {
+public abstract class HoodieIndex<T extends HoodieRecordPayload, I, K, O, P> 
implements Serializable {
 
   protected final HoodieWriteConfig config;
 
   protected HoodieIndex(HoodieWriteConfig config) {
     this.config = config;
   }
 
-  public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
-      HoodieWriteConfig config) throws HoodieIndexException {
-    // first use index class config to create index.
-    if (!StringUtils.isNullOrEmpty(config.getIndexClass())) {
-      Object instance = ReflectionUtils.loadClass(config.getIndexClass(), 
config);
-      if (!(instance instanceof HoodieIndex)) {
-        throw new HoodieIndexException(config.getIndexClass() + " is not a 
subclass of HoodieIndex");
-      }
-      return (HoodieIndex) instance;
-    }
-    switch (config.getIndexType()) {
-      case HBASE:
-        return new HBaseIndex<>(config);
-      case INMEMORY:
-        return new InMemoryHashIndex<>(config);
-      case BLOOM:
-        return new HoodieBloomIndex<>(config);
-      case GLOBAL_BLOOM:
-        return new HoodieGlobalBloomIndex<>(config);
-      case SIMPLE:
-        return new HoodieSimpleIndex<>(config);
-      case GLOBAL_SIMPLE:
-        return new HoodieGlobalSimpleIndex<>(config);
-      default:
-        throw new HoodieIndexException("Index type unspecified, set " + 
config.getIndexType());
-    }
-  }
-
   /**
    * Checks if the given [Keys] exists in the hoodie table and returns [Key, 
Option[partitionPath, fileID]] If the
    * optional is empty, then the key is not found.
    */
   @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
-  public abstract JavaPairRDD<HoodieKey, Option<Pair<String, String>>> 
fetchRecordLocation(
-      JavaRDD<HoodieKey> hoodieKeys, final JavaSparkContext jsc, 
HoodieTable<T> hoodieTable);
+  public abstract P fetchRecordLocation(
+      K hoodieKeys, final HoodieEngineContext context, HoodieTable<T, I, K, O, 
P> hoodieTable);
 
   /**
    * Looks up the index and tags each incoming record with a location of a 
file that contains the row (if it is actually
    * present).
    */
   @PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)

Review comment:
       > these annotations need to be moved over to a `SparkHoodieIndex` class? 
it will be hard for end developers to program against `HoodieIndex` directly 
anymore. This is a general point actually. The current public APIs should all 
be annotated against the Spark child classes. wdyt?
   
   good idea, done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to