This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new c8c25b111d [HUDI-4888] Throw exception if COW table and consistent hashing bucket index (#7172)
c8c25b111d is described below

commit c8c25b111db03fc87fa24020f6b52fbb3af8315d
Author: Jon Vexler <[email protected]>
AuthorDate: Thu Nov 10 13:42:23 2022 -0500

    [HUDI-4888] Throw exception if COW table and consistent hashing bucket index (#7172)
    
    Co-authored-by: Jonathan Vexler <=>
---
 .../org/apache/hudi/config/HoodieIndexConfig.java  |  2 +-
 .../hudi/common/table/HoodieTableMetaClient.java   | 17 ++++++++++++
 .../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 30 +++++++++++++++++++++-
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
index 3c8bc636ed..c250e07f33 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
@@ -255,7 +255,7 @@ public class HoodieIndexConfig extends HoodieConfig {
       .withDocumentation("Type of bucket index engine to use. Default is SIMPLE bucket index, with fixed number of bucket."
           + "Possible options are [SIMPLE | CONSISTENT_HASHING]."
           + "Consistent hashing supports dynamic resizing of the number of bucket, solving potential data skew and file size "
-          + "issues of the SIMPLE hashing engine.");
+          + "issues of the SIMPLE hashing engine. Consistent hashing only works with MOR tables, only use simple hashing on COW tables.");
 
   /**
    * Bucket num equals file groups num in each partition.
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
index 87f2410af4..4fbf7c53b7 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
@@ -115,6 +115,12 @@ public class HoodieTableMetaClient implements Serializable {
   private FileSystemRetryConfig fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().build();
   protected HoodieMetastoreConfig metastoreConfig;
 
+  /**
+   *
+   * Instantiate HoodieTableMetaClient.
+   * Can only be called if table already exists
+   *
+   */
   protected HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad,
                                 ConsistencyGuardConfig consistencyGuardConfig, Option<TimelineLayoutVersion> layoutVersion,
                                 String payloadClassName, FileSystemRetryConfig fileSystemRetryConfig) {
@@ -412,6 +418,17 @@ public class HoodieTableMetaClient implements Serializable {
         throw new HoodieException("Only simple, non-partitioned or complex key generator are supported when meta-fields are disabled. Used: " + keyGenClass);
       }
     }
+
+    //Check to make sure it's not a COW table with consistent hashing bucket index
+    if (tableType == HoodieTableType.COPY_ON_WRITE) {
+      String indexType = properties.getProperty("hoodie.index.type");
+      if (indexType != null && indexType.equals("BUCKET")) {
+        String bucketEngine = properties.getProperty("hoodie.index.bucket.engine");
+        if (bucketEngine != null && bucketEngine.equals("CONSISTENT_HASHING")) {
+          throw new HoodieException("Consistent hashing bucket index does not work with COW table. Use simple bucket index or an MOR table.");
+        }
+      }
+    }
   }
 
   /**
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
index 2ce76b2bff..732d8d7ec0 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
@@ -24,7 +24,7 @@ import org.apache.hudi.client.SparkRDDWriteClient
 import org.apache.hudi.common.model._
 import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver}
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator
-import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig}
+import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieIndexConfig, HoodieWriteConfig}
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode
 import org.apache.hudi.functional.TestBootstrap
@@ -1137,6 +1137,34 @@ class TestHoodieSparkSqlWriter {
     val kg2 = HoodieWriterUtils.getOriginKeyGenerator(m2)
     assertTrue(kg2 == classOf[SimpleKeyGenerator].getName)
   }
+
+  /**
+   *
+   * Test that you can't have consistent hashing bucket index on a COW table
+   * */
+  @Test
+  def testCOWConsistentHashing(): Unit = {
+    val _spark = spark
+    import _spark.implicits._
+    val df = Seq((1, "a1", 10, 1000, "2021-10-16")).toDF("id", "name", "value", "ts", "dt")
+    val options = Map(
+      DataSourceWriteOptions.RECORDKEY_FIELD.key -> "id",
+      DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "ts",
+      DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "dt",
+      HoodieIndexConfig.BUCKET_INDEX_ENGINE_TYPE.key -> "CONSISTENT_HASHING",
+      HoodieIndexConfig.INDEX_TYPE.key -> "BUCKET"
+    )
+
+    val (tableName1, tablePath1) = ("hoodie_test_params_1", s"$tempBasePath" + "_1")
+    val exc = intercept[HoodieException] {
+      df.write.format("hudi")
+        .options(options)
+        .option(HoodieWriteConfig.TBL_NAME.key, tableName1)
+        .option(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key, classOf[NonpartitionedKeyGenerator].getName)
+        .mode(SaveMode.Overwrite).save(tablePath1)
+    }
+    assert(exc.getMessage.contains("Consistent hashing bucket index does not work with COW table. Use simple bucket index or an MOR table."))
+  }
 }
 
 object TestHoodieSparkSqlWriter {

Reply via email to