Re: [PR] [HUDI-7480] Fix functional index and avoid multiple initializations [hudi]

via GitHub Thu, 14 Mar 2024 03:14:08 -0700


codope commented on code in PR #10860:
URL: https://github.com/apache/hudi/pull/10860#discussion_r1524587040



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1919,7 +1920,7 @@ public HoodieRecord next() {
   public static Schema 
getProjectedSchemaForFunctionalIndex(HoodieFunctionalIndexDefinition 
indexDefinition, HoodieTableMetaClient metaClient) throws Exception {
     TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient);
     Schema tableSchema = schemaResolver.getTableAvroSchema();
-    return HoodieAvroUtils.getSchemaForFields(tableSchema, 
indexDefinition.getSourceFields());
+    return addMetadataFields(getSchemaForFields(tableSchema, 
indexDefinition.getSourceFields()));

Review Comment:
   Yeah, for updates we need the record key field.



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestFunctionalIndex.scala:
##########
@@ -253,6 +253,47 @@ class TestFunctionalIndex extends HoodieSparkSqlTestBase {
     }
   }
 
+  test("Test functional index update after initialization") {
+    withTempDir(tmp => {
+      val tableName = generateTableName
+      val basePath = s"${tmp.getCanonicalPath}/$tableName"
+      spark.sql(
+        s"""create table $tableName (
+            id int,
+            name string,
+            price double,
+            ts long
+            ) using hudi
+            options (
+            primaryKey ='id',
+            type = 'mor',
+            preCombineField = 'ts',
+            hoodie.metadata.record.index.enable = 'true',
+            hoodie.datasource.write.recordkey.field = 'id'
+            )
+            partitioned by(ts)
+            location '$basePath'""".stripMargin)
+      spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)")
+      spark.sql(s"insert into $tableName values(2, 'a2', 10, 1001)")
+      spark.sql(s"insert into $tableName values(3, 'a3', 10, 1002)")
+
+      checkAnswer(s"select id, name from $tableName where from_unixtime(ts, 
'yyyy-MM-dd') = '1970-01-01'")(
+        Seq(1, "a1"),
+        Seq(2, "a2"),
+        Seq(3, "a3")
+      )
+      // create functional index
+      val createIndexSql = s"create index idx_datestr on $tableName using 
column_stats(ts) options(func='from_unixtime', format='yyyy-MM-dd')"
+      spark.sql(createIndexSql)
+      // do another insert after initializing the index
+      spark.sql(s"insert into $tableName values(4, 'a4', 10, 10000000)")
+      // check query result
+      checkAnswer(s"select id, name from $tableName where from_unixtime(ts, 
'yyyy-MM-dd') = '1970-04-26'")(
+        Seq(4, "a4")
+      )

Review Comment:
   Ack, will do.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [HUDI-7480] Fix functional index and avoid multiple initializations [hudi]

Reply via email to