[
https://issues.apache.org/jira/browse/HUDI-9680?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Vamsi Karnika reassigned HUDI-9680:
-----------------------------------
Assignee: Y Ethan Guo
> Failure to Create Secondary Index in Hudi 1.0.2
> -----------------------------------------------
>
> Key: HUDI-9680
> URL: https://issues.apache.org/jira/browse/HUDI-9680
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Vamsi Karnika
> Assignee: Y Ethan Guo
> Priority: Major
>
> Creating a secondary index fails with the error `secondary_index is not supported`
> in Hudi 1.0.2.
> Steps to reproduce:
> spark.sql("DROP TABLE IF EXISTS hudi_test_sec_index")
> spark.sql("""
> CREATE TABLE hudi_test_sec_index (
> id STRING,
> Name STRING,
> Age INT
> )
> USING hudi
> OPTIONS (
> primaryKey = 'id',
> 'hoodie.metadata.record.index.enable' = 'true',
> 'hoodie.write.record.merge.mode' = 'COMMIT_TIME_ORDERING'
> )
> LOCATION 'file:///tmp/hudi_test_sec_index'
> """)
> spark.sql("""
> INSERT INTO hudi_test_sec_index VALUES
> ('1', 'Alice', 25),
> ('2', 'Bob', 30)
> """)
> spark.sql("CREATE INDEX idx_age ON hudi_test_sec_index USING
> secondary_index(Age)");
> ```
> org.apache.hudi.exception.HoodieIndexException: secondary_index is not
> supported
> at
> org.apache.spark.sql.hudi.command.CreateIndexCommand.run(IndexCommands.scala:79)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
> at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
> at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
> at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
> at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
> at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
> at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
> ... 47 elided
> ```
--
This message was sent by Atlassian Jira
(v8.20.10#820010)