[
https://issues.apache.org/jira/browse/HUDI-9203?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
sivabalan narayanan updated HUDI-9203:
--------------------------------------
Parent: HUDI-9026
Issue Type: Sub-task (was: Improvement)
> Fix usability w/ upgrading from 0.x to 1.x and wish to stay in table version 6
> ------------------------------------------------------------------------------
>
> Key: HUDI-9203
> URL: https://issues.apache.org/jira/browse/HUDI-9203
> Project: Apache Hudi
> Issue Type: Sub-task
> Components: writer-core
> Reporter: sivabalan narayanan
> Priority: Major
>
> When a user upgrades a table from 0.15.0 to 1.x and prefers to stay on
> table version 6, the migration guide
> ([https://hudi.apache.org/docs/deployment#upgrading-to-100]) expects the
> user to set only two configs:
> {{hoodie.write.auto.upgrade}} = false
> {{hoodie.metadata.enable}} = false
> With latest master we have also added support for a backwards compatible
> writer, which leaves just one config, i.e.
> {{hoodie.write.auto.upgrade}}=false.
> But when attempting to use a 1.0 binary to write to a Hudi table created
> using 0.x, we error out as below:
> {code:java}
> df1.write.format("hudi").
> | options(getQuickstartWriteConfigs).
> | option("hoodie.write.auto.upgrade","false").
> | option(PRECOMBINE_FIELD_OPT_KEY, "ts").
> | option(RECORDKEY_FIELD_OPT_KEY, "uuid").
> | option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
> | option(TABLE_NAME, tableName).
> | mode(Append).
> | save(basePath)
> warning: one deprecation; for details, enable `:setting -deprecation' or
> `:replay -deprecation'
> # WARNING: Unable to attach Serviceability Agent. Unable to attach even with
> module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException:
> Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException:
> Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException:
> Sense failed.]
> org.apache.hudi.exception.HoodieUpgradeDowngradeException: Please disable
> metadata table before upgrading from version SIX to EIGHT.
> at
> org.apache.hudi.table.upgrade.UpgradeDowngrade.needsUpgradeOrDowngrade(UpgradeDowngrade.java:72)
> at
> org.apache.hudi.table.upgrade.UpgradeDowngrade.needsUpgradeOrDowngrade(UpgradeDowngrade.java:80)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.needsUpgradeOrDowngrade(BaseHoodieWriteClient.java:1490)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:956)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:949)
> at
> org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:514)
> at
> org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:192)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:118)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
> at
> org.apache.spark.sql.adapter.BaseSpark3Adapter.sqlExecutionWithNewExecutionId(BaseSpark3Adapter.scala:107)
> at
> org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:214)
> at
> org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:129)
> at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:171)
> at
> org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:118)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
> at
> org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
> at
> org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
> at
> org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
> at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
> at
> org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
> at
> org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:133)
> at
> org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:856)
> at
> org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:387)
> at
> org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:360)
> at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
> ... 67 elided
> scala> {code}
> Script to reproduce: just follow the quick start, using the 0.14.1 Spark bundle.
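> For example, the shell can be launched along these lines (the Spark/Scala
> versions and extra confs here are assumptions, not from the original run):
> {code:bash}
> # Launch spark-shell with the released 0.14.1 Hudi bundle (adjust Spark/Scala versions as needed)
> spark-shell \
>   --packages org.apache.hudi:hudi-spark3.4-bundle_2.12:0.14.1 \
>   --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
>   --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'{code}
> Then, in the shell: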
> {code:java}
> import org.apache.hudi.QuickstartUtils._
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.config.HoodieWriteConfig._
> import org.apache.hudi.common.model.HoodieRecord
> val tableName = "hudi_trips_cow"
> val basePath = "file:///tmp/hudi_trips_cow"
> val dataGen = new DataGenerator{code}
> {code:java}
> val inserts = convertToStringList(dataGen.generateInserts(10))
> val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
> df.write.format("hudi").
> options(getQuickstartWriteConfigs).
> option(PRECOMBINE_FIELD_OPT_KEY, "ts").
> option(RECORDKEY_FIELD_OPT_KEY, "uuid").
> option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
> option(TABLE_NAME, tableName).
> mode(Overwrite).
> save(basePath){code}
> Re-launch the spark shell using a bundle built from latest master.
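> For example (the jar path is a placeholder for the locally built bundle):
> {code:bash}
> # Launch spark-shell with a bundle built from latest master (path is a placeholder)
> spark-shell \
>   --jars /path/to/hudi-spark-bundle.jar \
>   --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
>   --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'{code}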
> {code:java}
> import org.apache.hudi.QuickstartUtils._
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.config.HoodieWriteConfig._
> import org.apache.hudi.common.model.HoodieRecord
> val tableName = "hudi_trips_cow"
> val basePath = "file:///tmp/hudi_trips_cow"
> val dataGen = new DataGenerator{code}
> {code:java}
> val df1 =
> spark.read.format("hudi").load(basePath).drop("_hoodie_commit_time","_hoodie_commit_seqno","_hoodie_record_key","_hoodie_partition_path","_hoodie_file_name").limit(2){code}
> {code:java}
> df1.write.format("hudi").
>   options(getQuickstartWriteConfigs).
>   option("hoodie.write.auto.upgrade","false").
>   option(PRECOMBINE_FIELD_OPT_KEY, "ts").
>   option(RECORDKEY_FIELD_OPT_KEY, "uuid").
>   option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
>   option(TABLE_NAME, tableName).
>   mode(Append).
>   save(basePath){code}
> You will hit the exception quoted above.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)