[
https://issues.apache.org/jira/browse/HUDI-6582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
voon updated HUDI-6582:
-----------------------
Description:
When recreating a hudi table with an existing hoodie.properties, the name of
the hudi table will be changed to "topLevelRecord".
This ticket is a followup for HUDI-6145 to ensure that the name/namespace of
the table is standardised accordingly.
{code:java}
test("Test Create Hoodie Table with existing hoodie.properties") {
withTempDir { tmp =>
val tableName = generateTableName
val tablePath = s"${tmp.getCanonicalPath}"
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
//
hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
but got
{"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
// drop the table without purging hdfs directory
spark.sql(s"drop table $tableName".stripMargin)
val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
// avro schema name and namespace should not change
spark.newSession().sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
///
hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
} {code}
was:
When recreating a hudi table with an existing hoodie.properties, the name of
the hudi table will be changed to "topLevelRecord".
This ticket is a followup for HUDI-8587 to ensure that the name/namespace of
the table is standardised accordingly.
{code:java}
test("Test Create Hoodie Table with existing hoodie.properties") {
withTempDir { tmp =>
val tableName = generateTableName
val tablePath = s"${tmp.getCanonicalPath}"
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
//
hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
but got
{"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
// drop the table without purging hdfs directory
spark.sql(s"drop table $tableName".stripMargin)
val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
// avro schema name and namespace should not change
spark.newSession().sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
///
hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
} {code}
> Table create schema's name should be set accordingly
> ----------------------------------------------------
>
> Key: HUDI-6582
> URL: https://issues.apache.org/jira/browse/HUDI-6582
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: voon
> Assignee: voon
> Priority: Major
>
> When recreating a hudi table with an existing hoodie.properties, the name of
> the hudi table will be changed to "topLevelRecord".
>
> This ticket is a followup for HUDI-6145 to ensure that the name/namespace of
> the table is standardised accordingly.
>
> {code:java}
> test("Test Create Hoodie Table with existing hoodie.properties") {
> withTempDir { tmp =>
> val tableName = generateTableName
> val tablePath = s"${tmp.getCanonicalPath}"
> spark.sql(
> s"""
> |create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long
> |) using hudi
> | location '$tablePath'
> | tblproperties (
> | primaryKey ='id',
> | type = 'cow',
> | preCombineField = 'ts'
> | )
> """.stripMargin)
> //
> hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
> but got
> {"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
> // drop the table without purging hdfs directory
> spark.sql(s"drop table $tableName".stripMargin)
> val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
> .setConf(spark.sparkContext.hadoopConfiguration)
> .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
> // avro schema name and namespace should not change
> spark.newSession().sql(
> s"""
> |create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long
> |) using hudi
> | location '$tablePath'
> | tblproperties (
> | primaryKey ='id',
> | type = 'cow',
> | preCombineField = 'ts'
> | )
> """.stripMargin)
> ///
> hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
> val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
> .setConf(spark.sparkContext.hadoopConfiguration)
> .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
> assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
> } {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)