[
https://issues.apache.org/jira/browse/HUDI-6582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
voon updated HUDI-6582:
-----------------------
Description:
When recreating a hudi table with an existing hoodie.properties, the name of
the hudi table will be changed to "topLevelRecord".
This ticket is a followup for HUDI-6145 to ensure that the name/namespace of
the table is standardised accordingly.
{code:java}
test("Test Create Hoodie Table with existing hoodie.properties") {
withTempDir { tmp =>
val tableName = generateTableName
val tablePath = s"${tmp.getCanonicalPath}"
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
//
hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
but got
{"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
// drop the table without purging hdfs directory
spark.sql(s"drop table $tableName".stripMargin)
val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
// avro schema name and namespace should not change
spark.newSession().sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
///
hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
} {code}
was:
When recreating a hudi table with an existing hoodie.properties, the name of
the hudi table will be changed to "topLevelRecord".
This ticket is a followup for HUDI-8587 to ensure that the name/namespace of
the table is standardised accordingly.
{code:java}
test("Test Create Hoodie Table with existing hoodie.properties") {
withTempDir { tmp =>
val tableName = generateTableName
val tablePath = s"${tmp.getCanonicalPath}"
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
//
hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
but got
{"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
// drop the table without purging hdfs directory
spark.sql(s"drop table $tableName".stripMargin)
val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
// avro schema name and namespace should not change
spark.newSession().sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| location '$tablePath'
| tblproperties (
| primaryKey ='id',
| type = 'cow',
| preCombineField = 'ts'
| )
""".stripMargin)
///
hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
.setConf(spark.sparkContext.hadoopConfiguration)
.setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
} {code}
> Table create schema's name should be set accordingly
> ----------------------------------------------------
>
> Key: HUDI-6582
> URL: https://issues.apache.org/jira/browse/HUDI-6582
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: voon
> Assignee: voon
> Priority: Major
>
> When recreating a hudi table with an existing hoodie.properties, the name of
> the hudi table will be changed to "topLevelRecord".
>
> This ticket is a followup for HUDI-6145 to ensure that the name/namespace of
> the table is standardised accordingly.
>
> {code:java}
> test("Test Create Hoodie Table with existing hoodie.properties") {
> withTempDir { tmp =>
> val tableName = generateTableName
> val tablePath = s"${tmp.getCanonicalPath}"
> spark.sql(
> s"""
> |create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long
> |) using hudi
> | location '$tablePath'
> | tblproperties (
> | primaryKey ='id',
> | type = 'cow',
> | preCombineField = 'ts'
> | )
> """.stripMargin)
> //
> hoodie.table.create.schema={"type":"record","name":"h0_record","namespace":"hoodie.h0","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]},
> but got
> {"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
> // drop the table without purging hdfs directory
> spark.sql(s"drop table $tableName".stripMargin)
> val tableSchemaAfterCreate1 = HoodieTableMetaClient.builder()
> .setConf(spark.sparkContext.hadoopConfiguration)
> .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
> // avro schema name and namespace should not change
> spark.newSession().sql(
> s"""
> |create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long
> |) using hudi
> | location '$tablePath'
> | tblproperties (
> | primaryKey ='id',
> | type = 'cow',
> | preCombineField = 'ts'
> | )
> """.stripMargin)
> ///
> hoodie.table.create.schema={"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"name","type":["string","null"]},{"name":"price","type":["double","null"]},{"name":"ts","type":["long","null"]}]}
> val tableSchemaAfterCreate2 = HoodieTableMetaClient.builder()
> .setConf(spark.sparkContext.hadoopConfiguration)
> .setBasePath(tablePath).build().getTableConfig.getTableCreateSchema
> assertResult(tableSchemaAfterCreate1.get)(tableSchemaAfterCreate2.get)
> } {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)