voonhous commented on issue #8540:
URL: https://github.com/apache/hudi/issues/8540#issuecomment-1527190526
```scala
test("Test add column + change column type + drop partition") {
  withTempDir { tmp =>
    val tableName = generateTableName
    // Create a COW table partitioned by year/month/day, keyed on id, pre-combined on ts.
    spark.sql(
      s"""
         |create table $tableName (
         |  id string,
         |  name string,
         |  price string,
         |  ts long,
         |  year string,
         |  month string,
         |  day string
         |) using hudi
         |tblproperties(
         |  type = 'cow',
         |  primaryKey = 'id',
         |  preCombineField = 'ts'
         |) partitioned by (`year`, `month`, `day`)
         |location '${tmp.getCanonicalPath}'
       """.stripMargin)

    // Seed one record before any schema change.
    spark.sql(s"insert into $tableName values (1,'danny','2.22',1000,'2023','04','25')")
    checkAnswer(s"select id, name, price, ts, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", 1000, "2023", "04", "25")
    )

    // Enable hudi full schema-on-read evolution.
    spark.sql(s"set hoodie.schema.on.read.enable=true")

    // Add a column; the pre-existing row should read back with null for new_col.
    spark.sql(s"alter table $tableName add column (new_col bigint)")
    spark.sql(s"insert into $tableName values " +
      s"(2,'danny','2.22',1001,222222,'2023','04','23'), " +
      s"(3,'danny','3.33',1001,333333,'2023','04','24')")
    checkAnswer(s"select id, name, price, ts, new_col, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", 1000, null, "2023", "04", "25"),
      Seq("2", "danny", "2.22", 1001, 222222, "2023", "04", "23"),
      Seq("3", "danny", "3.33", 1001, 333333, "2023", "04", "24")
    )

    // Change column type of ts from long to string; earlier rows must read back as strings.
    spark.sql(s"alter table $tableName alter ts type string")
    // Insert a new record (ts now a string literal) into a different partition.
    spark.sql(s"insert into $tableName values " +
      s"(4,'danny','4.44','1002',444444,'2023','04','22')")
    checkAnswer(s"select id, name, price, ts, new_col, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", "1000", null, "2023", "04", "25"),
      Seq("2", "danny", "2.22", "1001", 222222, "2023", "04", "23"),
      Seq("3", "danny", "3.33", "1001", 333333, "2023", "04", "24"),
      Seq("4", "danny", "4.44", "1002", 444444, "2023", "04", "22")
    )

    // Delete the record with id=1 through the DataFrame writer (DELETE operation),
    // keeping schema-on-read and schema reconciliation enabled for the write.
    import spark.implicits._
    val dataToDelete =
      Seq(("1", "2023", "04", "25")).toDF("id", "year", "month", "day")
    dataToDelete
      .write
      .format("org.apache.hudi")
      .option(OPERATION_OPT_KEY, DELETE_OPERATION_OPT_VAL)
      .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
      .option(RECORDKEY_FIELD_OPT_KEY, "id")
      .option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,
        classOf[MultiPartKeysValueExtractor].getName)
      .option("hoodie.schema.on.read.enable", "true")
      .option("hoodie.datasource.write.reconcile.schema", "true")
      .option(HoodieWriteConfig.TABLE_NAME, tableName)
      .mode("append")
      .save(s"${tmp.getCanonicalPath}")

    val df = spark.read.format("org.apache.hudi").load(s"${tmp.getCanonicalPath}")
    df.show(false)
    // spark.sql(s"DELETE FROM $tableName WHERE id='1'")
    // spark.sql(s"SELECT * FROM $tableName").show(false)
  }
}
```
I tried running this and it completed without any issue, so I can't reproduce your error.
Note: my test case might itself be wrong, since the results it returns look incorrect.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]