voonhous commented on issue #8540:
URL: https://github.com/apache/hudi/issues/8540#issuecomment-1527190526
```scala
test("Test add column + change column type + drop partition") {
  withTempDir { tmp =>
    val tableName = generateTableName
    // Create a COW table partitioned by year/month/day, keyed on id, pre-combined on ts.
    spark.sql(
      s"""
         |create table $tableName (
         |  id string,
         |  name string,
         |  price string,
         |  ts long,
         |  year string,
         |  month string,
         |  day string
         |) using hudi
         |tblproperties(
         |  type = 'cow',
         |  primaryKey = 'id',
         |  preCombineField = 'ts'
         |) partitioned by (`year`, `month`, `day`)
         |location '${tmp.getCanonicalPath}'
       """.stripMargin)

    // Seed one record before any schema change.
    spark.sql(s"insert into $tableName values (1,'danny','2.22',1000,'2023','04','25')")
    checkAnswer(s"select id, name, price, ts, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", 1000, "2023", "04", "25")
    )

    // Enable hudi full schema-on-read evolution.
    spark.sql(s"set hoodie.schema.on.read.enable=true")

    // Add a column; the pre-existing row should read back with null for new_col.
    spark.sql(s"alter table $tableName add column (new_col bigint)")
    spark.sql(s"insert into $tableName values " +
      s"(2,'danny','2.22',1001,222222,'2023','04','23'), " +
      s"(3,'danny','3.33',1001,333333,'2023','04','24')")
    checkAnswer(s"select id, name, price, ts, new_col, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", 1000, null, "2023", "04", "25"),
      Seq("2", "danny", "2.22", 1001, 222222, "2023", "04", "23"),
      Seq("3", "danny", "3.33", 1001, 333333, "2023", "04", "24")
    )

    // Change column type of ts from long to string; earlier rows must read back as strings.
    spark.sql(s"alter table $tableName alter ts type string")
    // Insert a new record (ts now a string literal) into a different partition.
    spark.sql(s"insert into $tableName values " +
      s"(4,'danny','4.44','1002',444444,'2023','04','22')")
    checkAnswer(s"select id, name, price, ts, new_col, year, month, day from $tableName")(
      Seq("1", "danny", "2.22", "1000", null, "2023", "04", "25"),
      Seq("2", "danny", "2.22", "1001", 222222, "2023", "04", "23"),
      Seq("3", "danny", "3.33", "1001", 333333, "2023", "04", "24"),
      Seq("4", "danny", "4.44", "1002", 444444, "2023", "04", "22")
    )

    // Delete the record with id=1 through the DataFrame writer (DELETE operation),
    // keeping schema-on-read and schema reconciliation enabled for the write.
    import spark.implicits._
    val dataToDelete =
      Seq(("1", "2023", "04", "25")).toDF("id", "year", "month", "day")
    dataToDelete
      .write
      .format("org.apache.hudi")
      .option(OPERATION_OPT_KEY, DELETE_OPERATION_OPT_VAL)
      .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
      .option(RECORDKEY_FIELD_OPT_KEY, "id")
      .option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,
        classOf[MultiPartKeysValueExtractor].getName)
      .option("hoodie.schema.on.read.enable", "true")
      .option("hoodie.datasource.write.reconcile.schema", "true")
      .option(HoodieWriteConfig.TABLE_NAME, tableName)
      .mode("append")
      .save(s"${tmp.getCanonicalPath}")

    val df = spark.read.format("org.apache.hudi").load(s"${tmp.getCanonicalPath}")
    df.show(false)
    // spark.sql(s"DELETE FROM $tableName WHERE id='1'")
    // spark.sql(s"SELECT * FROM $tableName").show(false)
  }
}
```
I tried running this and it completed without any issue, so I can't reproduce your error.
Note: my test case might itself be wrong, since the results it returns look incorrect.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]