[
https://issues.apache.org/jira/browse/HUDI-2390?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
renhao updated HUDI-2390:
-------------------------
Description:
Test Case:
{code:java}
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._{code}
1.
{code:java}
spark.read.parquet("/tmp/tb_base").createTempView("test1"){code}
2.
{code:java}
spark.sql("create table testdb.sql_test_cow (primary_key int, col0 int, col1
string, col2 int, col3 string, col4 double, col5 date, col6 timestamp, col7
int) using hudi partitioned by(col0) options(primaryKey='primary_key',
preCombineField='col2')"){code}
3.
{code:java}
val base_data = spark.read.parquet("/tmp/tb_base"){code}
{code:java}
base_data.write.format("hudi").
option(TABLE_TYPE_OPT_KEY, COW_TABLE_TYPE_OPT_VAL).
option(PRECOMBINE_FIELD_OPT_KEY, "col2").
option(RECORDKEY_FIELD_OPT_KEY, "primary_key").
option(PARTITIONPATH_FIELD_OPT_KEY, "col0").
option(KEYGENERATOR_CLASS_OPT_KEY,
"org.apache.hudi.keygen.SimpleKeyGenerator"). option(OPERATION_OPT_KEY,
"bulk_insert").
option(HIVE_SYNC_ENABLED_OPT_KEY, "true").
option(HIVE_PARTITION_FIELDS_OPT_KEY, "col0").
option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,"org.apache.hudi.hive.MultiPartKeysValueExtractor").
option(HIVE_DATABASE_OPT_KEY, "testdb").
option(HIVE_TABLE_OPT_KEY, "sql_test_cow").
option(HIVE_USE_JDBC_OPT_KEY, "true").
option("hoodie.bulkinsert.shuffle.parallelism", 4).
option("hoodie.datasource.write.hive_style_partitioning", "true").
option(TABLE_NAME,"sql_test_cow").mode(Append).save(s"/user/hive/warehouse/testdb.db/sql_test_cow"){code}
4.
{code:java}
sql("delete from testdb.sql_test_cow where primary_key = 1"){code}
5.
{code:java}
sql("select primary_key,col0,col1,col2,col3,col4,col5,col6,col7 from
testdb.sql_test_cow order by primary_key").show(false){code}
*step 4 executes without exception, but the record is not deleted*
!image-2021-09-02-09-52-24-704.png!
was:
Test Case:
{code:java}
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._{code}
1.
{code:java}
spark.read.parquet("/tmp/tb_base").createTempView("test1"){code}
2.
{code:java}
spark.sql("create table testdb.sql_test_cow (primary_key int, col0 int, col1
string, col2 int, col3 string, col4 double, col5 date, col6 timestamp, col7
int) using hudi partitioned by(col0) options(primaryKey='primary_key',
preCombineField='col2')"){code}
3.
{code:java}
val base_data = spark.read.parquet("/tmp/tb_base"){code}
{code:java}
base_data.write.format("hudi").
option(TABLE_TYPE_OPT_KEY, COW_TABLE_TYPE_OPT_VAL).
option(PRECOMBINE_FIELD_OPT_KEY, "col2").
option(RECORDKEY_FIELD_OPT_KEY, "primary_key").
option(PARTITIONPATH_FIELD_OPT_KEY, "col0").
option(KEYGENERATOR_CLASS_OPT_KEY,
"org.apache.hudi.keygen.SimpleKeyGenerator"). option(OPERATION_OPT_KEY,
"bulk_insert").
option(HIVE_SYNC_ENABLED_OPT_KEY, "true").
option(HIVE_PARTITION_FIELDS_OPT_KEY, "col0").
option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,"org.apache.hudi.hive.MultiPartKeysValueExtractor").
option(HIVE_DATABASE_OPT_KEY, "testdb").
option(HIVE_TABLE_OPT_KEY, "sql_test_cow").
option(HIVE_USE_JDBC_OPT_KEY, "true").
option("hoodie.bulkinsert.shuffle.parallelism", 4).
option("hoodie.datasource.write.hive_style_partitioning", "true").
option(TABLE_NAME,"sql_test_cow").mode(Append).save(s"/user/hive/warehouse/testdb.db/sql_test_cow"){code}
4.
{code:java}
sql("delete from testdb.sql_test_cow where primary_key = 1"){code}
5.
{code:java}
sql("select primary_key,col0,col1,col2,col3,col4,col5,col6,col7 from
testdb.sql_test_cow order by primary_key").show(false){code}
*step 4 executes without exception, but the record is not deleted*
!image-2021-09-02-09-52-24-704.png!
> Create table via Hudi SQL, write data into the table via the datasource, Hudi
> delete command cannot delete data
> -----------------------------------------------------------------------------------------------
>
> Key: HUDI-2390
> URL: https://issues.apache.org/jira/browse/HUDI-2390
> Project: Apache Hudi
> Issue Type: Bug
> Components: Spark Integration
> Affects Versions: 0.9.0
> Reporter: renhao
> Priority: Major
> Labels: features
> Attachments: tb_base.7z
>
>
> Test Case:
>
> {code:java}
> import org.apache.hudi.QuickstartUtils._
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.config.HoodieWriteConfig._{code}
>
> 1.
> {code:java}
> spark.read.parquet("/tmp/tb_base").createTempView("test1"){code}
>
> 2.
> {code:java}
> spark.sql("create table testdb.sql_test_cow (primary_key int, col0 int, col1
> string, col2 int, col3 string, col4 double, col5 date, col6 timestamp, col7
> int) using hudi partitioned by(col0) options(primaryKey='primary_key',
> preCombineField='col2')"){code}
>
> 3.
> {code:java}
> val base_data = spark.read.parquet("/tmp/tb_base"){code}
>
>
> {code:java}
> base_data.write.format("hudi").
> option(TABLE_TYPE_OPT_KEY, COW_TABLE_TYPE_OPT_VAL).
> option(PRECOMBINE_FIELD_OPT_KEY, "col2").
> option(RECORDKEY_FIELD_OPT_KEY, "primary_key").
> option(PARTITIONPATH_FIELD_OPT_KEY, "col0").
> option(KEYGENERATOR_CLASS_OPT_KEY,
> "org.apache.hudi.keygen.SimpleKeyGenerator"). option(OPERATION_OPT_KEY,
> "bulk_insert").
> option(HIVE_SYNC_ENABLED_OPT_KEY, "true").
> option(HIVE_PARTITION_FIELDS_OPT_KEY, "col0").
> option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,"org.apache.hudi.hive.MultiPartKeysValueExtractor").
>
> option(HIVE_DATABASE_OPT_KEY, "testdb").
> option(HIVE_TABLE_OPT_KEY, "sql_test_cow").
> option(HIVE_USE_JDBC_OPT_KEY, "true").
> option("hoodie.bulkinsert.shuffle.parallelism", 4).
> option("hoodie.datasource.write.hive_style_partitioning", "true").
> option(TABLE_NAME,"sql_test_cow").mode(Append).save(s"/user/hive/warehouse/testdb.db/sql_test_cow"){code}
>
> 4.
> {code:java}
> sql("delete from testdb.sql_test_cow where primary_key = 1"){code}
> 5.
> {code:java}
> sql("select primary_key,col0,col1,col2,col3,col4,col5,col6,col7 from
> testdb.sql_test_cow order by primary_key").show(false){code}
> *step 4 executes without exception, but the record is not deleted*
> !image-2021-09-02-09-52-24-704.png!
--
This message was sent by Atlassian Jira
(v8.3.4#803005)