[
https://issues.apache.org/jira/browse/HUDI-3282?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
董可伦 updated HUDI-3282:
----------------------
Description:
{{hudi 0.11.0 master build
spark: 2.4.5}}
hive create database test_hudi;
spark-shell --master yarn --deploy-mode client --executor-memory 2G
--num-executors 3 --executor-cores 2 --driver-memory 4G --driver-cores 2 --conf
'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf
'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'
--principal .. --keytab .. import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.QuickstartUtils.{DataGenerator, convertToStringList,
getQuickstartWriteConfigs} import
org.apache.hudi.config.HoodieWriteConfig.TBL_NAME import
org.apache.spark.sql.SaveMode._ import org.apache.spark.sql.{SaveMode,
SparkSession} import org.apache.spark.sql.functions.lit import
org.apache.hudi.DataSourceReadOptions._ import
org.apache.hudi.config.HoodieWriteConfig import
org.apache.hudi.keygen.SimpleKeyGenerator import
org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodiePayloadProps}
import org.apache.hudi.io.HoodieMergeHandle import
org.apache.hudi.common.table.HoodieTableConfig import
org.apache.spark.sql.functions._ import spark.implicits._ val df = Seq((1,
"a1", 10, 1000, "2022-01-19")).toDF("id", "name", "value", "ts", "dt")
df.write.format("hudi"). option(HoodieWriteConfig.TBL_NAME.key,
"test_hudi_table_sync_hive"). option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL).
option(RECORDKEY_FIELD.key, "id"). option(PRECOMBINE_FIELD.key, "ts").
option(KEYGENERATOR_CLASS_OPT_KEY,
"org.apache.hudi.keygen.NonpartitionedKeyGenerator").
option("hoodie.datasource.write.partitionpath.field", "").
option("hoodie.metadata.enable", false). option(KEYGENERATOR_CLASS_OPT_KEY,
"org.apache.hudi.keygen.ComplexKeyGenerator"). option(META_SYNC_ENABLED.key(),
true). option(HIVE_USE_JDBC.key(), false). option(HIVE_DATABASE.key(),
"test_hudi"). option(HIVE_AUTO_CREATE_DATABASE.key(), true).
option(HIVE_TABLE.key(), "test_hudi_table_sync_hive").
option(HIVE_PARTITION_EXTRACTOR_CLASS.key(),
"org.apache.hudi.hive.MultiPartKeysValueExtractor"). mode("overwrite").
save("/test_hudi/test_hudi_table_sync_hive")
{{# hoodie.properties
hoodie.table.precombine.field=ts
hoodie.table.partition.fields=
hoodie.table.type=COPY_ON_WRITE
hoodie.archivelog.folder=archived
hoodie.populate.meta.fields=true
hoodie.timeline.layout.version=1
hoodie.table.version=3
hoodie.table.recordkey.fields=id
hoodie.table.base.file.format=PARQUET
hoodie.table.timeline.timezone=LOCAL
hoodie.table.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator
hoodie.table.name=test_hudi_table_sync_hive
hoodie.datasource.write.hive_style_partitioning=false}}
hive
show create table test_hudi_table_sync_hive;
+----------------------------------------------------+ | createtab_stmt |
+----------------------------------------------------+ | CREATE EXTERNAL TABLE
`test_hudi_table_sync_hive`( | | `_hoodie_commit_time` string, | |
`_hoodie_commit_seqno` string, | | `_hoodie_record_key` string, | |
`_hoodie_partition_path` string, | | `_hoodie_file_name` string, | | `id` int,
| | `name` string, | | `value` int, | | `ts` int, | | `dt` string) | | ROW
FORMAT SERDE | | 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
| | WITH SERDEPROPERTIES ( | | 'hoodie.query.as.ro.table'='false', | |
'path'='/test_hudi/test_hudi_table_sync_hive') | | STORED AS INPUTFORMAT | |
'org.apache.hudi.hadoop.HoodieParquetInputFormat' | | OUTPUTFORMAT | |
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' | | LOCATION |
| 'hdfs://cluster1/test_hudi/test_hudi_table_sync_hive' | | TBLPROPERTIES ( | |
'last_commit_time_sync'='20220119110215185', | |
'spark.sql.sources.provider'='hudi', | |
'spark.sql.sources.schema.numParts'='1', | |
'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[\{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},\{"name":"id","type":"integer","nullable":false,"metadata":{}},\{"name":"name","type":"string","nullable":true,"metadata":{}},\{"name":"value","type":"integer","nullable":false,"metadata":{}},\{"name":"ts","type":"integer","nullable":false,"metadata":{}},\{"name":"dt","type":"string","nullable":true,"metadata":{}}]}',
| | 'transient_lastDdlTime'='1642561355') |
+----------------------------------------------------+ 28 rows selected (0.429
seconds)
spark-sql --master yarn --deploy-mode client --conf
'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf
'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'
--principal .. --keytab .. delete from test_hudi.test_hudi_table_sync_hive
where id=1;
was:h1. Fix delete exception for Spark SQL when syncing Hive
> Fix delete exception for Spark SQL when syncing Hive
> ----------------------------------------------------
>
> Key: HUDI-3282
> URL: https://issues.apache.org/jira/browse/HUDI-3282
> Project: Apache Hudi
> Issue Type: Bug
> Components: hive-sync, spark-sql
> Affects Versions: 0.10.0
> Reporter: 董可伦
> Assignee: 董可伦
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.10.1
>
>
> {{hudi 0.11.0 master build
> spark: 2.4.5}}
> hive create database test_hudi;
> spark-shell --master yarn --deploy-mode client --executor-memory 2G
> --num-executors 3 --executor-cores 2 --driver-memory 4G --driver-cores 2
> --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf
> 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'
> --principal .. --keytab .. import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.QuickstartUtils.{DataGenerator, convertToStringList,
> getQuickstartWriteConfigs} import
> org.apache.hudi.config.HoodieWriteConfig.TBL_NAME import
> org.apache.spark.sql.SaveMode._ import org.apache.spark.sql.{SaveMode,
> SparkSession} import org.apache.spark.sql.functions.lit import
> org.apache.hudi.DataSourceReadOptions._ import
> org.apache.hudi.config.HoodieWriteConfig import
> org.apache.hudi.keygen.SimpleKeyGenerator import
> org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodiePayloadProps}
> import org.apache.hudi.io.HoodieMergeHandle import
> org.apache.hudi.common.table.HoodieTableConfig import
> org.apache.spark.sql.functions._ import spark.implicits._ val df = Seq((1,
> "a1", 10, 1000, "2022-01-19")).toDF("id", "name", "value", "ts", "dt")
> df.write.format("hudi"). option(HoodieWriteConfig.TBL_NAME.key,
> "test_hudi_table_sync_hive"). option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL).
> option(RECORDKEY_FIELD.key, "id"). option(PRECOMBINE_FIELD.key, "ts").
> option(KEYGENERATOR_CLASS_OPT_KEY,
> "org.apache.hudi.keygen.NonpartitionedKeyGenerator").
> option("hoodie.datasource.write.partitionpath.field", "").
> option("hoodie.metadata.enable", false). option(KEYGENERATOR_CLASS_OPT_KEY,
> "org.apache.hudi.keygen.ComplexKeyGenerator").
> option(META_SYNC_ENABLED.key(), true). option(HIVE_USE_JDBC.key(), false).
> option(HIVE_DATABASE.key(), "test_hudi").
> option(HIVE_AUTO_CREATE_DATABASE.key(), true). option(HIVE_TABLE.key(),
> "test_hudi_table_sync_hive"). option(HIVE_PARTITION_EXTRACTOR_CLASS.key(),
> "org.apache.hudi.hive.MultiPartKeysValueExtractor"). mode("overwrite").
> save("/test_hudi/test_hudi_table_sync_hive")
> {{# hoodie.properties
> hoodie.table.precombine.field=ts
> hoodie.table.partition.fields=
> hoodie.table.type=COPY_ON_WRITE
> hoodie.archivelog.folder=archived
> hoodie.populate.meta.fields=true
> hoodie.timeline.layout.version=1
> hoodie.table.version=3
> hoodie.table.recordkey.fields=id
> hoodie.table.base.file.format=PARQUET
> hoodie.table.timeline.timezone=LOCAL
> hoodie.table.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator
> hoodie.table.name=test_hudi_table_sync_hive
> hoodie.datasource.write.hive_style_partitioning=false}}
> hive
> show create table test_hudi_table_sync_hive;
> +----------------------------------------------------+ | createtab_stmt |
> +----------------------------------------------------+ | CREATE EXTERNAL
> TABLE `test_hudi_table_sync_hive`( | | `_hoodie_commit_time` string, | |
> `_hoodie_commit_seqno` string, | | `_hoodie_record_key` string, | |
> `_hoodie_partition_path` string, | | `_hoodie_file_name` string, | | `id`
> int, | | `name` string, | | `value` int, | | `ts` int, | | `dt` string) | |
> ROW FORMAT SERDE | |
> 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' | | WITH
> SERDEPROPERTIES ( | | 'hoodie.query.as.ro.table'='false', | |
> 'path'='/test_hudi/test_hudi_table_sync_hive') | | STORED AS INPUTFORMAT | |
> 'org.apache.hudi.hadoop.HoodieParquetInputFormat' | | OUTPUTFORMAT | |
> 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' | | LOCATION
> | | 'hdfs://cluster1/test_hudi/test_hudi_table_sync_hive' | | TBLPROPERTIES (
> | | 'last_commit_time_sync'='20220119110215185', | |
> 'spark.sql.sources.provider'='hudi', | |
> 'spark.sql.sources.schema.numParts'='1', | |
> 'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[\{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},\{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},\{"name":"id","type":"integer","nullable":false,"metadata":{}},\{"name":"name","type":"string","nullable":true,"metadata":{}},\{"name":"value","type":"integer","nullable":false,"metadata":{}},\{"name":"ts","type":"integer","nullable":false,"metadata":{}},\{"name":"dt","type":"string","nullable":true,"metadata":{}}]}',
> | | 'transient_lastDdlTime'='1642561355') |
> +----------------------------------------------------+ 28 rows selected
> (0.429 seconds)
> spark-sql --master yarn --deploy-mode client --conf
> 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf
> 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension'
> --principal .. --keytab .. delete from test_hudi.test_hudi_table_sync_hive
> where id=1;
--
This message was sent by Atlassian Jira
(v8.20.1#820001)