804e opened a new issue, #13069:
URL: https://github.com/apache/hudi/issues/13069
**Describe the problem you faced**
I used Flink SQL to create the table hudi_table_hive and inserted data; querying from Flink
works normally. However, an exception was thrown when querying via Hive beeline.
**To Reproduce**
Steps to reproduce the behavior:
1. flink sql client create table and insert data
```
CREATE TABLE hudi_table_hive(
ts BIGINT,
uuid VARCHAR(40) PRIMARY KEY NOT ENFORCED,
rider VARCHAR(20),
driver VARCHAR(20),
fare DOUBLE,
city VARCHAR(20)
)
PARTITIONED BY (`city`)
WITH (
'connector' = 'hudi',
'path' = 'hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive',
'table.type' = 'COPY_ON_WRITE',
'hive_sync.enabled' = 'true',
'hive_sync.db' = 'hive_db',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hudi02.jafron.com:9083',
'hive_sync.conf.dir' = '/opt/hadoop/conf'
);
-- insert data using values
INSERT INTO hudi_table_hive VALUES
(1695159649087,'334e26e9-8355-45cc-97c6-c31daf0df330','rider-A','driver-K',19.10,'san_francisco'),
(1695091554788,'e96c4396-3fad-413a-a942-4cb36106d721','rider-C','driver-M',27.70
,'san_francisco'),
(1695046462179,'9909a8b1-2d15-4d3d-8ec9-efc48c536a00','rider-D','driver-L',33.90
,'san_francisco'),
(1695332066204,'1dced545-862b-4ceb-8b43-d2a568f6616b','rider-E','driver-O',93.50,'san_francisco'),
(1695516137016,'e3cf430c-889d-4015-bc98-59bdce1e530c','rider-F','driver-P',34.15,'sao_paulo'),
(1695376420876,'7a84095f-737f-40bc-b62f-6b69664712d2','rider-G','driver-Q',43.40
,'sao_paulo'),
(1695173887231,'3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04','rider-I','driver-S',41.06
,'chennai'),
(1695115999911,'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa','rider-J','driver-T',17.85,'chennai');
```
2. beeline can show table structure but cannot query
```
0: jdbc:hive2://hudi02.jafron.com:2181,hudi01> show create table
hudi_table_hive;
INFO : Compiling
command(queryId=hive_20250402112526_4e945f4a-4a57-4086-8153-3530a66bbb40): show
create table hudi_table_hive
INFO : Semantic Analysis Completed (retrial = false)
INFO : Returning Hive schema:
Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from
deserializer)], properties:null)
INFO : Completed compiling
command(queryId=hive_20250402112526_4e945f4a-4a57-4086-8153-3530a66bbb40); Time
taken: 0.092 seconds
INFO : Executing
command(queryId=hive_20250402112526_4e945f4a-4a57-4086-8153-3530a66bbb40): show
create table hudi_table_hive
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing
command(queryId=hive_20250402112526_4e945f4a-4a57-4086-8153-3530a66bbb40); Time
taken: 0.054 seconds
INFO : OK
+----------------------------------------------------+
| createtab_stmt |
+----------------------------------------------------+
| CREATE EXTERNAL TABLE `hudi_table_hive`( |
| `_hoodie_commit_time` string COMMENT '', |
| `_hoodie_commit_seqno` string COMMENT '', |
| `_hoodie_record_key` string COMMENT '', |
| `_hoodie_partition_path` string COMMENT '', |
| `_hoodie_file_name` string COMMENT '', |
| `ts` bigint COMMENT '', |
| `uuid` string COMMENT '', |
| `rider` string COMMENT '', |
| `driver` string COMMENT '', |
| `fare` double COMMENT '') |
| PARTITIONED BY ( |
| `city` string COMMENT '') |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' |
| WITH SERDEPROPERTIES ( |
| 'hoodie.query.as.ro.table'='false', |
| 'path'='hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive') |
| STORED AS INPUTFORMAT |
| 'org.apache.hudi.hadoop.HoodieParquetInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' |
| LOCATION |
| 'hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive' |
| TBLPROPERTIES ( |
| 'last_commit_completion_time_sync'='20250402112500650', |
| 'last_commit_time_sync'='20250402112458330', |
| 'spark.sql.sources.provider'='hudi', |
| 'spark.sql.sources.schema.numPartCols'='1', |
| 'spark.sql.sources.schema.numParts'='1', |
|
'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"ts","type":"long","nullable":true,"metadata":{}},{"name":"uuid","type":"string","nullable":false,"metadata":{}},{"name":"rider","type":"string","nullable":true,"metadata":{}},{"name":"driver","type":"string","nullable":true,"metadata":{}},{"name":"fare","type":"double","nullable":true,"metadata":{}},{"name":"city","type":"string","nullable":true,"metadata":{}}]}',
|
| 'spark.sql.sources.schema.partCol.0'='city', |
| 'transient_lastDdlTime'='1743558852') |
+----------------------------------------------------+
33 rows selected (0.213 seconds)
0: jdbc:hive2://hudi02.jafron.com:2181,hudi01> select * from hudi_table_hive;
INFO : Compiling
command(queryId=hive_20250402112530_de8fbc6e-dbd1-4725-84b6-68ba18971c2a):
select * from hudi_table_hive
INFO : Semantic Analysis Completed (retrial = false)
INFO : Returning Hive schema:
Schema(fieldSchemas:[FieldSchema(name:hudi_table_hive._hoodie_commit_time,
type:string, comment:null),
FieldSchema(name:hudi_table_hive._hoodie_commit_seqno, type:string,
comment:null), FieldSchema(name:hudi_table_hive._hoodie_record_key,
type:string, comment:null),
FieldSchema(name:hudi_table_hive._hoodie_partition_path, type:string,
comment:null), FieldSchema(name:hudi_table_hive._hoodie_file_name, type:string,
comment:null), FieldSchema(name:hudi_table_hive.ts, type:bigint, comment:null),
FieldSchema(name:hudi_table_hive.uuid, type:string, comment:null),
FieldSchema(name:hudi_table_hive.rider, type:string, comment:null),
FieldSchema(name:hudi_table_hive.driver, type:string, comment:null),
FieldSchema(name:hudi_table_hive.fare, type:double, comment:null),
FieldSchema(name:hudi_table_hive.city, type:string, comment:null)],
properties:null)
INFO : Completed compiling
command(queryId=hive_20250402112530_de8fbc6e-dbd1-4725-84b6-68ba18971c2a); Time
taken: 0.325 seconds
INFO : Executing
command(queryId=hive_20250402112530_de8fbc6e-dbd1-4725-84b6-68ba18971c2a):
select * from hudi_table_hive
INFO : Completed executing
command(queryId=hive_20250402112530_de8fbc6e-dbd1-4725-84b6-68ba18971c2a); Time
taken: 0.0 seconds
INFO : OK
Error: java.io.IOException: java.lang.IllegalArgumentException
(state=,code=0)
```
hive server error log
```
2025-04-02T11:14:57,719 INFO [HiveServer2-Handler-Pool: Thread-60]:
session.SessionState (:()) - Updating thread name to
f199a7c9-401f-491a-a90e-fa01f9f6f43e HiveServer2-Handler-Pool: Thread-60
2025-04-02T11:14:57,741 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: utils.HoodieInputFormatUtils (:()) -
Reading hoodie metadata from path
hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive
2025-04-02T11:14:57,742 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: table.HoodieTableMetaClient (:()) -
Loading HoodieTableMetaClient from
hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive
2025-04-02T11:14:57,746 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: table.HoodieTableConfig (:()) - Loading
table properties from
hdfs://hudi01.jafron.com:8020/datalake/hudi_table_hive/.hoodie/hoodie.properties
2025-04-02T11:14:57,752 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: conf.HiveConf
(HiveConf.java:getLogIdVar(5037)) - Using the default value passed in for log
id: f199a7c9-401f-491a-a90e-fa01f9f6f43e
2025-04-02T11:14:57,752 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: session.SessionState (:()) - Resetting
thread name to HiveServer2-Handler-Pool: Thread-60
2025-04-02T11:14:57,755 INFO [HiveServer2-Handler-Pool: Thread-60]:
conf.HiveConf (HiveConf.java:getLogIdVar(5037)) - Using the default value
passed in for log id: f199a7c9-401f-491a-a90e-fa01f9f6f43e
2025-04-02T11:14:57,755 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: conf.HiveConf
(HiveConf.java:getLogIdVar(5037)) - Using the default value passed in for log
id: f199a7c9-401f-491a-a90e-fa01f9f6f43e
2025-04-02T11:14:57,756 INFO [HiveServer2-Handler-Pool: Thread-60]:
conf.HiveConf (HiveConf.java:getLogIdVar(5037)) - Using the default value
passed in for log id: f199a7c9-401f-491a-a90e-fa01f9f6f43e
2025-04-02T11:14:57,752 WARN [HiveServer2-Handler-Pool: Thread-60]:
thrift.ThriftCLIService (:()) - Error fetching results:
org.apache.hive.service.cli.HiveSQLException: java.io.IOException:
java.lang.IllegalArgumentException
at
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:465)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:309)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:895)
~[hive-service-3.1.3.jar:3.1.3]
at sun.reflect.GeneratedMethodAccessor23.invoke(Unknown Source) ~[?:?]
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
~[?:1.8.0_412]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_412]
at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
~[hive-service-3.1.3.jar:3.1.3]
at java.security.AccessController.doPrivileged(Native Method)
~[?:1.8.0_412]
at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_412]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
~[hadoop-common-3.3.4.jar:?]
at
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
~[hive-service-3.1.3.jar:3.1.3]
at com.sun.proxy.$Proxy44.fetchResults(Unknown Source) ~[?:?]
at
org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:561)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:786)
~[hive-service-3.1.3.jar:3.1.3]
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1837)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1822)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:647)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
~[hive-exec-3.1.3.jar:3.1.3]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[?:1.8.0_412]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[?:1.8.0_412]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_412]
Caused by: java.io.IOException: java.lang.IllegalArgumentException
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:602)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:509)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2691)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.getResults(ReExecDriver.java:229)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:460)
~[hive-service-3.1.3.jar:3.1.3]
... 24 more
Caused by: java.lang.IllegalArgumentException
at
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:33)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.<init>(TimelineLayoutVersion.java:40)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.HoodieTableConfig.getTimelineLayoutVersion(HoodieTableConfig.java:522)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:143)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.HoodieTableMetaClient.newMetaClient(HoodieTableMetaClient.java:692)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.HoodieTableMetaClient.access$000(HoodieTableMetaClient.java:85)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:774)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked(HoodieInputFormatUtils.java:374)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.hadoop.InputPathHandler.parseInputPaths(InputPathHandler.java:110)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.hadoop.InputPathHandler.<init>(InputPathHandler.java:72)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:110)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:332)
~[hadoop-mapreduce-client-core-3.3.4.jar:?]
at
org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
~[hudi-hadoop-mr-bundle-0.14.1.jar:0.14.1]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.generateWrappedSplits(FetchOperator.java:425)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:395)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:314)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:540)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:509)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
~[hive-exec-3.1.3.jar:3.1.3]
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2691)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.getResults(ReExecDriver.java:229)
~[hive-exec-3.1.3.jar:3.1.3]
at
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:460)
~[hive-service-3.1.3.jar:3.1.3]
... 24 more
2025-04-02T11:14:57,755 INFO [HiveServer2-Handler-Pool: Thread-60]:
session.SessionState (:()) - Updating thread name to
f199a7c9-401f-491a-a90e-fa01f9f6f43e HiveServer2-Handler-Pool: Thread-60
2025-04-02T11:14:57,755 INFO [f199a7c9-401f-491a-a90e-fa01f9f6f43e
HiveServer2-Handler-Pool: Thread-60]: session.SessionState (:()) - Resetting
thread name to HiveServer2-Handler-Pool: Thread-60
```
Checking the source code at `TimelineLayoutVersion.java:40`, I found that the property
`hoodie.timeline.layout.version` must be in the range [0, 1], but it was set to 2 in
`/datalake/hudi_table_hive/.hoodie/hoodie.properties`:
```
#Updated at 2025-04-02T03:24:57.110923Z
#Wed Apr 02 11:24:57 HKT 2025
hoodie.table.keygenerator.type=SIMPLE_AVRO
hoodie.table.type=COPY_ON_WRITE
hoodie.table.precombine.field=ts
hoodie.table.create.schema={"type"\:"record","name"\:"hudi_table_hive_record","namespace"\:"hoodie.hudi_table_hive","fields"\:[{"name"\:"ts","type"\:["null","long"],"default"\:null},{"name"\:"uuid","type"\:"string"},{"name"\:"rider","type"\:["null","string"],"default"\:null},{"name"\:"driver","type"\:["null","string"],"default"\:null},{"name"\:"fare","type"\:["null","double"],"default"\:null},{"name"\:"city","type"\:["null","string"],"default"\:null}]}
hoodie.timeline.layout.version=2
hoodie.timeline.history.path=history
hoodie.table.checksum=860864952
hoodie.datasource.write.drop.partition.columns=false
hoodie.record.merge.strategy.id=00000000-0000-0000-0000-000000000000
hoodie.datasource.write.hive_style_partitioning=false
hoodie.table.metadata.partitions.inflight=
hoodie.database.name=default_database
hoodie.datasource.write.partitionpath.urlencode=false
hoodie.record.merge.mode=CUSTOM
hoodie.table.version=8
hoodie.compaction.payload.class=org.apache.hudi.common.model.EventTimeAvroPayload
hoodie.table.initial.version=8
hoodie.table.metadata.partitions=column_stats,files,partition_stats
hoodie.table.partition.fields=city
hoodie.table.cdc.enabled=false
hoodie.archivelog.folder=history
hoodie.table.name=hudi_table_hive
hoodie.table.recordkey.fields=uuid
hoodie.timeline.path=timeline
hoodie.populate.meta.fields=true
```
**Environment Description**
* Hudi version : 0.14.1
* Hive version : 3.1.3 (per `hive-service-3.1.3.jar` in the stack trace)
* Hadoop version : 3.3.4
* Storage (HDFS/S3/GCS..) : HDFS
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]