codejoyan opened a new issue #3514:
URL: https://github.com/apache/hudi/issues/3514
I have written a Hudi COW dataset and synced it to the Hive metastore. After that I am trying to read the Hudi dataset from beeline.
I have added the hudi-hadoop-mr jar and also set the **hive.input.format** and **hive.tez.input.format** properties. `show create table` works and the table definition looks correct, but querying the data fails with "java.sql.SQLException: Error retrieving next row". Please help!
Write
====
df.write.format("org.apache.hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD_OPT_KEY, "ts").
option(RECORDKEY_FIELD_OPT_KEY, "uuid").
option(PARTITIONPATH_FIELD_OPT_KEY, "continent,country,city").
option(TABLE_NAME, tableName).
option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "findl_hudi_poc").
option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "hudi_trips_cow").
option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY,
"org.apache.hudi.keygen.ComplexKeyGenerator").
option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY,
"continent,country,city").
option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "gtphudidp00-m-0:9083").
option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY,
classOf[MultiPartKeysValueExtractor].getName).
option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true").
mode(Overwrite).
save(basePath)
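
A quick way to double-check the dataset itself (independent of Hive) is to read it back through the Spark datasource in the same spark-shell session, using the same basePath as above; a minimal sketch:

// Sanity-check read of the freshly written COW table. Depending on the Hudi
// release, the bare basePath may need partition globs instead, e.g.
// basePath + "/*/*/*/*".
val verifyDF = spark.read.format("org.apache.hudi").load(basePath)
verifyDF.select("uuid", "continent", "country", "city", "fare").show(10, false)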
Read
====
0: jdbc:hive2://finstrnhwa9-0.us-east4.us.wal> show create table findl_hudi_poc.hudi_trips_cow;
Getting log thread is interrupted, since query is done!
+-------------------------------------------------------------------------+--+
| createtab_stmt |
+-------------------------------------------------------------------------+--+
| CREATE EXTERNAL TABLE `findl_hudi_poc.hudi_trips_cow`( |
| `_hoodie_commit_time` string, |
| `_hoodie_commit_seqno` string, |
| `_hoodie_record_key` string, |
| `_hoodie_partition_path` string, |
| `_hoodie_file_name` string, |
| `begin_lat` string, |
| `begin_lon` string, |
| `driver` string, |
| `end_lat` string, |
| `end_lon` string, |
| `fare` string, |
| `rider` string, |
| `ts` string, |
| `uuid` string) |
| PARTITIONED BY ( |
| `continent` string, |
| `country` string, |
| `city` string) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hudi.hadoop.HoodieParquetInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' |
| LOCATION |
| 'gs://hudi-storage3/tmp/hudi_trips_cow' |
| TBLPROPERTIES ( |
| 'last_commit_time_sync'='20210819175430', |
| 'transient_lastDdlTime'='1629393099') |
+-------------------------------------------------------------------------+--+
30 rows selected (1.68 seconds)
0: jdbc:hive2://sample-host> add jar hdfs:///tmp/j0s0j7j/hudi-hadoop-mr-bundle-0.7.0.jar;
INFO : converting to local hdfs:///tmp/j0s0j7j/hudi-hadoop-mr-bundle-0.7.0.jar
INFO : Added [/tmp/61f05ef2-b200-4d62-8801-1f2e337cdf54_resources/hudi-hadoop-mr-bundle-0.7.0.jar] to class path
INFO : Added resources: [hdfs:///tmp/j0s0j7j/hudi-hadoop-mr-0.9.0-SNAPSHOT.jar]
No rows affected (0.254 seconds)
0: jdbc:hive2://sample-host> set hive.input.format="org.apache.hudi.hadoop.HoodieParquetInputFormat";
No rows affected (0.007 seconds)
0: jdbc:hive2://sample-host> set hive.tez.input.format="org.apache.hadoop.hive.ql.io.HiveInputFormat";
No rows affected (0.006 seconds)
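(One thing I am not sure about: Hive's SET stores everything after the `=` verbatim, so the double quotes above may become part of the property values. The quote-free equivalents would be:)

0: jdbc:hive2://sample-host> set hive.input.format=org.apache.hudi.hadoop.HoodieParquetInputFormat;
0: jdbc:hive2://sample-host> set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;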
0: jdbc:hive2://finstrnhwa9-0.us-east4.us.wal> select * from findl_hudi_poc.hudi_trips_cow;
Getting log thread is interrupted, since query is done!
org.apache.thrift.transport.TTransportException: HTTP Response code: 500
    at org.apache.thrift.transport.THttpClient.flushUsingHttpClient(THttpClient.java:262)
    at org.apache.thrift.transport.THttpClient.flush(THttpClient.java:313)
    at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:73)
    at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:62)
    at org.apache.hive.service.cli.thrift.TCLIService$Client.send_FetchResults(TCLIService.java:503)
    at org.apache.hive.service.cli.thrift.TCLIService$Client.FetchResults(TCLIService.java:495)
    at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hive.jdbc.HiveConnection$SynchronizedHandler.invoke(HiveConnection.java:1443)
    at com.sun.proxy.$Proxy4.FetchResults(Unknown Source)
    at org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:372)
    at org.apache.hive.beeline.BufferedRows.<init>(BufferedRows.java:52)
    at org.apache.hive.beeline.IncrementalRowsWithNormalization.<init>(IncrementalRowsWithNormalization.java:50)
    at org.apache.hive.beeline.BeeLine.print(BeeLine.java:1820)
    at org.apache.hive.beeline.Commands.execute(Commands.java:883)
    at org.apache.hive.beeline.Commands.sql(Commands.java:729)
    at org.apache.hive.beeline.BeeLine.dispatch(BeeLine.java:1000)
    at org.apache.hive.beeline.BeeLine.execute(BeeLine.java:835)
    at org.apache.hive.beeline.BeeLine.begin(BeeLine.java:793)
    at org.apache.hive.beeline.BeeLine.mainWithInputRedirection(BeeLine.java:493)
    at org.apache.hive.beeline.BeeLine.main(BeeLine.java:476)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:233)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:148)
Error: Error retrieving next row (state=,code=0)
java.sql.SQLException: Error retrieving next row
    at org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:396)
    at org.apache.hive.beeline.BufferedRows.<init>(BufferedRows.java:52)
    at org.apache.hive.beeline.IncrementalRowsWithNormalization.<init>(IncrementalRowsWithNormalization.java:50)
    at org.apache.hive.beeline.BeeLine.print(BeeLine.java:1820)
    at org.apache.hive.beeline.Commands.execute(Commands.java:883)
    at org.apache.hive.beeline.Commands.sql(Commands.java:729)
    at org.apache.hive.beeline.BeeLine.dispatch(BeeLine.java:1000)
    at org.apache.hive.beeline.BeeLine.execute(BeeLine.java:835)
    at org.apache.hive.beeline.BeeLine.begin(BeeLine.java:793)
    at org.apache.hive.beeline.BeeLine.mainWithInputRedirection(BeeLine.java:493)
    at org.apache.hive.beeline.BeeLine.main(BeeLine.java:476)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:233)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:148)
Caused by: org.apache.thrift.transport.TTransportException: HTTP Response code: 500
    at org.apache.thrift.transport.THttpClient.flushUsingHttpClient(THttpClient.java:262)
    at org.apache.thrift.transport.THttpClient.flush(THttpClient.java:313)
    at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:73)
    at org.apache.thrift.TServiceClient.sendBase(TServiceClient.java:62)
    at org.apache.hive.service.cli.thrift.TCLIService$Client.send_FetchResults(TCLIService.java:503)
    at org.apache.hive.service.cli.thrift.TCLIService$Client.FetchResults(TCLIService.java:495)
    at sun.reflect.GeneratedMethodAccessor2.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hive.jdbc.HiveConnection$SynchronizedHandler.invoke(HiveConnection.java:1443)
    at com.sun.proxy.$Proxy4.FetchResults(Unknown Source)
    at org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:372)
    ... 16 more