zhangbutao created HIVE-27869:
---------------------------------
Summary: Iceberg: Select HadoopTables will fail at
HiveIcebergStorageHandler::canProvideColStats
Key: HIVE-27869
URL: https://issues.apache.org/jira/browse/HIVE-27869
Project: Hive
Issue Type: Improvement
Components: Iceberg integration
Reporter: zhangbutao
Steps to reproduce:
1) Create path-based HadoopTable by Spark:
{code:java}
./spark-3.3.1-bin-hadoop3/bin/spark-sql --master local --deploy-mode client
--conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
--conf
spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog
--conf spark.sql.catalog.spark_catalog.type=hadoop --conf
spark.sql.catalog.spark_catalog.warehouse=hdfs://localhost:8028/tmp/testiceberg;
create table ice_test_001(id int) using iceberg; insert into ice_test_001(id)
values(1),(2),(3);{code}
2) Create iceberg table based on the HadoopTable by Hive:
{code:java}
CREATE EXTERNAL TABLE ice_test_001 STORED BY
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' LOCATION
'hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001' TBLPROPERTIES
('iceberg.catalog'='location_based_table'); {code}
3) Select the HadoopTable by Hive:
*set hive.fetch.task.conversion=none;*
{code:java}
jdbc:hive2://localhost:10004/default> select * from testicedb118.ice_test_001;
Error: Error while compiling statement: FAILED: IllegalArgumentException
Pathname
/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
from
hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
is not a valid DFS filename. (state=42000,code=40000) {code}
Full stacktrace:
{code:java}
Caused by: java.lang.IllegalArgumentException: Pathname
/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
from
hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
is not a valid DFS filename.
at
org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:256)
~[hadoop-hdfs-client-3.3.1.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1752)
~[hadoop-hdfs-client-3.3.1.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1749)
~[hadoop-hdfs-client-3.3.1.jar:?]
at
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
~[hadoop-common-3.3.1.jar:?]
at
org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1764)
~[hadoop-hdfs-client-3.3.1.jar:?]
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1760)
~[hadoop-common-3.3.1.jar:?]
at
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStats(HiveIcebergStorageHandler.java:540)
~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStatistics(HiveIcebergStorageHandler.java:533)
~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.stats.StatsUtils.getTableColumnStats(StatsUtils.java:1073)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:302)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:193)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:181)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:173)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:148)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:125)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:84)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:466)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:204)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:181)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:13053)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13271)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12627)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:107)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:519)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:471)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:436)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:430)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
at
org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:207)
~[hive-service-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
... 27 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)