[
https://issues.apache.org/jira/browse/HIVE-27869?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HIVE-27869:
----------------------------------
Labels: pull-request-available (was: )
> Iceberg: Select HadoopTables will fail at
> HiveIcebergStorageHandler::canProvideColStats
> ----------------------------------------------------------------------------------------
>
> Key: HIVE-27869
> URL: https://issues.apache.org/jira/browse/HIVE-27869
> Project: Hive
> Issue Type: Improvement
> Components: Iceberg integration
> Reporter: zhangbutao
> Assignee: zhangbutao
> Priority: Major
> Labels: pull-request-available
>
> Step to reproduce:(latest master code)
> 1) Create path-based HadoopTable by Spark:
>
> {code:java}
> ./spark-3.3.1-bin-hadoop3/bin/spark-sql \--master local \--deploy-mode client
> \--conf
> spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
> \--conf
> spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog
> \--conf spark.sql.catalog.spark_catalog.type=hadoop \--conf
> spark.sql.catalog.spark_catalog.warehouse=hdfs://localhost:8028/tmp/testiceberg;
> create table ice_test_001(id int) using iceberg;
> insert into ice_test_001(id) values(1),(2),(3);{code}
>
> 2) Create iceberg table based on the HadoopTable by Hive:
> {code:java}
> CREATE EXTERNAL TABLE ice_test_001 STORED BY
> 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' LOCATION
> 'hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001' TBLPROPERTIES
> ('iceberg.catalog'='location_based_table'); {code}
> 3)Select the HadoopTable by Hive
> // launch tez task to scan data
> *set hive.fetch.task.conversion=none;*
> {code:java}
> jdbc:hive2://localhost:10000/default> select * from ice_test_001;
> Error: Error while compiling statement: FAILED: IllegalArgumentException
> Pathname
> /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
> from
> hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
> is not a valid DFS filename. (state=42000,code=40000) {code}
> Full stacktrace:
> {code:java}
> Caused by: java.lang.IllegalArgumentException: Pathname
> /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
> from
> hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
> is not a valid DFS filename.
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:256)
> ~[hadoop-hdfs-client-3.3.1.jar:?]
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1752)
> ~[hadoop-hdfs-client-3.3.1.jar:?]
> at
> org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1749)
> ~[hadoop-hdfs-client-3.3.1.jar:?]
> at
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
> ~[hadoop-common-3.3.1.jar:?]
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1764)
> ~[hadoop-hdfs-client-3.3.1.jar:?]
> at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1760)
> ~[hadoop-common-3.3.1.jar:?]
> at
> org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStats(HiveIcebergStorageHandler.java:540)
> ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStatistics(HiveIcebergStorageHandler.java:533)
> ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.getTableColumnStats(StatsUtils.java:1073)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:302)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:193)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:181)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:173)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:148)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:125)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:84)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:466)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:204)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:181)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:13053)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13271)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12627)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:107)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:519)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:471)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:436)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:430)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> at
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:207)
> ~[hive-service-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
> ... 27 more {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)