[ 
https://issues.apache.org/jira/browse/HIVE-29361?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Denys Kuzmenko updated HIVE-29361:
----------------------------------
    Assignee: Denys Kuzmenko
      Status: Patch Available  (was: Open)

> 'analyze table compute statistics for columns' on partitioned Iceberg tables 
> fails with NullPointerException
> ------------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-29361
>                 URL: https://issues.apache.org/jira/browse/HIVE-29361
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 4.2.0
>            Reporter: Sungwoo Park
>            Assignee: Denys Kuzmenko
>            Priority: Major
>              Labels: pull-request-available
>
> I found an error (NullPointerException) while updating statistics of TPC-DS 
> datasets stored on Iceberg.
> These are the steps taken to reproduce the error:
> 1. Load partitioned Iceberg tables, e.g.:
> create table catalog_returns
> ( cr_returned_time_sk bigint, cr_item_sk bigint, cr_refunded_customer_sk 
> bigint, cr_refunded_cdemo_sk bigint, cr_refunded_hdemo_sk bigint, 
> cr_refunded_addr_sk bigint, cr_returning_customer_sk bigint, 
> cr_returning_cdemo_sk bigint, cr_returning_hdemo_sk bigint, 
> cr_returning_addr_sk bigint, cr_call_center_sk bigint, cr_catalog_page_sk 
> bigint, cr_ship_mode_sk bigint, cr_warehouse_sk bigint, cr_reason_sk bigint, 
> cr_order_number bigint, cr_return_quantity int, cr_return_amount double, 
> cr_return_tax double, cr_return_amt_inc_tax double, cr_fee double, 
> cr_return_ship_cost double, cr_refunded_cash double, cr_reversed_charge 
> double, cr_store_credit double, cr_net_loss double)
> partitioned by (cr_returned_date_sk bigint)
> STORED BY ICEBERG
> stored as orc tblproperties ("orc.compress"="SNAPPY");
> insert overwrite table catalog_returns select * from 
> tpcds_bin_partitioned_orc_10000.catalog_returns;
> Loading Iceberg tables works okay, and I can run simple TPC-DS queries like 
> query 12.
> 2. Compute statistics by executing, e.g.:
> analyze table catalog_returns compute statistics for columns;
> Computing statistics seems okay, but updating statistics fails with the 
> following stack trace.
> ...
> 2025-12-07T17:52:57,450  INFO [HiveServer2-Background-Pool: Thread-277] 
> stats.BasicStatsTask: Partition {cr_returned_date_sk=2452924} stats: 
> [numFiles=1, numRows=25, totalSize=6266]
> 2025-12-07T17:52:57,665  INFO [HiveServer2-Background-Pool: Thread-277] 
> stats.BasicStatsTask: [Warning] could not update stats.Failed with exception 
> Unable to alter partition. java.lang.NullPointerException: Cannot invoke 
> "java.util.List.size()" because "vals" is null
> org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter partition. 
> java.lang.NullPointerException: Cannot invoke "java.util.List.size()" because 
> "vals" is null
>   at org.apache.hadoop.hive.ql.metadata.Hive.alterPartitions(Hive.java:1218)
>   at 
> org.apache.hadoop.hive.ql.stats.BasicStatsTask.aggregateStats(BasicStatsTask.java:406)
>   at 
> org.apache.hadoop.hive.ql.stats.BasicStatsTask.process(BasicStatsTask.java:108)
>   at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:111)
>   at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214)
>   at 
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
>   at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:354)
>   at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:327)
>   at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:244)
>   at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:105)
>   at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:347)
>   at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:191)
>   at org.apache.hadoop.hive.ql.Driver.run(Driver.java:144)
>   at org.apache.hadoop.hive.ql.Driver.run(Driver.java:139)
>   at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190)
>   at 
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:234)
>   at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:334)
>   at 
> java.base/java.security.AccessController.doPrivileged(AccessController.java:714)
>   at java.base/javax.security.auth.Subject.doAs(Subject.java:525)
>   at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953)
>   at 
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:354)
>   at 
> java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)
>   at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
>   at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
>   at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
>   at java.base/java.lang.Thread.run(Thread.java:1570)
> Caused by: MetaException(message:java.lang.NullPointerException: Cannot 
> invoke "java.util.List.size()" because "vals" is null)
>   at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$alter_partitions_req_result$alter_partitions_req_resultStandardScheme.read(ThriftHiveMetastore.java)
>   at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$alter_partitions_req_result$alter_partitions_req_resultStandardScheme.read(ThriftHiveMetastore.java)
>   at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$alter_partitions_req_result.read(ThriftHiveMetastore.java)
>   at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88)
>   at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_alter_partitions_req(ThriftHiveMetastore.java:4625)
>   at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.alter_partitions_req(ThriftHiveMetastore.java:4612)
>   at 
> org.apache.hadoop.hive.metastore.client.ThriftHiveMetaStoreClient.alter_partitions(ThriftHiveMetaStoreClient.java:2382)
>   at 
> org.apache.hadoop.hive.metastore.client.MetaStoreClientWrapper.alter_partitions(MetaStoreClientWrapper.java:530)
>   at 
> org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_partitions(SessionHiveMetaStoreClient.java:1689)
>   at 
> org.apache.hadoop.hive.metastore.client.MetaStoreClientWrapper.alter_partitions(MetaStoreClientWrapper.java:530)
>   at 
> java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
>   at java.base/java.lang.reflect.Method.invoke(Method.java:580)
>   at 
> org.apache.hadoop.hive.metastore.client.SynchronizedMetaStoreClient$SynchronizedHandler.invoke(SynchronizedMetaStoreClient.java:69)
>   at jdk.proxy2/jdk.proxy2.$Proxy32.alter_partitions(Unknown Source)
>   at 
> org.apache.hadoop.hive.metastore.client.MetaStoreClientWrapper.alter_partitions(MetaStoreClientWrapper.java:530)
>   at 
> org.apache.hadoop.hive.metastore.client.BaseMetaStoreClient.alter_partitions(BaseMetaStoreClient.java:620)
>   at 
> java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
>   at java.base/java.lang.reflect.Method.invoke(Method.java:580)
>   at 
> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:232)
>   at jdk.proxy2/jdk.proxy2.$Proxy32.alter_partitions(Unknown Source)
>   at org.apache.hadoop.hive.ql.metadata.Hive.alterPartitions(Hive.java:1214)
>   ... 25 more
> Tested with Hive 4.2.0, Tez 0.10.5, Java 21.
> For Iceberg, I used default values for most configuration keys (with 
> hive.iceberg.stats.source=iceberg).



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to