This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 63a37b1b Add Column Name to the Error Message in StatsAggregator (#2190)
63a37b1b is described below
commit 63a37b1bdf67af15188ac3a27915e6865222648f
Author: Peng-Jui Wang <[email protected]>
AuthorDate: Tue Jul 22 11:20:19 2025 -0700
Add Column Name to the Error Message in StatsAggregator (#2190)
<!--
Thanks for opening a pull request!
-->
<!-- If this PR resolves an issue, please replace
${GITHUB_ISSUE_ID} below with the actual GitHub issue ID. -->
Closes #2017
# Rationale for this change
Include the column name in the ValueError raised when a StatsAggregator
cannot be constructed, so that the error message identifies which column
caused the failure.
# Are these changes tested?
# Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/io/pyarrow.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 1d7db412..27973710 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -2410,9 +2410,12 @@ def data_file_statistics_from_parquet_metadata(
continue
if field_id not in col_aggs:
- col_aggs[field_id] = StatsAggregator(
- stats_col.iceberg_type, statistics.physical_type,
stats_col.mode.length
- )
+ try:
+ col_aggs[field_id] = StatsAggregator(
+ stats_col.iceberg_type,
statistics.physical_type, stats_col.mode.length
+ )
+ except ValueError as e:
+ raise ValueError(f"{e} for column
'{stats_col.column_name}'") from e
if isinstance(stats_col.iceberg_type, DecimalType) and
statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":
scale = stats_col.iceberg_type.scale