This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 2e093bbc8 IMPALA-13085: Add warning and NULL out DECIMAL values in
Iceberg metadata tables
2e093bbc8 is described below
commit 2e093bbc8ae06f89f17bbe57f41d5e91749572c4
Author: Daniel Becker <[email protected]>
AuthorDate: Wed May 15 15:53:22 2024 +0200
IMPALA-13085: Add warning and NULL out DECIMAL values in Iceberg metadata
tables
DECIMAL values are not supported in Iceberg metadata tables and Impala
runs on a DCHECK and crashes if it encounters one.
Until this issue is properly fixed (see IMPALA-13080), this commit
introduces a temporary solution: DECIMAL values coming from Iceberg
metadata tables are NULLed out and a warning is issued.
Testing:
- added a DECIMAL column to the 'iceberg_metadata_alltypes' test table,
so querying the `files` metadata table will include a DECIMAL in the
'readable_metrics' struct.
Change-Id: I0c8791805bc4fa2112e092e65366ca2815f3fa22
Reviewed-on: http://gerrit.cloudera.org:8080/21429
Reviewed-by: Daniel Becker <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exec/iceberg-metadata/iceberg-row-reader.cc | 22 ++++++++++++++++++++--
be/src/exec/iceberg-metadata/iceberg-row-reader.h | 6 ++++++
.../functional/functional_schema_template.sql | 5 ++++-
.../queries/QueryTest/iceberg-metadata-tables.test | 3 +--
4 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
index 685a12518..bc42efa2a 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
@@ -33,7 +33,8 @@ namespace impala {
IcebergRowReader::IcebergRowReader(ScanNode* scan_node,
IcebergMetadataScanner* metadata_scanner)
: scan_node_(scan_node),
- metadata_scanner_(metadata_scanner) {}
+ metadata_scanner_(metadata_scanner),
+ unsupported_decimal_warning_emitted_(false) {}
Status IcebergRowReader::InitJNI() {
DCHECK(list_cl_ == nullptr) << "InitJNI() already called!";
@@ -120,7 +121,10 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const
jobject* struct_like_row,
} case TYPE_DOUBLE: { // java.lang.Double
RETURN_IF_ERROR(WriteDoubleSlot(env, accessed_value, slot));
break;
- } case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
+ } case TYPE_DECIMAL: {
+ RETURN_IF_ERROR(WriteDecimalSlot(slot_desc, tuple, state));
+ break;
+ }case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
break;
} case TYPE_STRING: {
@@ -220,6 +224,20 @@ Status IcebergRowReader::WriteDoubleSlot(JNIEnv* env,
const jobject &accessed_va
return Status::OK();
}
+Status IcebergRowReader::WriteDecimalSlot(const SlotDescriptor* slot_desc,
Tuple* tuple,
+ RuntimeState* state) {
+ // TODO IMPALA-13080: Handle DECIMALs without NULLing them out.
+ constexpr const char* warning = "DECIMAL values from Iceberg metadata tables
"
+ "are displayed as NULL. See IMPALA-13080.";
+ if (!unsupported_decimal_warning_emitted_) {
+ unsupported_decimal_warning_emitted_ = true;
+ LOG(WARNING) << warning;
+ state->LogError(ErrorMsg(TErrorCode::NOT_IMPLEMENTED_ERROR, warning));
+ }
+ tuple->SetNull(slot_desc->null_indicator_offset());
+ return Status::OK();
+}
+
Status IcebergRowReader::WriteTimeStampSlot(JNIEnv* env, const jobject
&accessed_value,
void* slot) {
DCHECK(accessed_value != nullptr);
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
index 67a51d2fe..f33638a5f 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
@@ -78,6 +78,10 @@ class IcebergRowReader {
/// IcebergMetadataScanner class, used to get and access values inside java
objects.
IcebergMetadataScanner* metadata_scanner_;
+ /// We want to emit a warning about DECIMAL values being NULLed out at most
once. This
+ /// member keeps track of whether the warning has already been emitted.
+ bool unsupported_decimal_warning_emitted_;
+
// Writes a Java value into the target tuple. 'struct_like_row' is only used
for struct
// types. It is needed because struct children reside directly in the parent
tuple of
// the struct.
@@ -99,6 +103,8 @@ class IcebergRowReader {
WARN_UNUSED_RESULT;
Status WriteDoubleSlot(JNIEnv* env, const jobject &accessed_value, void*
slot)
WARN_UNUSED_RESULT;
+ Status WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
+ RuntimeState* state) WARN_UNUSED_RESULT;
/// Iceberg TimeStamp is parsed into TimestampValue.
Status WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value, void*
slot)
WARN_UNUSED_RESULT;
diff --git a/testdata/datasets/functional/functional_schema_template.sql
b/testdata/datasets/functional/functional_schema_template.sql
index 9f4c2aa67..a380b17db 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3905,7 +3905,7 @@ CREATE TABLE IF NOT EXISTS
{db_name}{db_suffix}.{table_name} (
dt date,
s string,
bn binary,
- -- TODO IMPALA-13080: Add decimal.
+ dc decimal,
strct struct<i: int>,
arr array<double>,
mp map<int, float>
@@ -3924,6 +3924,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
to_date("2024-05-14"),
"Some string",
"bin1",
+ 15.48,
named_struct("i", 10),
array(cast(10.0 as double), cast(20.0 as double)),
map(10, cast(10.0 as float), 100, cast(100.0 as float))
@@ -3938,6 +3939,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
to_date("2025-06-15"),
"A string",
NULL,
+ 5.8,
named_struct("i", -150),
array(cast(-10.0 as double), cast(-2e100 as double)),
map(10, cast(0.5 as float), 101, cast(1e3 as float))
@@ -3952,6 +3954,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
NULL,
NULL,
"bin2",
+ NULL,
named_struct("i", -150),
array(cast(-12.0 as double), cast(-2e100 as double)),
map(10, cast(0.5 as float), 101, cast(1e3 as float))
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index cca52f8af..cad619500 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -834,12 +834,11 @@ STRING,DATE,DATE
# Query the `files` metadata table of a table that contains all types -
because of lower
# and upper bounds, the 'readable_metrics' struct of the metadata table will
also contain
# all types.
-# TODO IMPALA-13080: Add DECIMAL.
####
---- QUERY
select readable_metrics from
functional_parquet.iceberg_metadata_alltypes.`files`;
---- RESULTS
-regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-
[...]
+regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-
[...]
---- TYPES
STRING
====