This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new f2f0d798d IMPALA-12996: Add support for DATE in Iceberg metadata tables
f2f0d798d is described below
commit f2f0d798dfeabd7b4580b8ccae1c300906f0ddc7
Author: Daniel Becker <[email protected]>
AuthorDate: Fri Apr 12 12:40:50 2024 +0200
IMPALA-12996: Add support for DATE in Iceberg metadata tables
DATE fields in Iceberg metadata tables were NULLed out before this
change. This change adds support for displaying their actual values.
DATE fields are stored as 32-bit integers (the number of days since the
epoch), so they are handled similarly to INTs. However, if a value is
outside the valid DATE range, it is set to
DateValue::INVALID_DAYS_SINCE_EPOCH.
Tests:
- added a test query and adjusted existing ones in
iceberg-metadata-tables.test
Change-Id: Ib2223385f90555b1f9b22f3e27fa0e2489c3b9b5
Reviewed-on: http://gerrit.cloudera.org:8080/21292
Reviewed-by: Csaba Ringhofer <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exec/iceberg-metadata/iceberg-row-reader.cc | 31 ++++++++++++++++++----
be/src/exec/iceberg-metadata/iceberg-row-reader.h | 7 +++++
.../queries/QueryTest/iceberg-metadata-tables.test | 24 +++++++++++++----
3 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
index ea7f20532..4b32feb25 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
@@ -94,6 +94,9 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const
jobject* struct_like_row,
case TYPE_BOOLEAN: { // java.lang.Boolean
RETURN_IF_ERROR(WriteBooleanSlot(env, accessed_value, slot));
break;
+ } case TYPE_DATE: { // java.lang.Integer
+ RETURN_IF_ERROR(WriteDateSlot(env, accessed_value, slot));
+ break;
} case TYPE_INT: { // java.lang.Integer
RETURN_IF_ERROR(WriteIntSlot(env, accessed_value, slot));
break;
@@ -143,13 +146,30 @@ Status IcebergRowReader::WriteBooleanSlot(JNIEnv* env,
const jobject &accessed_v
return Status::OK();
}
+Status IcebergRowReader::WriteDateSlot(JNIEnv* env, const jobject
&accessed_value,
+ void* slot) {
+ int32_t days_since_epoch;
+ RETURN_IF_ERROR(ExtractJavaInteger(env, accessed_value, &days_since_epoch));
+
+ // This will set the value to DateValue::INVALID_DAYS_SINCE_EPOCH if it is
out of range.
+ DateValue result(days_since_epoch);
+ *reinterpret_cast<int32_t*>(slot) = result.Value();
+ return Status::OK();
+}
+
Status IcebergRowReader::WriteIntSlot(JNIEnv* env, const jobject
&accessed_value,
void* slot) {
- DCHECK(accessed_value != nullptr);
- DCHECK(env->IsInstanceOf(accessed_value, integer_cl_) == JNI_TRUE);
- jint result = env->CallIntMethod(accessed_value, integer_value_);
+ return ExtractJavaInteger(env, accessed_value,
reinterpret_cast<int32_t*>(slot));
+}
+
+Status IcebergRowReader::ExtractJavaInteger(JNIEnv* env, const jobject&
jinteger,
+ int32_t* res) {
+ DCHECK(jinteger != nullptr);
+ DCHECK(env->IsInstanceOf(jinteger, integer_cl_) == JNI_TRUE);
+ jint result = env->CallIntMethod(jinteger, integer_value_);
RETURN_ERROR_IF_EXC(env);
- *reinterpret_cast<int32_t*>(slot) = reinterpret_cast<int32_t>(result);
+
+ *res = reinterpret_cast<int32_t>(result);
return Status::OK();
}
@@ -321,7 +341,8 @@ jclass IcebergRowReader::JavaClassFromImpalaType(const
ColumnType type) {
switch (type.type) {
case TYPE_BOOLEAN: { // java.lang.Boolean
return boolean_cl_;
- } case TYPE_INT: { // java.lang.Integer
+ } case TYPE_DATE:
+ case TYPE_INT: { // java.lang.Integer
return integer_cl_;
} case TYPE_BIGINT: // java.lang.Long
case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
index 64de2cc86..f9f665499 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
@@ -85,6 +85,8 @@ class IcebergRowReader {
/// into the target slot.
Status WriteBooleanSlot(JNIEnv* env, const jobject &accessed_value, void*
slot)
WARN_UNUSED_RESULT;
+ Status WriteDateSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
+ WARN_UNUSED_RESULT;
Status WriteIntSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
WARN_UNUSED_RESULT;
Status WriteLongSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
@@ -113,6 +115,11 @@ class IcebergRowReader {
const TupleDescriptor* item_tuple_desc, Tuple* tuple,
MemPool* tuple_data_pool_collection, RuntimeState* state)
WARN_UNUSED_RESULT;
+ /// Extracts an int32_t from a Java integer object and writes it into 'res'.
'jinteger'
+ /// must not be null and of type java.lang.Integer.
+ Status ExtractJavaInteger(JNIEnv* env, const jobject &jinteger, int32_t* res)
+ WARN_UNUSED_RESULT;
+
/// Helper method that gives back the Iceberg Java class for a ColumnType.
It is
/// specified in this class, to avoid defining all the Java type classes in
other
/// classes.
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index 5b562c0e9..eb4698eae 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -802,18 +802,32 @@
row_regex:'{"content":1,"file_path":".*/test-warehouse/iceberg_test/hadoop_catal
STRING
====
---- QUERY
-set EXPAND_COMPLEX_TYPES=1;
select * from functional_parquet.iceberg_v2_delete_both_eq_and_pos.all_files;
---- RESULTS
1,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/delete-074a9e19e61b766e-652a169e00000001_800513971_data.0.parq','PARQUET',0,1,1606,'{2147483546:215,2147483545:51}','{2147483546:1,2147483545:1}','{2147483546:0,2147483545:0}','NULL','{2147483546:null,2147483545:null}','{2147483546:null,2147483545:null}','NULL','NULL','NULL',NULL,'{"d":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bou
[...]
-2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00002.parquet','PARQUET',0,2,697,'{1:40,3:66}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"
[...]
-0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00001.parquet','PARQUET',0,2,885,'{1:40,2:62,3:40}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_cou
[...]
-0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00001.parquet','PARQUET',0,2,898,'{1:40,2:54,3:66}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_cou
[...]
-2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00002.parquet','PARQUET',0,2,657,'{1:40,3:40}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":2,"
[...]
+2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00002.parquet','PARQUET',0,2,697,'{1:40,3:66}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-12-13","upper_bound":"2023-12-13"},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"
[...]
+0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00001.parquet','PARQUET',0,2,885,'{1:40,2:62,3:40}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-12-13","upper_bound":"2023-12-23"},"i":{"column_size":40,"value_count":2,"null_value_count":
[...]
+0,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-38a471ff-46f4-4350-85cc-2e7ba946b34c-00001.parquet','PARQUET',0,2,898,'{1:40,2:54,3:66}','{1:2,2:2,3:2}','{1:0,2:0,3:0}','{}','{1:null,2:null,3:null}','{1:null,2:null,3:null}','NULL','[4]','NULL',0,'{"d":{"column_size":66,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-12-13","upper_bound":"2023-12-13"},"i":{"column_size":40,"value_count":2,"null_value_count":
[...]
+2,'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_both_eq_and_pos/data/00000-0-72709aba-fb15-4bd6-9758-5f39eb9bdcb7-00002.parquet','PARQUET',0,2,657,'{1:40,3:40}','{1:2,3:2}','{1:0,3:0}','{}','{1:null,3:null}','{1:null,3:null}','NULL','[4]','[1,3]',0,'{"d":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"2023-12-13","upper_bound":"2023-12-23"},"i":{"column_size":40,"value_count":2,"null_value_count":0,"nan_value_count":null,"
[...]
---- TYPES
INT,STRING,STRING,INT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,BINARY,STRING,STRING,INT,STRING
====
+####
+# Query DATE fields;
+####
+---- QUERY
+select value_counts, readable_metrics.d.lower_bound,
readable_metrics.d.upper_bound from
functional_parquet.iceberg_v2_delete_both_eq_and_pos.`files`;
+---- RESULTS
+'{1:2,2:2,3:2}',2023-12-13,2023-12-23
+'{1:2,2:2,3:2}',2023-12-13,2023-12-13
+'{2147483546:1,2147483545:1}',NULL,NULL
+'{1:2,3:2}',2023-12-13,2023-12-23
+'{1:2,3:2}',2023-12-13,2023-12-13
+---- TYPES
+STRING,DATE,DATE
+====
+
####
# Describe all the metadata tables once
####