Gabor Kaszab has uploaded this change for review. (
http://gerrit.cloudera.org:8080/20951 )


Change subject: IMPALA-12598: Allow multiple equality field ID lists for 
Iceberg tables
......................................................................

IMPALA-12598: Allow multiple equality field ID lists for Iceberg tables

This patch adds support for reading Iceberg tables that have different
equality field ID lists associated with different equality delete
files. In practice this is the use case when one equality delete file
deletes by e.g. columnA and columnB while another one deletes by
columnB and columnC.

In order to achieve such functionality the plan tree creation needed
some adjustments so that it can create separate LEFT ANTI JOIN nodes
for the different equality field ID lists.

Testing:
  - Flink was used for creating some test tables with the desired
    equality field IDs. Coverage for these tables is added to the
    test suite.
  - Also did some experiments creating test tables using NiFi.

Change-Id: I3e52d7a5800bf1b479f0c234679be92442d09f79
---
M common/fbs/IcebergObjects.fbs
M common/thrift/CatalogObjects.thrift
M fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
M fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
M fe/src/main/java/org/apache/impala/catalog/IcebergEqualityDeleteTable.java
M fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
M fe/src/main/java/org/apache/impala/util/IcebergUtil.java
M testdata/data/README
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/data/af4e128ee3256830-d9bd9e2f00000000_1372039299_data.0.parq
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/data/delete-41417e7df44b347b-e035009600000001_138281890_data.0.parq
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/data/delete-61438487836ebfcc-95c9ce7a00000000_909175610_data.0.parq
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/2d3fafd7-bce6-483f-be82-e0ccce9203fc-m0.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/57a963d3-0e4e-4540-8080-a57afd51ba99-m0.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/8bd425d8-25fb-4603-8cc7-aeb5ad2a3917-m0.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/snap-397031335297740726-1-2d3fafd7-bce6-483f-be82-e0ccce9203fc.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/snap-6117850509763739078-1-57a963d3-0e4e-4540-8080-a57afd51ba99.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/snap-8494861454990126958-1-8bd425d8-25fb-4603-8cc7-aeb5ad2a3917.avro
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/v1.metadata.json
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/v2.metadata.json
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/v3.metadata.json
D 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_different_equality_ids/metadata/v4.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-1483849a-0bdf-49f1-82ac-b3cfa757c541-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-1483849a-0bdf-49f1-82ac-b3cfa757c541-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-a8488080-c95c-4b79-9db9-085ed10090d6-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-a8488080-c95c-4b79-9db9-085ed10090d6-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-d92dc85b-efc8-4173-b96f-10a13c1d1e18-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/data/00000-0-d92dc85b-efc8-4173-b96f-10a13c1d1e18-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/18458ea9-087c-4e3d-8264-5e8b1fe425b1-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/18458ea9-087c-4e3d-8264-5e8b1fe425b1-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/b7db365c-79e0-404d-8bcd-834bb3e958c0-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/b7db365c-79e0-404d-8bcd-834bb3e958c0-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/c841d7ac-647e-4748-82c5-6a8082282a3e-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/c841d7ac-647e-4748-82c5-6a8082282a3e-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/snap-2471377723456553138-1-b7db365c-79e0-404d-8bcd-834bb3e958c0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/snap-4960871595590944125-1-18458ea9-087c-4e3d-8264-5e8b1fe425b1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/snap-5458629701251044342-1-c841d7ac-647e-4748-82c5-6a8082282a3e.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/v1.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/v2.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/v3.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/v4.metadata.json
R 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_equality_multi_eq_ids/metadata/version-hint.text
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-7788dcf5-a880-466d-ae9d-2dd332f98412-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-7788dcf5-a880-466d-ae9d-2dd332f98412-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-ddf90527-66f7-41de-bd3a-a6ef952918fc-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-ddf90527-66f7-41de-bd3a-a6ef952918fc-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-e93b89d3-fcf6-4847-8fd1-68e5b33d0ad6-00001.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/00000-0-e93b89d3-fcf6-4847-8fd1-68e5b33d0ad6-00002.parquet
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/data/delete-3e480099fc20aca4-23ae231a00000001_738940911_data.0.parq
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/103b5b20-fb15-41bb-a97d-1e2ddc147650-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/c0500e2e-00c0-48fb-9c29-31bbafc91d57-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/c0500e2e-00c0-48fb-9c29-31bbafc91d57-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/d7fa3972-f84c-4b35-aa37-2079458ccea8-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/d7fa3972-f84c-4b35-aa37-2079458ccea8-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/f9fa006c-0078-4caf-8eaf-f9d499fc6939-m0.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/f9fa006c-0078-4caf-8eaf-f9d499fc6939-m1.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/snap-152862018760071153-1-c0500e2e-00c0-48fb-9c29-31bbafc91d57.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/snap-2066775081852432762-1-f9fa006c-0078-4caf-8eaf-f9d499fc6939.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/snap-6283211732171745116-1-103b5b20-fb15-41bb-a97d-1e2ddc147650.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/snap-7591397613223797435-1-d7fa3972-f84c-4b35-aa37-2079458ccea8.avro
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/v1.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/v2.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/v3.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/v4.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/v5.metadata.json
A 
testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_pos_and_multi_eq_ids/metadata/version-hint.text
M testdata/datasets/functional/functional_schema_template.sql
M testdata/datasets/functional/schema_constraints.csv
M 
testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
M 
testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-equality-deletes.test
M tests/query_test/test_iceberg.py
70 files changed, 1,435 insertions(+), 529 deletions(-)



  git pull ssh://gerrit.cloudera.org:29418/Impala-ASF refs/changes/51/20951/1
--
To view, visit http://gerrit.cloudera.org:8080/20951
To unsubscribe, visit http://gerrit.cloudera.org:8080/settings

Gerrit-Project: Impala-ASF
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3e52d7a5800bf1b479f0c234679be92442d09f79
Gerrit-Change-Number: 20951
Gerrit-PatchSet: 1
Gerrit-Owner: Gabor Kaszab <gaborkas...@cloudera.com>

Reply via email to