This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 83ba3e0850d HIVE-27331: Iceberg: Rows are not deleted from table when execution mode is vectorized llap. (Ayush Saxena, Krisztian Kasa, reviewed by Denys Kuzmenko)
83ba3e0850d is described below

commit 83ba3e0850d44812afa474359ebf7e439b030033
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue May 16 15:20:13 2023 +0200

    HIVE-27331: Iceberg: Rows are not deleted from table when execution mode is vectorized llap. (Ayush Saxena, Krisztian Kasa, reviewed by Denys Kuzmenko)
    
    Co-authored-by: Ayush Saxena <[email protected]>
---
 .../mr/hive/vector/HiveVectorizedReader.java       |   3 +-
 .../test/queries/positive/llap_iceberg_read_orc.q  |  78 +++++++-
 .../positive/llap/llap_iceberg_read_orc.q.out      | 205 ++++++++++++++++++++-
 3 files changed, 283 insertions(+), 3 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
index 6566c7b1f11..c92b4617cbd 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
@@ -54,6 +54,7 @@ import org.apache.iceberg.Table;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.CloseableIterator;
+import org.apache.iceberg.mr.InputFormatConfig;
 import org.apache.iceberg.mr.hive.HiveIcebergInputFormat;
 import org.apache.iceberg.mr.mapred.MapredIcebergInputFormat;
 import org.apache.iceberg.orc.VectorizedReadUtils;
@@ -195,7 +196,7 @@ public class HiveVectorizedReader {
     // If LLAP enabled, try to retrieve an LLAP record reader - this might yield to null in some special cases
     // TODO: add support for reading files with positional deletes with LLAP (LLAP would need to provide file row num)
     if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) &&
-        LlapProxy.getIo() != null && task.deletes().isEmpty()) {
+        LlapProxy.getIo() != null && task.deletes().isEmpty() && !InputFormatConfig.fetchVirtualColumns(job)) {
       boolean isDisableVectorization =
           job.getBoolean(HiveIcebergInputFormat.getVectorizationConfName(tableName), false);
       if (isDisableVectorization) {
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q 
b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
index 1c6f4c7d671..08ac2dd0469 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
@@ -1,11 +1,87 @@
 --test against vectorized LLAP execution mode
+-- SORT_QUERY_RESULTS
+-- Mask neededVirtualColumns due to non-strict order
+--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
+
 set hive.llap.io.enabled=true;
 set hive.vectorized.execution.enabled=true;
+set hive.optimize.shared.work.merge.ts.schema=true;
 
 DROP TABLE IF EXISTS llap_orders PURGE;
 DROP TABLE IF EXISTS llap_items PURGE;
 DROP TABLE IF EXISTS mig_source PURGE;
-
+DROP TABLE IF EXISTS target_ice PURGE;
+DROP TABLE IF EXISTS calls PURGE;
+DROP TABLE IF EXISTS display PURGE;
+
+-- read after a merge call
+CREATE EXTERNAL TABLE calls (
+  s_key bigint,
+  year int
+) PARTITIONED BY SPEC (year)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2');
+
+INSERT INTO calls (s_key, year) VALUES (1090969, 2022);
+
+
+CREATE EXTERNAL TABLE display (
+  skey bigint,
+  hierarchy_number string,
+  hierarchy_name string,
+  language_id int,
+  hierarchy_display string,
+  orderby string
+)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2');
+
+INSERT INTO display (skey, language_id, hierarchy_display) VALUES
+  (1090969, 3, 'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1'),
+  (1090969, 3, 
'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-insertnew1');
+
+MERGE INTO display USING (
+  SELECT distinct display_skey, display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub1
+
+  UNION ALL
+
+  SELECT distinct display_skey, null as display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub2
+) sub
+ON display.skey = sub.display_skey
+    and display.hierarchy_display = sub.display
+
+WHEN MATCHED THEN
+  UPDATE SET hierarchy_display = concat(sub.display, '-mergeupdated1')
+WHEN NOT MATCHED THEN
+  INSERT (skey, language_id, hierarchy_display) values (sub.display_skey, 3, 
concat(sub.orig_display, '-mergenew1'));
+
+SELECT * FROM display;
+
+-- try read after a delete query
+CREATE EXTERNAL TABLE target_ice(a int, b string, c int)  STORED BY ICEBERG 
STORED AS ORC tblproperties ('format-version'='2');
+INSERT INTO target_ice values (1, 'one', 50);
+DELETE FROM target_ice WHERE a = 1;
+SELECT * FROM target_ice;
 
 CREATE EXTERNAL TABLE llap_items (itemid INT, price INT, category STRING, name 
STRING, description STRING) STORED BY ICEBERG STORED AS ORC;
 INSERT INTO llap_items VALUES
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
index c12562d3187..a3cabd9776c 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
@@ -10,6 +10,209 @@ PREHOOK: query: DROP TABLE IF EXISTS mig_source PURGE
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE IF EXISTS mig_source PURGE
 POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS target_ice PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS target_ice PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS calls PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS calls PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS display PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS display PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE EXTERNAL TABLE calls (
+  s_key bigint,
+  year int
+) PARTITIONED BY SPEC (year)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@calls
+POSTHOOK: query: CREATE EXTERNAL TABLE calls (
+  s_key bigint,
+  year int
+) PARTITIONED BY SPEC (year)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@calls
+PREHOOK: query: INSERT INTO calls (s_key, year) VALUES (1090969, 2022)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@calls
+POSTHOOK: query: INSERT INTO calls (s_key, year) VALUES (1090969, 2022)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@calls
+PREHOOK: query: CREATE EXTERNAL TABLE display (
+  skey bigint,
+  hierarchy_number string,
+  hierarchy_name string,
+  language_id int,
+  hierarchy_display string,
+  orderby string
+)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@display
+POSTHOOK: query: CREATE EXTERNAL TABLE display (
+  skey bigint,
+  hierarchy_number string,
+  hierarchy_name string,
+  language_id int,
+  hierarchy_display string,
+  orderby string
+)
+STORED BY Iceberg STORED AS ORC
+TBLPROPERTIES ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@display
+PREHOOK: query: INSERT INTO display (skey, language_id, hierarchy_display) 
VALUES
+  (1090969, 3, 'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1'),
+  (1090969, 3, 
'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-insertnew1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@display
+POSTHOOK: query: INSERT INTO display (skey, language_id, hierarchy_display) 
VALUES
+  (1090969, 3, 'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1'),
+  (1090969, 3, 
'f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-insertnew1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@display
+Warning: Shuffle Join MERGEJOIN[62][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 8' is a cross product
+PREHOOK: query: MERGE INTO display USING (
+  SELECT distinct display_skey, display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub1
+
+  UNION ALL
+
+  SELECT distinct display_skey, null as display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub2
+) sub
+ON display.skey = sub.display_skey
+    and display.hierarchy_display = sub.display
+
+WHEN MATCHED THEN
+  UPDATE SET hierarchy_display = concat(sub.display, '-mergeupdated1')
+WHEN NOT MATCHED THEN
+  INSERT (skey, language_id, hierarchy_display) values (sub.display_skey, 3, 
concat(sub.orig_display, '-mergenew1'))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@calls
+PREHOOK: Input: default@display
+PREHOOK: Output: default@display
+PREHOOK: Output: default@display
+PREHOOK: Output: default@merge_tmp_table
+POSTHOOK: query: MERGE INTO display USING (
+  SELECT distinct display_skey, display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub1
+
+  UNION ALL
+
+  SELECT distinct display_skey, null as display, display as orig_display
+  FROM (
+    SELECT D.skey display_skey, D.hierarchy_display display
+    FROM (
+      SELECT s_key FROM calls WHERE s_key =  1090969
+    ) R
+    INNER JOIN display D
+      ON R.s_key = D.skey AND D.language_id = 3
+    GROUP BY D.skey,
+      D.hierarchy_display
+  ) sub2
+) sub
+ON display.skey = sub.display_skey
+    and display.hierarchy_display = sub.display
+
+WHEN MATCHED THEN
+  UPDATE SET hierarchy_display = concat(sub.display, '-mergeupdated1')
+WHEN NOT MATCHED THEN
+  INSERT (skey, language_id, hierarchy_display) values (sub.display_skey, 3, 
concat(sub.orig_display, '-mergenew1'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@calls
+POSTHOOK: Input: default@display
+POSTHOOK: Output: default@display
+POSTHOOK: Output: default@display
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(display)display.null, ]
+PREHOOK: query: SELECT * FROM display
+PREHOOK: type: QUERY
+PREHOOK: Input: default@display
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM display
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@display
+#### A masked pattern was here ####
+1090969        NULL    NULL    3       
f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-insertnew1-mergenew1    
NULL
+1090969        NULL    NULL    3       
f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-insertnew1-mergeupdated1
        NULL
+1090969        NULL    NULL    3       
f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-mergenew1       NULL
+1090969        NULL    NULL    3       
f9e59bae9b131de1d8f02d887ee91e20-mergeupdated1-updated1-mergeupdated1   NULL
+PREHOOK: query: CREATE EXTERNAL TABLE target_ice(a int, b string, c int)  
STORED BY ICEBERG STORED AS ORC tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@target_ice
+POSTHOOK: query: CREATE EXTERNAL TABLE target_ice(a int, b string, c int)  
STORED BY ICEBERG STORED AS ORC tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@target_ice
+PREHOOK: query: INSERT INTO target_ice values (1, 'one', 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@target_ice
+POSTHOOK: query: INSERT INTO target_ice values (1, 'one', 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@target_ice
+PREHOOK: query: DELETE FROM target_ice WHERE a = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@target_ice
+PREHOOK: Output: default@target_ice
+POSTHOOK: query: DELETE FROM target_ice WHERE a = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@target_ice
+POSTHOOK: Output: default@target_ice
+PREHOOK: query: SELECT * FROM target_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@target_ice
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM target_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@target_ice
+#### A masked pattern was here ####
 PREHOOK: query: CREATE EXTERNAL TABLE llap_items (itemid INT, price INT, 
category STRING, name STRING, description STRING) STORED BY ICEBERG STORED AS 
ORC
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -551,9 +754,9 @@ POSTHOOK: Input: default@llap_orders
 #### A masked pattern was here ####
 München        Cybertruck      50000   4.5     99
 NULL   Model 3 50000   NULL    42
-Venezia        Model S 123000  NULL    89
 NULL   Model S 83000   NULL    185
 NULL   Model Y 55000   NULL    76
+Venezia        Model S 123000  NULL    89
 PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items 
i JOIN llap_orders o ON i.itemid = o.itemid  WHERE region = 'EU' and i.cost >= 
50000 GROUP BY i.name, i.description
 PREHOOK: type: QUERY
 PREHOOK: Input: default@llap_items

Reply via email to