This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3ffc2b403c4 HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected array (#3558) (Adam Szita, reviewed by Laszlo Pinter)
3ffc2b403c4 is described below
commit 3ffc2b403c4193ef7d9b7b9deda834eb60ea5ceb
Author: Adam Szita <[email protected]>
AuthorDate: Wed Aug 31 10:59:09 2022 +0200
HIVE-26506: HiveIcebergVectorizedRecordReader doesn't set the selected
array (#3558) (Adam Szita, reviewed by Laszlo Pinter)
---
.../vector/HiveIcebergVectorizedRecordReader.java | 4 +
.../test/queries/positive/llap_iceberg_read_orc.q | 28 ++++
.../positive/llap/llap_iceberg_read_orc.q.out | 154 +++++++++++++++++++++
3 files changed, 186 insertions(+)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
index ddabc27932f..412f7478090 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java
@@ -49,7 +49,11 @@ public final class HiveIcebergVectorizedRecordReader extends AbstractMapredIcebe
VectorizedRowBatch newBatch = (VectorizedRowBatch)
innerReader.getCurrentValue();
value.cols = newBatch.cols;
value.endOfFile = newBatch.endOfFile;
+ value.numCols = newBatch.numCols;
+ value.projectedColumns = newBatch.projectedColumns;
+ value.projectionSize = newBatch.projectionSize;
value.selectedInUse = newBatch.selectedInUse;
+ value.selected = newBatch.selected;
value.size = newBatch.size;
return true;
} else {
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
index 2d62dd844ba..a450bb68d80 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/llap_iceberg_read_orc.q
@@ -41,6 +41,34 @@ INSERT INTO llap_orders VALUES
(19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
(20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA');
+--verify row level filtering works with Iceberg ORC too
+set hive.auto.convert.join=true;
+set hive.disable.unsafe.external.table.operations=false;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
+
+explain select sum(quantity)
+ from llap_orders o, llap_items i
+ where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ );
+select sum(quantity)
+from llap_orders o, llap_items i
+where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ );
+
--select query without any schema change yet
SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
index 625c180f34d..440ee72dd0c 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/llap_iceberg_read_orc.q.out
@@ -98,6 +98,160 @@ POSTHOOK: query: INSERT INTO llap_orders VALUES
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@llap_orders
+PREHOOK: query: explain select sum(quantity)
+ from llap_orders o, llap_items i
+ where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items
+PREHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(quantity)
+ from llap_orders o, llap_items i
+ where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items
+POSTHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: o
+ filterExpr: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT
BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39))
or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not
null) (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_29_container,
bigKeyColName:itemid, smallTablePos:1, keyRatio:0.9523809523809523
+ Statistics: Num rows: 21 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((quantity NOT BETWEEN 39 AND 0 or quantity NOT
BETWEEN 69 AND 39 or (quantity > 70)) and (((quantity > 0) and (quantity < 39))
or ((quantity > 39) and (quantity < 69)) or (quantity > 70)) and itemid is not
null) (type: boolean)
+ Statistics: Num rows: 21 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: quantity (type: int), itemid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 21 Data size: 168 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 21 Data size: 84 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ minReductionHashAggr: 0.95238096
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: i
+ filterExpr: ((price <> 83000) and itemid is not null) (type:
boolean)
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: ((price <> 83000) and itemid is not null)
(type: boolean)
+ Statistics: Num rows: 7 Data size: 56 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: itemid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs (cache only)
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(quantity)
+from llap_orders o, llap_items i
+where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items
+PREHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(quantity)
+from llap_orders o, llap_items i
+where
+ o.itemid = i.itemid and i.price != 83000 and
+ (
+ (o.quantity > 0 and o.quantity < 39)
+ or
+ (o.quantity > 39 and o.quantity < 69)
+ or
+ (o.quantity > 70 )
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items
+POSTHOOK: Input: default@llap_orders
+#### A masked pattern was here ####
+774
PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items i JOIN llap_orders o ON i.itemid = o.itemid WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
PREHOOK: type: QUERY
PREHOOK: Input: default@llap_items