This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d467160e62 HIVE-28729: Apply nulls order setting in Reduce Sink operator of join branches (Krisztian Kasa, reviewed by Stamatis Zampetakis)
7d467160e62 is described below

commit 7d467160e623a187973921fa9658a31ff410308c
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Feb 4 15:39:26 2025 +0100

    HIVE-28729: Apply nulls order setting in Reduce Sink operator of join branches (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
 .../calcite/rules/HiveInsertExchange4JoinRule.java |  19 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |   5 +-
 .../queries/clientpositive/cbo_rp_null_order.q     |  12 ++
 .../clientpositive/llap/cbo_rp_null_order.q.out    | 224 +++++++++++++++++++++
 4 files changed, 245 insertions(+), 15 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
index 0c8c5e1a8ed..08107adda2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveInsertExchange4JoinRule.java
@@ -30,8 +30,6 @@
 import org.apache.calcite.rel.core.Exchange;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rex.RexNode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
@@ -56,20 +54,13 @@
  */
 public class HiveInsertExchange4JoinRule extends RelOptRule {
 
-  protected static transient final Logger LOG = LoggerFactory
-      .getLogger(HiveInsertExchange4JoinRule.class);
+  private final RelFieldCollation.NullDirection defaultAscNullDirection;
 
-  /** Rule that creates Exchange operators under a MultiJoin operator. */
-  public static final HiveInsertExchange4JoinRule EXCHANGE_BELOW_MULTIJOIN =
-      new HiveInsertExchange4JoinRule(HiveMultiJoin.class);
-
-  /** Rule that creates Exchange operators under a Join operator. */
-  public static final HiveInsertExchange4JoinRule EXCHANGE_BELOW_JOIN =
-      new HiveInsertExchange4JoinRule(Join.class);
-
-  public HiveInsertExchange4JoinRule(Class<? extends RelNode> clazz) {
+  public HiveInsertExchange4JoinRule(
+          Class<? extends RelNode> clazz, RelFieldCollation.NullDirection defaultAscNullDirection) {
     // match multijoin or join
     super(RelOptRule.operand(clazz, any()));
+    this.defaultAscNullDirection = defaultAscNullDirection;
   }
 
   @Override
@@ -118,7 +109,7 @@ public void onMatch(RelOptRuleCall call) {
         for (int pos : joinLeafPredInfo.getProjsJoinKeysInChildSchema(i)) {
           if (!joinKeyPositions.contains(pos)) {
             joinKeyPositions.add(pos);
-            collationListBuilder.add(new RelFieldCollation(pos));
+            collationListBuilder.add(new RelFieldCollation(pos, RelFieldCollation.Direction.ASCENDING, defaultAscNullDirection));
           }
         }
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c662417b371..f2e17993da6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -84,6 +84,7 @@
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.SetOp;
 import org.apache.calcite.rel.core.TableScan;
@@ -2416,7 +2417,9 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv
 
         // 9.2.  Introduce exchange operators below join/multijoin operators
         generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
-            HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
+                new HiveInsertExchange4JoinRule(Join.class, NullOrdering.defaultNullOrder(conf).getDirection()),
+                new HiveInsertExchange4JoinRule(
+                        HiveMultiJoin.class, NullOrdering.defaultNullOrder(conf).getDirection()));
       } else {
         generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
                 HiveProjectSortExchangeTransposeRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_null_order.q b/ql/src/test/queries/clientpositive/cbo_rp_null_order.q
new file mode 100644
index 00000000000..a2ff1e2be7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_null_order.q
@@ -0,0 +1,12 @@
+SET hive.cbo.returnpath.hiveop=true;
+SET hive.default.nulls.last=false;
+
+CREATE TABLE t1(key int, value string);
+
+EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+
+SET hive.default.nulls.last=true;
+
+EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
+EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out
new file mode 100644
index 00000000000..071f824365f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_null_order.q.out
@@ -0,0 +1,224 @@
+PREHOOK: query: CREATE TABLE t1(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE t1(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = 
b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not 
available])
+  HiveSortExchange(distribution=[hash[0]], collation=[[0 ASC-nulls-first]])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, t1]], qbid:alias=[a])
+  HiveSortExchange(distribution=[hash[0]], collation=[[0 ASC-nulls-first]])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, t1]], qbid:alias=[b])
+
+PREHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: key (type: int)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: key (type: int)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: value (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: key (type: int)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: key (type: int)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: value (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: key, value, key0, value0
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT * FROM t1 a INNER JOIN t1 b on a.key = 
b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not 
available])
+  HiveSortExchange(distribution=[hash[0]], collation=[[0]])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, t1]], qbid:alias=[a])
+  HiveSortExchange(distribution=[hash[0]], collation=[[0]])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, t1]], qbid:alias=[b])
+
+PREHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT * FROM t1 a INNER JOIN t1 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: key (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: key (type: int)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: value (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: key (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: key (type: int)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: value (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: key, value, key0, value0
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Reply via email to