[hive] branch master updated: HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa)

2022-12-05 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d6b1d5fa784 HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa)
d6b1d5fa784 is described below

commit d6b1d5fa784789d7aa0461adc9676a0489f2e3ea
Author: Alessandro Solimando 
AuthorDate: Mon Dec 5 20:17:14 2022 +0100

HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa)
---
 .../rules/HiveFilterSetOpTransposeRule.java|  64 ++
 .../union_all_filter_transpose_pruned_operands.q   |  45 +++
 ...nion_all_filter_transpose_pruned_operands.q.out | 140 +
 .../perf/tpcds30tb/tez/cbo_query11.q.out   |   8 +-
 .../perf/tpcds30tb/tez/cbo_query4.q.out|  12 +-
 .../perf/tpcds30tb/tez/cbo_query74.q.out   |   8 +-
 .../perf/tpcds30tb/tez/query11.q.out   |  44 ---
 .../clientpositive/perf/tpcds30tb/tez/query4.q.out |  58 +
 .../perf/tpcds30tb/tez/query74.q.out   |  46 +++
 9 files changed, 291 insertions(+), 134 deletions(-)
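
After this change the rule performs only the plain transposition and no longer prunes branches it can prove empty. The rewrite is sound because filtering above a UNION ALL and unioning the individually filtered branches produce the same multiset of rows. A minimal sketch of that equivalence in plain Java streams (illustration only, not Calcite's RelNode API):

import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class FilterUnionTransposeSketch {
  public static void main(String[] args) {
    List<Integer> op1 = List.of(1, 5, 9);
    List<Integer> op2 = List.of(2, 5, 12);
    Predicate<Integer> filter = x -> x > 4;

    // Plan A: Filter(Union(Op1, Op2)) - filter evaluated above the union
    List<Integer> filterAboveUnion = Stream.concat(op1.stream(), op2.stream())
        .filter(filter)
        .collect(Collectors.toList());

    // Plan B: Union(Filter(Op1), Filter(Op2)) - filter pushed below the union
    List<Integer> filterBelowUnion = Stream.concat(
            op1.stream().filter(filter),
            op2.stream().filter(filter))
        .collect(Collectors.toList());

    // Both plans yield the same multiset of rows: [5, 9, 5, 12]
    System.out.println(filterAboveUnion.equals(filterBelowUnion)); // true
  }
}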

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java
index 192fb682e13..8f6bb61b833 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java
@@ -20,25 +20,17 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.SetOp;
-import org.apache.calcite.rel.core.Union;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.rules.FilterSetOpTransposeRule;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
-import org.apache.calcite.rex.RexExecutor;
 import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexSimplify;
-import org.apache.calcite.rex.RexUnknownAs;
-import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
-import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 
@@ -55,17 +47,12 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {
   *        Union
   *       /     \
   *     Op1     Op2
-   *
   * to
   *        Union
   *       /     \
   *     FIL     FIL
   *      |       |
   *     Op1     Op2
-   *
-   *
-   * It additionally can remove branch(es) of filter if it's able to determine
-   * that they are going to generate an empty result set.
   */
   private HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) {
 super(relBuilderFactory);
@@ -85,57 +72,30 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {
 
   //~ Methods 
 
-  // implement RelOptRule
-  // We override the rule in order to do union all branch elimination
+  @Override
   public void onMatch(RelOptRuleCall call) {
-Filter filterRel = call.rel(0);
-SetOp setOp = call.rel(1);
+final Filter filterRel = call.rel(0);
+final SetOp setOp = call.rel(1);
 
-RexNode condition = filterRel.getCondition();
+final RexNode condition = filterRel.getCondition();
 
 // create filters on top of each setop child, modifying the filter
 // condition to reference each setop child
-RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder();
+final RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder();
 final RelBuilder relBuilder = call.builder();
-List<RelDataTypeField> origFields = setOp.getRowType().getFieldList();
-int[] adjustments = new int[origFields.size()];
+final List<RelDataTypeField> origFields = setOp.getRowType().getFieldList();
+final int[] adjustments = new int[origFields.size()];
 final List newSetOpInputs = new ArrayList<>();
-RelNode lastInput = null;
+
 for (int index = 0; index < setOp.getInputs().size(); index++) {
   RelNode input = setOp.getInput(index);
  RexNode newCondition = condition.accept(new RelOptUtil.RexInputConverter(rexBuilder,
  origFields, input.getRowType().getFieldList(), adjustments));
-  if (setOp instanceof 

[hive] branch master updated: HIVE-26685: Improve path name escaping/unescaping (#3721)

2022-12-05 Thread weiz
This is an automated email from the ASF dual-hosted git repository.

weiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new dbe2a323351 HIVE-26685: Improve path name escaping/unescaping (#3721)
dbe2a323351 is described below

commit dbe2a323351b7a0196fc7834023b9bc28cd3244e
Author: James Petty 
AuthorDate: Mon Dec 5 13:04:54 2022 -0500

HIVE-26685: Improve path name escaping/unescaping (#3721)
---
 .../org/apache/hadoop/hive/common/FileUtils.java   | 38 +++---
 .../apache/hadoop/hive/common/TestFileUtils.java   |  8 +
 2 files changed, 42 insertions(+), 4 deletions(-)
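
The change adds a fast path that returns the input string untouched when no character needs escaping, and a hex lookup table that replaces the String.format call on the slow path. A standalone sketch of the same technique, using a simplified stand-in for needsEscaping (the real FileUtils consults a BitSet, charToEscape, of reserved and control characters):

public class PathEscapeSketch {
  private static final char[] HEX_UPPER = "0123456789ABCDEF".toCharArray();

  // Simplified stand-in for FileUtils.needsEscaping.
  static boolean needsEscaping(char c) {
    return c == '%' || c == ':' || c < 0x20;
  }

  static String escapePathName(String path) {
    // Fast path: scan for the first character that needs escaping.
    int firstEscapeIndex = -1;
    for (int i = 0; i < path.length(); i++) {
      if (needsEscaping(path.charAt(i))) {
        firstEscapeIndex = i;
        break;
      }
    }
    if (firstEscapeIndex == -1) {
      return path; // nothing to escape, no copy made
    }
    // Slow path: copy the clean prefix, then escape from the first hit onwards.
    StringBuilder sb = new StringBuilder(path.length() + 4);
    sb.append(path, 0, firstEscapeIndex);
    for (int i = firstEscapeIndex; i < path.length(); i++) {
      char c = path.charAt(i);
      if (needsEscaping(c)) {
        sb.append('%').append(HEX_UPPER[(0xF0 & c) >>> 4]).append(HEX_UPPER[0x0F & c]);
      } else {
        sb.append(c);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(escapePathName("plain/path")); // returned as-is, no allocation
    System.out.println(escapePathName("a:b%c"));      // a%3Ab%25c
  }
}

The design point is that most paths contain no reserved characters, so the common case now does a single scan and zero allocations instead of building a StringBuilder and formatting every escaped byte through String.format.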

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 37ff2c04dc2..17169d6e184 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -258,6 +258,11 @@ public final class FileUtils {
 }
   }
 
+  /**
+   * Hex encoding characters indexed by integer value
+   */
+  private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
   static boolean needsEscaping(char c) {
 return c < charToEscape.size() && charToEscape.get(c);
   }
@@ -287,12 +292,28 @@ public final class FileUtils {
   }
 }
 
-StringBuilder sb = new StringBuilder();
+//  Fast-path detection, no escaping and therefore no copying necessary
+int firstEscapeIndex = -1;
 for (int i = 0; i < path.length(); i++) {
+  if (needsEscaping(path.charAt(i))) {
+firstEscapeIndex = i;
+break;
+  }
+}
+if (firstEscapeIndex == -1) {
+  return path;
+}
+
+// slow path, escape beyond the first required escape character into a new string
+StringBuilder sb = new StringBuilder();
+if (firstEscapeIndex > 0) {
+  sb.append(path, 0, firstEscapeIndex);
+}
+
+for (int i = firstEscapeIndex; i < path.length(); i++) {
   char c = path.charAt(i);
   if (needsEscaping(c)) {
-sb.append('%');
-sb.append(String.format("%1$02X", (int) c));
+sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]);
   } else {
 sb.append(c);
   }
@@ -301,8 +322,17 @@ public final class FileUtils {
   }
 
   public static String unescapePathName(String path) {
+int firstUnescapeIndex = path.indexOf('%');
+if (firstUnescapeIndex == -1) {
+  return path;
+}
+
 StringBuilder sb = new StringBuilder();
-for (int i = 0; i < path.length(); i++) {
+if (firstUnescapeIndex > 0) {
+  sb.append(path, 0, firstUnescapeIndex);
+}
+
+for (int i = firstUnescapeIndex; i < path.length(); i++) {
   char c = path.charAt(i);
   if (c == '%' && i + 2 < path.length()) {
 int code = -1;
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
index 2721deb7a03..9ffb52ba5f9 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -303,6 +303,14 @@ public class TestFileUtils {
assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy")));
   }
 
+  @Test
+  public void testPathEscapeChars() {
+StringBuilder sb = new StringBuilder();
+FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer));
+String path = sb.toString();
+assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path)));
+  }
+
  private int assertExpectedFilePaths(RemoteIterator<LocatedFileStatus> lfs, List<String> expectedPaths)
   throws Exception {
 int count = 0;



[hive] branch master updated: HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800)

2022-12-05 Thread rameshkumar
This is an automated email from the ASF dual-hosted git repository.

rameshkumar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 10805bc997d HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800)
10805bc997d is described below

commit 10805bc997d7cd136b85fca9200cf165ffe2eae5
Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com>
AuthorDate: Mon Dec 5 08:58:15 2022 -0800

HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800)

* HIVE-26683: Sum windowing function returns wrong value when all nulls.

The sum windowing function is returning an incorrect value when all the
"following" rows are null.  The correct value for sum when all the rows
are null is "null".

A new member variable had to be added to track nulls. It uses the
same algorithm that is used for sums: the sums are tracked by keeping
a running sum across all the rows and subtracting off the running sum
outside the window. Likewise, we keep a running non null row
count for the current row and subtract the non null row count of the
row that is leaving the window.

* empty
---
 .../hadoop/hive/ql/udf/generic/GenericUDAFSum.java | 106 +++---
 .../clientpositive/windowing_sum_following_null.q  |  30 +
 .../llap/windowing_sum_following_null.q.out| 124 +
 3 files changed, 220 insertions(+), 40 deletions(-)
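
A standalone sketch of the running-sum/running-count idea described in the commit message, simplified to a fixed-size trailing window over boxed longs rather than Hive's windowing-frame machinery; when the non-null count inside the window is zero, the sum is null:

import java.util.Arrays;

public class WindowSumSketch {
  // Sum over the trailing window of the last `w` rows: keep a prefix sum and a
  // prefix count of non-null rows; each window value is a difference of prefixes.
  static Long[] slidingSum(Long[] rows, int w) {
    long runningSum = 0;
    long nonNullCount = 0;
    long[] prefixSum = new long[rows.length + 1];
    long[] prefixNonNull = new long[rows.length + 1];
    Long[] out = new Long[rows.length];
    for (int i = 0; i < rows.length; i++) {
      if (rows[i] != null) {
        runningSum += rows[i];
        nonNullCount++;
      }
      prefixSum[i + 1] = runningSum;
      prefixNonNull[i + 1] = nonNullCount;
      int start = Math.max(0, i + 1 - w);
      // If the running non-null count equals the count at the row leaving the
      // window, every row inside the window is null, so the sum is null.
      if (prefixNonNull[i + 1] - prefixNonNull[start] == 0) {
        out[i] = null;
      } else {
        out[i] = prefixSum[i + 1] - prefixSum[start];
      }
    }
    return out;
  }

  public static void main(String[] args) {
    Long[] rows = {null, null, 3L, null, 4L};
    System.out.println(Arrays.toString(slidingSum(rows, 2)));
    // [null, null, 3, 3, 4]
  }
}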

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 6ce8734e8f0..40c7a7d7b5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -139,9 +139,17 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
*/
   public static abstract class GenericUDAFSumEvaluator<ResultType extends Writable> extends GenericUDAFEvaluator {
 static abstract class SumAgg<T> extends AbstractAggregationBuffer {
-  boolean empty;
   T sum;
   HashSet<ObjectInspectorObject> uniqueObjects; // Unique rows.
+  // HIVE-26683: Tracks the number of non null rows. If all the rows are null, then the sum of
+  // them is null. The count is needed for tracking in windowing frames. Windowing frames
+  // keep a running count of the sum and subtract off entries as the window moves. In order
+  // to process nulls within this same framework, we track the number of non null rows and
+  // also subtract off the number of entries as the window moves. If the current running count
+  // of non null rows is N and the number of non null rows in the entry leaving the window
+  // is also N, then we know all the entries within the window are null and can return null
+  // for the sum.
+  long nonNullCount;
 }
 
 protected PrimitiveObjectInspector inputOI;
@@ -267,9 +275,9 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
 @Override
 public void reset(AggregationBuffer agg) throws HiveException {
   SumAgg bdAgg = (SumAgg) agg;
-  bdAgg.empty = true;
   bdAgg.sum = new HiveDecimalWritable(0);
   bdAgg.uniqueObjects = null;
+  bdAgg.nonNullCount = 0;
 }
 
 boolean warned = false;
@@ -279,7 +287,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
   assert (parameters.length == 1);
   try {
 if (isEligibleValue((SumHiveDecimalWritableAgg) agg, parameters[0])) {
-  ((SumHiveDecimalWritableAgg)agg).empty = false;
+  ((SumHiveDecimalWritableAgg)agg).nonNullCount++;
   ((SumHiveDecimalWritableAgg)agg).sum.mutateAdd(
  PrimitiveObjectInspectorUtils.getHiveDecimal(parameters[0], inputOI));
 }
@@ -303,12 +311,12 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
   return;
 }
 
-myagg.empty = false;
 if (isWindowingDistinct()) {
  throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
 } else {
  // If partial is NULL, then there was an overflow and myagg.sum will be marked as not set.
  myagg.sum.mutateAdd(PrimitiveObjectInspectorUtils.getHiveDecimal(partial, inputOI));
+  myagg.nonNullCount++;
 }
   }
 }
@@ -316,7 +324,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver {
 @Override
 public Object terminate(AggregationBuffer agg) throws HiveException {
   SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
-  if (myagg.empty || myagg.sum == null || !myagg.sum.isSet()) {
+  if (myagg.nonNullCount == 0 || myagg.sum == null || !myagg.sum.isSet()) {
 return null;
   }
   DecimalTypeInfo 

[hive] branch master updated: HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa)

2022-12-05 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 5916236ac62 HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa)
5916236ac62 is described below

commit 5916236ac6205fd5add66593ee09bd3cf6e1b19f
Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com>
AuthorDate: Mon Dec 5 00:49:51 2022 -0800

HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa)

* HIVE-26737: Subquery returning wrong results when database has materialized views

When there is a materialized view in the materialized view registry,
the HiveMaterializedViewASTSubQueryRewriteShuttle runs and rewrites some
RelNodes and RexNodes.

At creation time, the HivePlannerContext is given the RexSubquery nodes
which are used to see if it is a correlated subquery with an agg. In the
case where the RexSubQuery was rewritten, the Context contains references
to stale nodes. It loses the correlated subquery information and creates
an incorrect query plan.

The boolean check for subqueries with agg is now done within the Calcite
nodes rather than when checking the ASTNodes. It was only used at rule
time, so it made more sense for the calculation of the boolean value to
be done there, and it's safer in the long run (as opposed to just
updating the global context when a new RexSubQuery is created).

The HiveFilter and HiveProject will contain the structure holding the
calculated correlation information. The information is fetched lazily
and only calculated when needed.

The HiveCorrelationInfo structure only contains information for the current
subquery level, similar to the old code. A correlated variable cannot go
down to a subquery within a subquery at this point.
---
 .../ql/optimizer/calcite/HivePlannerContext.java   |  14 +-
 .../correlation/CorrelationInfoVisitor.java| 167 +
 .../calcite/correlation/HiveCorrelationInfo.java   | 108 +
 .../optimizer/calcite/reloperators/HiveFilter.java | 103 +++-
 .../calcite/reloperators/HiveProject.java  |  34 +++
 .../calcite/rules/HiveSubQueryRemoveRule.java  |  46 ++--
 .../hadoop/hive/ql/parse/CalcitePlanner.java   |  22 +-
 .../apache/hadoop/hive/ql/parse/QBSubQuery.java|  52 +---
 .../apache/hadoop/hive/ql/parse/SubQueryUtils.java |  11 +-
 .../calcite/TestCBORuleFiredOnlyOnce.java  |   3 +-
 .../clientpositive/subquery_with_corr_and_mv.q |  57 +
 .../clientpositive/llap/subquery_scalar.q.out  |  14 +-
 .../llap/subquery_with_corr_and_mv.q.out   | 264 +
 .../perf/tpcds30tb/tez/cbo_query32.q.out   |   6 +-
 .../perf/tpcds30tb/tez/cbo_query92.q.out   |   6 +-
 .../perf/tpcds30tb/tez/query32.q.out   |  10 +-
 .../perf/tpcds30tb/tez/query92.q.out   |  10 +-
 17 files changed, 721 insertions(+), 206 deletions(-)
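
A minimal sketch of the lazy-fetch pattern the commit message describes. All names below are hypothetical stand-ins, not Hive's actual classes: the real HiveFilter/HiveProject derive a HiveCorrelationInfo from their own Rex nodes on first access and cache it, so the information can never go stale the way the globally registered set could.

import java.util.function.Supplier;

public class LazyCorrelationSketch {
  // Stand-in for the correlation info computed from a subquery's Rex nodes.
  static final class CorrelationInfo {
    final boolean correlatedWithAgg;
    CorrelationInfo(boolean correlatedWithAgg) {
      this.correlatedWithAgg = correlatedWithAgg;
    }
  }

  // Stand-in for a plan node (e.g. a filter) that owns its correlation info.
  static final class FilterNode {
    private final Supplier<CorrelationInfo> compute;
    private CorrelationInfo cached; // null until first requested

    FilterNode(Supplier<CorrelationInfo> compute) {
      this.compute = compute;
    }

    CorrelationInfo getCorrelationInfo() {
      if (cached == null) {
        cached = compute.get(); // walk the node's own subquery expressions once, on demand
      }
      return cached;
    }
  }

  public static void main(String[] args) {
    FilterNode filter = new FilterNode(() -> {
      System.out.println("computing correlation info...");
      return new CorrelationInfo(true);
    });
    filter.getCorrelationInfo(); // computes and caches
    filter.getCorrelationInfo(); // served from cache, no recompute
  }
}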

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
index 3a86140fa73..08e82a91cde 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java
@@ -19,33 +19,24 @@ package org.apache.hadoop.hive.ql.optimizer.calcite;
 
 import org.apache.calcite.config.CalciteConnectionConfig;
 import org.apache.calcite.plan.Context;
-import org.apache.calcite.rel.RelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
 import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
 
-import java.util.Set;
-
-
 public class HivePlannerContext implements Context {
   private HiveAlgorithmsConf algoConfig;
   private HiveRulesRegistry registry;
   private CalciteConnectionConfig calciteConfig;
-  private SubqueryConf subqueryConfig;
   private HiveConfPlannerContext isCorrelatedColumns;
   private StatsSource statsSource;
 
   public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry,
-  CalciteConnectionConfig calciteConfig, Set<RelNode> corrScalarRexSQWithAgg,
+  CalciteConnectionConfig calciteConfig,
   HiveConfPlannerContext isCorrelatedColumns, StatsSource statsSource) {
 this.algoConfig = algoConfig;
 this.registry = registry;
 this.calciteConfig = calciteConfig;
 this.statsSource = statsSource;
-// this is to keep track if a subquery is correlated and contains aggregate
-// this is computed in