[hive] branch master updated: HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d6b1d5fa784 HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa) d6b1d5fa784 is described below commit d6b1d5fa784789d7aa0461adc9676a0489f2e3ea Author: Alessandro Solimando AuthorDate: Mon Dec 5 20:17:14 2022 +0100 HIVE-26762: Remove operand pruning in HiveFilterSetOpTransposeRule (Alessandro Solimando, reviewed by Krisztian Kasa) --- .../rules/HiveFilterSetOpTransposeRule.java| 64 ++ .../union_all_filter_transpose_pruned_operands.q | 45 +++ ...nion_all_filter_transpose_pruned_operands.q.out | 140 + .../perf/tpcds30tb/tez/cbo_query11.q.out | 8 +- .../perf/tpcds30tb/tez/cbo_query4.q.out| 12 +- .../perf/tpcds30tb/tez/cbo_query74.q.out | 8 +- .../perf/tpcds30tb/tez/query11.q.out | 44 --- .../clientpositive/perf/tpcds30tb/tez/query4.q.out | 58 + .../perf/tpcds30tb/tez/query74.q.out | 46 +++ 9 files changed, 291 insertions(+), 134 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java index 192fb682e13..8f6bb61b833 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSetOpTransposeRule.java @@ -20,25 +20,17 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.List; -import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import 
org.apache.calcite.rel.core.SetOp; -import org.apache.calcite.rel.core.Union; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.rules.FilterSetOpTransposeRule; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexExecutor; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexSimplify; -import org.apache.calcite.rex.RexUnknownAs; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; @@ -55,17 +47,12 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { * Union * / \ * Op1 Op2 - * * to * Union * /\ * FIL * | | * Op1 Op2 - * - * - * It additionally can remove branch(es) of filter if it's able to determine - * that they are going to generate an empty result set. 
*/ private HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) { super(relBuilderFactory); @@ -85,57 +72,30 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule { //~ Methods - // implement RelOptRule - // We override the rule in order to do union all branch elimination + @Override public void onMatch(RelOptRuleCall call) { -Filter filterRel = call.rel(0); -SetOp setOp = call.rel(1); +final Filter filterRel = call.rel(0); +final SetOp setOp = call.rel(1); -RexNode condition = filterRel.getCondition(); +final RexNode condition = filterRel.getCondition(); // create filters on top of each setop child, modifying the filter // condition to reference each setop child -RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder(); +final RexBuilder rexBuilder = filterRel.getCluster().getRexBuilder(); final RelBuilder relBuilder = call.builder(); -List origFields = setOp.getRowType().getFieldList(); -int[] adjustments = new int[origFields.size()]; +final List origFields = setOp.getRowType().getFieldList(); +final int[] adjustments = new int[origFields.size()]; final List newSetOpInputs = new ArrayList<>(); -RelNode lastInput = null; + for (int index = 0; index < setOp.getInputs().size(); index++) { RelNode input = setOp.getInput(index); RexNode newCondition = condition.accept(new RelOptUtil.RexInputConverter(rexBuilder, origFields, input.getRowType().getFieldList(), adjustments)); - if (setOp instanceof
[hive] branch master updated: HIVE-26685: Improve path name escaping/unescaping (#3721)
This is an automated email from the ASF dual-hosted git repository. weiz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new dbe2a323351 HIVE-26685: Improve path name escaping/unescaping (#3721) dbe2a323351 is described below commit dbe2a323351b7a0196fc7834023b9bc28cd3244e Author: James Petty AuthorDate: Mon Dec 5 13:04:54 2022 -0500 HIVE-26685: Improve path name escaping/unescaping (#3721) --- .../org/apache/hadoop/hive/common/FileUtils.java | 38 +++--- .../apache/hadoop/hive/common/TestFileUtils.java | 8 + 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 37ff2c04dc2..17169d6e184 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -258,6 +258,11 @@ public final class FileUtils { } } + /** + * Hex encoding characters indexed by integer value + */ + private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + static boolean needsEscaping(char c) { return c < charToEscape.size() && charToEscape.get(c); } @@ -287,12 +292,28 @@ public final class FileUtils { } } -StringBuilder sb = new StringBuilder(); +// Fast-path detection, no escaping and therefore no copying necessary +int firstEscapeIndex = -1; for (int i = 0; i < path.length(); i++) { + if (needsEscaping(path.charAt(i))) { +firstEscapeIndex = i; +break; + } +} +if (firstEscapeIndex == -1) { + return path; +} + +// slow path, escape beyond the first required escape character into a new string +StringBuilder sb = new StringBuilder(); +if (firstEscapeIndex > 0) { + sb.append(path, 0, firstEscapeIndex); +} + +for (int i = firstEscapeIndex; i < path.length(); i++) { char c = path.charAt(i); if 
(needsEscaping(c)) { -sb.append('%'); -sb.append(String.format("%1$02X", (int) c)); +sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]); } else { sb.append(c); } @@ -301,8 +322,17 @@ public final class FileUtils { } public static String unescapePathName(String path) { +int firstUnescapeIndex = path.indexOf('%'); +if (firstUnescapeIndex == -1) { + return path; +} + StringBuilder sb = new StringBuilder(); -for (int i = 0; i < path.length(); i++) { +if (firstUnescapeIndex > 0) { + sb.append(path, 0, firstUnescapeIndex); +} + +for (int i = firstUnescapeIndex; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' && i + 2 < path.length()) { int code = -1; diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java index 2721deb7a03..9ffb52ba5f9 100644 --- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java +++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java @@ -303,6 +303,14 @@ public class TestFileUtils { assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy"))); } + @Test + public void testPathEscapeChars() { +StringBuilder sb = new StringBuilder(); +FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer)); +String path = sb.toString(); +assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path))); + } + private int assertExpectedFilePaths(RemoteIterator lfs, List expectedPaths) throws Exception { int count = 0;
[hive] branch master updated: HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800)
This is an automated email from the ASF dual-hosted git repository. rameshkumar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 10805bc997d HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800) 10805bc997d is described below commit 10805bc997d7cd136b85fca9200cf165ffe2eae5 Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com> AuthorDate: Mon Dec 5 08:58:15 2022 -0800 HIVE-26683: Sum windowing function returns wrong value when all nulls. (#3800) * HIVE-26683: Sum windowing function returns wrong value when all nulls. The sum windowing function is returning an incorrect value when all the "following" rows are null. The correct value for sum when all the rows are null is "null". A new member variable had to be added to track for nulls. It uses the same algorithm that is used for sums. The sums are tracked by keeping a running sum across all the rows and subtracting off the running sum outside the window. Likewise, we keep track of a running non null row count for the current row and subtract the non null row count of the row that is leaving the window. 
* empty --- .../hadoop/hive/ql/udf/generic/GenericUDAFSum.java | 106 +++--- .../clientpositive/windowing_sum_following_null.q | 30 + .../llap/windowing_sum_following_null.q.out| 124 + 3 files changed, 220 insertions(+), 40 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index 6ce8734e8f0..40c7a7d7b5e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -139,9 +139,17 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { */ public static abstract class GenericUDAFSumEvaluator extends GenericUDAFEvaluator { static abstract class SumAgg extends AbstractAggregationBuffer { - boolean empty; T sum; HashSet uniqueObjects; // Unique rows. + // HIVE-26683: Tracks the number of non null rows. If all the rows are null, then the sum of + // them is null. The count is needed for tracking in windowing frames. Windowing frames + // keep a running count of the sum and subtract off entries as the window moves. In order + // to process nulls within this same framework, we track the number of non null rows and + // also subtract off the number of entries as the window moves. If the current running count + // of non null rows is X and the number of non null rows in the entry leaving the window + // is also X, then we know all the entries within the window are null and can return null + // for the sum. 
+ long nonNullCount; } protected PrimitiveObjectInspector inputOI; @@ -267,9 +275,9 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { @Override public void reset(AggregationBuffer agg) throws HiveException { SumAgg bdAgg = (SumAgg) agg; - bdAgg.empty = true; bdAgg.sum = new HiveDecimalWritable(0); bdAgg.uniqueObjects = null; + bdAgg.nonNullCount = 0; } boolean warned = false; @@ -279,7 +287,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { assert (parameters.length == 1); try { if (isEligibleValue((SumHiveDecimalWritableAgg) agg, parameters[0])) { - ((SumHiveDecimalWritableAgg)agg).empty = false; + ((SumHiveDecimalWritableAgg)agg).nonNullCount++; ((SumHiveDecimalWritableAgg)agg).sum.mutateAdd( PrimitiveObjectInspectorUtils.getHiveDecimal(parameters[0], inputOI)); } @@ -303,12 +311,12 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { return; } -myagg.empty = false; if (isWindowingDistinct()) { throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial"); } else { // If partial is NULL, then there was an overflow and myagg.sum will be marked as not set. myagg.sum.mutateAdd(PrimitiveObjectInspectorUtils.getHiveDecimal(partial, inputOI)); + myagg.nonNullCount++; } } } @@ -316,7 +324,7 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { @Override public Object terminate(AggregationBuffer agg) throws HiveException { SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg; - if (myagg.empty || myagg.sum == null || !myagg.sum.isSet()) { + if (myagg.nonNullCount == 0 || myagg.sum == null || !myagg.sum.isSet()) { return null; } DecimalTypeInfo decimalTypeInfo
[hive] branch master updated: HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 5916236ac62 HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa) 5916236ac62 is described below commit 5916236ac6205fd5add66593ee09bd3cf6e1b19f Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com> AuthorDate: Mon Dec 5 00:49:51 2022 -0800 HIVE-26737: Subquery returning wrong results when database has materialized views (Steve Carlin, reviewed by Krisztian Kasa) * HIVE-26737: Subquery returning wrong results when database has materialized views When there is a materialized view in the materialized view registry, the HiveMaterializedViewASTSubQueryRewriteShuttle runs and rewrites some RelNodes and RexNodes. On creation time, the HivePlannerContext is given the RexSubquery nodes which are used to see if it is a correlated subquery with an agg. In the case where the RexSubQuery was rewritten, the Context contains references to stale nodes. It loses the correlated subquery information and creates an incorrect query plan. The boolean check for subqueries with agg is now done within the Calcite nodes rather than when checking the ASTNodes. It was only used at rule time, so it made more sense for the calculation of the boolean value to be done there, and it's safer in the long run (as opposed to just update the global context when a new RexSubQuery is created). The HiveFilter and HiveProject will contain the structure holding the calculated correlation information. The information is done with a lazy fetch and only calculated when needed. The HiveCorrelationInfo structure only contains information for the current subquery level, similar to the old code. 
A correlated variable cannot go down to a subquery within a subquery at this point. --- .../ql/optimizer/calcite/HivePlannerContext.java | 14 +- .../correlation/CorrelationInfoVisitor.java| 167 + .../calcite/correlation/HiveCorrelationInfo.java | 108 + .../optimizer/calcite/reloperators/HiveFilter.java | 103 +++- .../calcite/reloperators/HiveProject.java | 34 +++ .../calcite/rules/HiveSubQueryRemoveRule.java | 46 ++-- .../hadoop/hive/ql/parse/CalcitePlanner.java | 22 +- .../apache/hadoop/hive/ql/parse/QBSubQuery.java| 52 +--- .../apache/hadoop/hive/ql/parse/SubQueryUtils.java | 11 +- .../calcite/TestCBORuleFiredOnlyOnce.java | 3 +- .../clientpositive/subquery_with_corr_and_mv.q | 57 + .../clientpositive/llap/subquery_scalar.q.out | 14 +- .../llap/subquery_with_corr_and_mv.q.out | 264 + .../perf/tpcds30tb/tez/cbo_query32.q.out | 6 +- .../perf/tpcds30tb/tez/cbo_query92.q.out | 6 +- .../perf/tpcds30tb/tez/query32.q.out | 10 +- .../perf/tpcds30tb/tez/query92.q.out | 10 +- 17 files changed, 721 insertions(+), 206 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java index 3a86140fa73..08e82a91cde 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java @@ -19,33 +19,24 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.plan.Context; -import org.apache.calcite.rel.RelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; -import java.util.Set; - - public class HivePlannerContext implements Context { private HiveAlgorithmsConf algoConfig; private HiveRulesRegistry registry; 
private CalciteConnectionConfig calciteConfig; - private SubqueryConf subqueryConfig; private HiveConfPlannerContext isCorrelatedColumns; private StatsSource statsSource; public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, - CalciteConnectionConfig calciteConfig, Set corrScalarRexSQWithAgg, + CalciteConnectionConfig calciteConfig, HiveConfPlannerContext isCorrelatedColumns, StatsSource statsSource) { this.algoConfig = algoConfig; this.registry = registry; this.calciteConfig = calciteConfig; this.statsSource = statsSource; -// this is to keep track if a subquery is correlated and contains aggregate -// this is computed in CalcitePla