This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f829ff986e9 HIVE-27727: Materialized view query rewrite fails if query 
has decimal derived aggregate (Krisztian Kasa, reviewed by Stamatis Zampetakis)
f829ff986e9 is described below

commit f829ff986e919cc004c19770030b823d35b2281f
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Sep 27 05:53:57 2023 +0200

    HIVE-27727: Materialized view query rewrite fails if query has decimal 
derived aggregate (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
 .../ql/optimizer/calcite/HiveTypeSystemImpl.java   | 11 ++--
 .../hadoop/hive/ql/udf/generic/GenericUDAFSum.java |  3 +-
 .../materialized_view_create_rewrite_11.q          | 18 +++++++
 .../clientpositive/llap/decimal_precision.q.out    |  2 +-
 .../llap/materialized_view_create_rewrite_11.q.out | 62 ++++++++++++++++++++++
 .../llap/materialized_view_create_rewrite_6.q.out  |  6 +--
 .../llap/vector_decimal_precision.q.out            | 12 ++---
 .../perf/tpcds30tb/tez/cbo_query13.q.out           |  2 +-
 .../perf/tpcds30tb/tez/query13.q.out               |  2 +-
 9 files changed, 101 insertions(+), 17 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
index 5e301fdbcbd..229801bb154 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java
@@ -22,6 +22,8 @@ import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
 import org.apache.calcite.sql.type.SqlTypeName;
 
+import static 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.SUM_RESULT_PRECISION_INCREASE;
+
 public class HiveTypeSystemImpl extends RelDataTypeSystemImpl {
   // TODO: This should come from type system; Currently there is no definition
   // in type system for this.
@@ -178,10 +180,11 @@ public class HiveTypeSystemImpl extends 
RelDataTypeSystemImpl {
         // In Hive, SUM aggregate on decimal column will add 10 to compute
         // the output precision; see
         // 
GenericUDAFSum.GenericUDAFSumHiveDecimal#getOutputDecimalTypeInfoForSum
-        return typeFactory.createSqlType(
-            SqlTypeName.DECIMAL,
-            Math.min(MAX_DECIMAL_PRECISION, argumentType.getPrecision() + 10),
-            argumentType.getScale());
+        return typeFactory.createTypeWithNullability(
+            typeFactory.createSqlType(
+                SqlTypeName.DECIMAL,
+                Math.min(MAX_DECIMAL_PRECISION, argumentType.getPrecision() + 
SUM_RESULT_PRECISION_INCREASE),
+                argumentType.getScale()), argumentType.isNullable());
     }
     return argumentType;
   }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
index 40c7a7d7b5e..a78b5f5734d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -60,6 +60,7 @@ import org.slf4j.LoggerFactory;
 public class GenericUDAFSum extends AbstractGenericUDAFResolver {
 
   static final Logger LOG = 
LoggerFactory.getLogger(GenericUDAFSum.class.getName());
+  public static final int SUM_RESULT_PRECISION_INCREASE = 10;
 
   @Override
   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
@@ -252,7 +253,7 @@ public class GenericUDAFSum extends 
AbstractGenericUDAFResolver {
       // 10b rows. The scale is the same as the input.
       DecimalTypeInfo outputTypeInfo = null;
       if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
-        int precision = Math.min(HiveDecimal.MAX_PRECISION, inputPrecision + 
10);
+        int precision = Math.min(HiveDecimal.MAX_PRECISION, inputPrecision + 
SUM_RESULT_PRECISION_INCREASE);
         outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, 
inputScale);
       } else {
         outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(inputPrecision, 
inputScale);
diff --git 
a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_11.q 
b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_11.q
new file mode 100644
index 00000000000..b7933e5949d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_11.q
@@ -0,0 +1,18 @@
+-- Test rewrite when a query has an aggregate that can be derived from MV 
aggregates: avg(x) = sum(x)/count(x)
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.stats.autogather=false;
+
+create table t1 (a int, b decimal(3,2)) stored as orc TBLPROPERTIES 
('transactional'='true');
+
+insert into t1 values (1,1), (2,1), (3,3);
+
+create materialized view mv1 as
+select a, sum(b), count(b) from t1 group by a;
+
+explain cbo
+select a, avg(b) from t1 group by a;
+
+select a, avg(b) from t1 group by a;
diff --git a/ql/src/test/results/clientpositive/llap/decimal_precision.q.out 
b/ql/src/test/results/clientpositive/llap/decimal_precision.q.out
index 32c7fc7844a..66dca467fd8 100644
--- a/ql/src/test/results/clientpositive/llap/decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/llap/decimal_precision.q.out
@@ -575,7 +575,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), _col0 (type: decimal(30,10))
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
diff --git 
a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_11.q.out
 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_11.q.out
new file mode 100644
index 00000000000..34087c975a5
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_11.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table t1 (a int, b decimal(3,2)) stored as orc 
TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b decimal(3,2)) stored as orc 
TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values (1,1), (2,1), (3,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values (1,1), (2,1), (3,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+PREHOOK: query: create materialized view mv1 as
+select a, sum(b), count(b) from t1 group by a
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv1
+POSTHOOK: query: create materialized view mv1 as
+select a, sum(b), count(b) from t1 group by a
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv1
+POSTHOOK: Lineage: mv1._c1 EXPRESSION [(t1)t1.FieldSchema(name:b, 
type:decimal(3,2), comment:null), ]
+POSTHOOK: Lineage: mv1._c2 EXPRESSION [(t1)t1.FieldSchema(name:b, 
type:decimal(3,2), comment:null), ]
+POSTHOOK: Lineage: mv1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:int, 
comment:null), ]
+PREHOOK: query: explain cbo
+select a, avg(b) from t1 group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv1
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, avg(b) from t1 group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv1
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], _o__c1=[CAST(/($1, $2)):DECIMAL(7, 6)])
+  HiveTableScan(table=[[default, mv1]], table:alias=[default.mv1])
+
+PREHOOK: query: select a, avg(b) from t1 group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv1
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, avg(b) from t1 group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv1
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+3      3.000000
+1      1.000000
+2      1.000000
diff --git 
a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
index 49c23f6da6d..baeed8afc97 100644
--- 
a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
@@ -246,7 +246,7 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Output: default@mat1
 POSTHOOK: Output: default@mat1
 CBO PLAN:
-HiveProject(a=[$5], _o__c1=[CAST(CASE(IS NULL($1), $6, +($6, $1))):DECIMAL(17, 
2)], _o__c2=[CASE(IS NULL($2), $7, +($7, $2))], _o__c3=[CAST(/(CAST(CASE(IS 
NULL($1), $6, +($6, $1))):DECIMAL(17, 2), CASE(IS NULL($2), $7, +($7, 
$2)))):DECIMAL(11, 6)], _o__c4=[CASE(IS NULL($3), $8, +($8, $3))])
+HiveProject(a=[$5], _o__c1=[CAST(CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, 
$1))):DECIMAL(17, 2)], _o__c2=[CASE(IS NULL($2), $7, +($7, $2))], 
_o__c3=[CAST(/(CAST(CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, 
$1))):DECIMAL(17, 2), CASE(IS NULL($2), $7, +($7, $2)))):DECIMAL(11, 6)], 
_o__c4=[CASE(IS NULL($3), $8, +($8, $3))])
   HiveFilter(condition=[OR(AND($4, OR(AND(IS NULL($3), =($8, 0)), AND(=(+($8, 
$3), 0), IS NOT NULL($3)))), AND(IS NULL($4), OR(AND(IS NULL($3), >($8, 0)), 
AND(>(+($8, $3), 0), IS NOT NULL($3)))), AND($4, OR(AND(IS NULL($3), >($8, 0)), 
AND(>(+($8, $3), 0), IS NOT NULL($3)))))])
     HiveJoin(condition=[IS NOT DISTINCT FROM($0, $5)], joinType=[right], 
algorithm=[none], cost=[not available])
       HiveProject(a=[$0], _c1=[$1], _c2=[$2], _c4=[$4], $f4=[true])
@@ -413,7 +413,7 @@ STAGE PLANS:
                   predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or 
(((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
                   Statistics: Num rows: 1 Data size: 429 Basic stats: COMPLETE 
Column stats: COMPLETE
                   Select Operator
-                    expressions: _col6 (type: char(15)), CAST( if(_col1 is 
null, _col7, (_col7 + _col1)) AS decimal(17,2)) (type: decimal(17,2)), if(_col2 
is null, _col8, (_col8 + _col2)) (type: bigint), CAST( (CAST( if(_col1 is null, 
_col7, (_col7 + _col1)) AS decimal(17,2)) / if(_col2 is null, _col8, (_col8 + 
_col2))) AS decimal(11,6)) (type: decimal(11,6)), if(_col3 is null, _col9, 
(_col9 + _col3)) (type: bigint)
+                    expressions: _col6 (type: char(15)), CAST( CASE WHEN 
(_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + 
_col1)) END AS decimal(17,2)) (type: decimal(17,2)), if(_col2 is null, _col8, 
(_col8 + _col2)) (type: bigint), CAST( (CAST( CASE WHEN (_col1 is null) THEN 
(_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END AS 
decimal(17,2)) / if(_col2 is null, _col8, (_col8 + _col2))) AS decimal(11,6)) 
(type: decimal(11,6)), if(_col3 is n [...]
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
                     Statistics: Num rows: 1 Data size: 333 Basic stats: 
COMPLETE Column stats: COMPLETE
                     File Output Operator
@@ -444,7 +444,7 @@ STAGE PLANS:
                   predicate: (_col4 is null and ((_col3 is null and (_col9 > 
0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
                   Statistics: Num rows: 1 Data size: 429 Basic stats: COMPLETE 
Column stats: COMPLETE
                   Select Operator
-                    expressions: _col6 (type: char(15)), CAST( if(_col1 is 
null, _col7, (_col7 + _col1)) AS decimal(17,2)) (type: decimal(17,2)), if(_col2 
is null, _col8, (_col8 + _col2)) (type: bigint), CAST( (CAST( if(_col1 is null, 
_col7, (_col7 + _col1)) AS decimal(17,2)) / if(_col2 is null, _col8, (_col8 + 
_col2))) AS decimal(11,6)) (type: decimal(11,6)), if(_col3 is null, _col9, 
(_col9 + _col3)) (type: bigint)
+                    expressions: _col6 (type: char(15)), CAST( CASE WHEN 
(_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + 
_col1)) END AS decimal(17,2)) (type: decimal(17,2)), if(_col2 is null, _col8, 
(_col8 + _col2)) (type: bigint), CAST( (CAST( CASE WHEN (_col1 is null) THEN 
(_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END AS 
decimal(17,2)) / if(_col2 is null, _col8, (_col8 + _col2))) AS decimal(11,6)) 
(type: decimal(11,6)), if(_col3 is n [...]
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
                     Statistics: Num rows: 1 Data size: 333 Basic stats: 
COMPLETE Column stats: COMPLETE
                     File Output Operator
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index b215702ffd3..25ab3f04fd3 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -664,13 +664,13 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Select Operator
-                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), _col0 (type: decimal(30,10))
                   outputColumnNames: _col0, _col1
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [4, 5]
-                      selectExpressions: CastDecimalToDecimal(col 
3:decimal(38,18))(children: DecimalColDivideDecimalColumn(col 0:decimal(30,10), 
col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 
2:decimal(19,0)) -> 3:decimal(38,18)) -> 4:decimal(24,14), 
CastDecimalToDecimal(col 0:decimal(30,10)) -> 5:decimal(30,10)
+                      projectedOutputColumnNums: [4, 0]
+                      selectExpressions: CastDecimalToDecimal(col 
3:decimal(38,18))(children: DecimalColDivideDecimalColumn(col 0:decimal(30,10), 
col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 
2:decimal(19,0)) -> 3:decimal(38,18)) -> 4:decimal(24,14)
                   Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
@@ -1288,13 +1288,13 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
-                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+                  expressions: CAST( (_col0 / _col1) AS decimal(24,14)) (type: 
decimal(24,14)), _col0 (type: decimal(30,10))
                   outputColumnNames: _col0, _col1
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [4, 5]
-                      selectExpressions: CastDecimalToDecimal(col 
3:decimal(38,18))(children: DecimalColDivideDecimalColumn(col 0:decimal(30,10), 
col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 
2:decimal(19,0)) -> 3:decimal(38,18)) -> 4:decimal(24,14), 
CastDecimalToDecimal(col 0:decimal(30,10)) -> 5:decimal(30,10)
+                      projectedOutputColumnNums: [4, 0]
+                      selectExpressions: CastDecimalToDecimal(col 
3:decimal(38,18))(children: DecimalColDivideDecimalColumn(col 0:decimal(30,10), 
col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 
2:decimal(19,0)) -> 3:decimal(38,18)) -> 4:decimal(24,14)
                   Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query13.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query13.q.out
index aaeea7af89a..f7b701009cc 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query13.q.out
@@ -1,5 +1,5 @@
 CBO PLAN:
-HiveProject(_o__c0=[/(CAST($0):DOUBLE, $1)], _o__c1=[CAST(/($2, 
$3)):DECIMAL(11, 6)], _o__c2=[CAST(/($4, $5)):DECIMAL(11, 6)], 
_o__c3=[CAST($4):DECIMAL(17, 2)])
+HiveProject(_o__c0=[/(CAST($0):DOUBLE, $1)], _o__c1=[CAST(/($2, 
$3)):DECIMAL(11, 6)], _o__c2=[CAST(/($4, $5)):DECIMAL(11, 6)], _o__c3=[$4])
   HiveAggregate(group=[{}], agg#0=[sum($3)], agg#1=[count($3)], 
agg#2=[sum($4)], agg#3=[count($4)], agg#4=[sum($5)], agg#5=[count($5)])
     HiveJoin(condition=[AND(=($21, $0), OR(AND($22, $23, $10, $19), AND($24, 
$25, $11, $20), AND($26, $27, $12, $20)))], joinType=[inner], algorithm=[none], 
cost=[not available])
       HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], 
cost=[not available])
diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query13.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query13.q.out
index 0b11dce5b5e..113957a1384 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query13.q.out
@@ -200,7 +200,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Select Operator
-                  expressions: (UDFToDouble(_col0) / _col1) (type: double), 
CAST( (_col2 / _col3) AS decimal(11,6)) (type: decimal(11,6)), CAST( (_col4 / 
_col5) AS decimal(11,6)) (type: decimal(11,6)), CAST( _col4 AS decimal(17,2)) 
(type: decimal(17,2))
+                  expressions: (UDFToDouble(_col0) / _col1) (type: double), 
CAST( (_col2 / _col3) AS decimal(11,6)) (type: decimal(11,6)), CAST( (_col4 / 
_col5) AS decimal(11,6)) (type: decimal(11,6)), _col4 (type: decimal(17,2))
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator

Reply via email to