zabetak commented on code in PR #5978:
URL: https://github.com/apache/hive/pull/5978#discussion_r2213810142


##########
ql/src/test/results/clientpositive/llap/udtf_with_unionall.q.out:
##########
@@ -0,0 +1,168 @@
+PREHOOK: query: SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+A      10      2015-01-01      z
+B      20      2016-01-01      y
+C      30      2017-08-09      x
+A      10      2015-01-01      n
+B      20      2016-01-01      m
+C      30      2017-08-09      l
+PREHOOK: query: EXPLAIN CBO SELECT stack(3,'A',10,date 
'2015-01-01','z','B',20,date '2016-01-01','y','C',30,date '2017-08-09','x') AS 
(col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT stack(3,'A',10,date 
'2015-01-01','z','B',20,date '2016-01-01','y','C',30,date '2017-08-09','x') AS 
(col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+CBO PLAN:
+HiveUnion(all=[true])
+  HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$3])
+    HiveTableFunctionScan(invocation=[stack(3, 
_UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 10, 2015-01-01:DATE, 
_UTF-16LE'z':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 20, 2016-01-01:DATE, 
_UTF-16LE'y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 30, 2017-08-09:DATE, 
_UTF-16LE'x':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], 
rowType=[RecordType(VARCHAR(2147483647) col0, INTEGER col1, DATE col2, 
VARCHAR(2147483647) col3)])
+      HiveTableScan(table=[[_dummy_database, _dummy_table]], 
table:alias=[_dummy_table])
+  HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$3])
+    HiveTableFunctionScan(invocation=[stack(3, 
_UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 10, 2015-01-01:DATE, 
_UTF-16LE'n':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 20, 2016-01-01:DATE, 
_UTF-16LE'm':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 30, 2017-08-09:DATE, 
_UTF-16LE'l':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], 
rowType=[RecordType(VARCHAR(2147483647) col0, INTEGER col1, DATE col2, 
VARCHAR(2147483647) col3)])

Review Comment:
   The use of `STRING` literals lead into plans with lots of boilerplate info 
`_UTF-16LE'l':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"`. If you were using 
`INTEGER` literals the plans would be more readable. Anyways that's a minor 
point so you can ignore for now but maybe worth considering for future PRs.



##########
ql/src/test/results/clientpositive/llap/udtf_with_unionall.q.out:
##########
@@ -0,0 +1,168 @@
+PREHOOK: query: SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+A      10      2015-01-01      z
+B      20      2016-01-01      y
+C      30      2017-08-09      x
+A      10      2015-01-01      n
+B      20      2016-01-01      m
+C      30      2017-08-09      l
+PREHOOK: query: EXPLAIN CBO SELECT stack(3,'A',10,date 
'2015-01-01','z','B',20,date '2016-01-01','y','C',30,date '2017-08-09','x') AS 
(col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT stack(3,'A',10,date 
'2015-01-01','z','B',20,date '2016-01-01','y','C',30,date '2017-08-09','x') AS 
(col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+CBO PLAN:
+HiveUnion(all=[true])
+  HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$3])
+    HiveTableFunctionScan(invocation=[stack(3, 
_UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 10, 2015-01-01:DATE, 
_UTF-16LE'z':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 20, 2016-01-01:DATE, 
_UTF-16LE'y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 30, 2017-08-09:DATE, 
_UTF-16LE'x':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], 
rowType=[RecordType(VARCHAR(2147483647) col0, INTEGER col1, DATE col2, 
VARCHAR(2147483647) col3)])
+      HiveTableScan(table=[[_dummy_database, _dummy_table]], 
table:alias=[_dummy_table])
+  HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$3])
+    HiveTableFunctionScan(invocation=[stack(3, 
_UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 10, 2015-01-01:DATE, 
_UTF-16LE'n':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 20, 2016-01-01:DATE, 
_UTF-16LE'm':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 30, 2017-08-09:DATE, 
_UTF-16LE'l':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], 
rowType=[RecordType(VARCHAR(2147483647) col0, INTEGER col1, DATE col2, 
VARCHAR(2147483647) col3)])
+      HiveTableScan(table=[[_dummy_database, _dummy_table]], 
table:alias=[_dummy_table])
+
+PREHOOK: query: SELECT * FROM (VALUES(1, '1'), (2, 'orange'), (5, 'yellow')) 
AS Colors1
+  UNION ALL
+ SELECT * FROM (VALUES(10, 'green'), (11, 'blue'), (12, 'indigo'), (20, 
'violet')) AS Colors2
+  UNION ALL
+ SELECT STACK(2,10,'X',20,'Y')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (VALUES(1, '1'), (2, 'orange'), (5, 'yellow')) 
AS Colors1
+  UNION ALL
+ SELECT * FROM (VALUES(10, 'green'), (11, 'blue'), (12, 'indigo'), (20, 
'violet')) AS Colors2
+  UNION ALL
+ SELECT STACK(2,10,'X',20,'Y')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+10     X
+20     Y
+1      1

Review Comment:
   The choice of `1 1` seems inconsistent while all the rest are combinations 
of integers and colours. Minor but it does appear strange.



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java:
##########
@@ -218,6 +218,18 @@ private boolean isInlineTableOperand(RelNode input) {
     if (input.getInputs().size() == 0) {
       return true;
     }
+    RexNode call = ((HiveTableFunctionScan) input).getCall();
+    if (!(call instanceof RexCall)) {
+      return false;
+    }
+    // there should be operands present, if not return false
+    if (((RexCall) call).getOperands().size() == 0) {
+      return false;
+    }
+    // the operands should be of type RexCall, if not return false
+    if (!(((RexCall) call).getOperands().get(0) instanceof RexCall)) {
+      return false;
+    }

Review Comment:
   The check implemented here could be translated in natural language as:
   
   "The rule must accept any expression that starts with a function call 
(RexCall), the call has some operands (not zero), and the first operand is also 
a function call (RexCall)"
   
   The check is a bit general and although it avoids the `ClassCastException` 
it doesn't seem sufficient to guarantee that the rule will behave correctly for 
arbitrary combinations of a `GenericUDTF` with other functions.
   
   Having said that solving the `ClassCastException` is a progress compared to 
the previous state so I think we can leave with this solution for the time 
being.



##########
ql/src/test/queries/clientpositive/udtf_with_unionall.q:
##########
@@ -0,0 +1,55 @@
+SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3);
+
+EXPLAIN SELECT stack(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3);
+
+SELECT * FROM (VALUES(1, '1'), (2, 'orange'), (5, 'yellow')) AS Colors1
+  UNION ALL
+ SELECT * FROM (VALUES(10, 'green'), (11, 'blue'), (12, 'indigo'), (20, 
'violet')) AS Colors2

Review Comment:
   With your explanation I understand but the intention of the test is not 
visible somewhere. As I mentioned earlier for validating how certain rules work 
unit tests (`Test*Rule.java`) are usually faster and more explicit. I guess we 
can keep these tests now but consider adding some comments to clarify the 
expected outcome.



##########
ql/src/test/queries/clientpositive/udtf_with_unionall.q:
##########
@@ -0,0 +1,55 @@
+SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3);

Review Comment:
   Validating that the results are correct is a good idea so let's keep the 
tests.



##########
ql/src/test/queries/clientpositive/udtf_with_unionall.q:
##########
@@ -0,0 +1,39 @@
+SELECT STACK(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3);
+
+EXPLAIN CBO SELECT stack(3,'A',10,date '2015-01-01','z','B',20,date 
'2016-01-01','y','C',30,date '2017-08-09','x') AS (col0,col1,col2,col3)
+  UNION ALL
+ SELECT STACK(3,'A',10,date '2015-01-01','n','B',20,date 
'2016-01-01','m','C',30,date '2017-08-09','l') AS (col0,col1,col2,col3);
+
+SELECT * FROM (VALUES(1, '1'), (2, 'orange'), (5, 'yellow')) AS Colors1
+  UNION ALL
+ SELECT * FROM (VALUES(10, 'green'), (11, 'blue'), (12, 'indigo'), (20, 
'violet')) AS Colors2
+  UNION ALL
+ SELECT STACK(2,10,'X',20,'Y');
+
+EXPLAIN CBO SELECT * FROM (VALUES(1, '1'), (2, 'orange'), (5, 'yellow')) AS 
Colors1
+  UNION ALL
+ SELECT * FROM (VALUES(10, 'green'), (11, 'blue'), (12, 'indigo'), (20, 
'violet')) AS Colors2
+  UNION ALL
+ SELECT STACK(2,10,'X',20,'Y');
+
+SELECT INLINE(array(struct('A',10,date '2015-01-01'),struct('B',20,date 
'2015-02-02')))
+  UNION ALL
+ SELECT INLINE(array(struct('C',30,date '2016-01-01'),struct('D',40,date 
'2016-02-02')));
+
+EXPLAIN CBO SELECT INLINE(array(struct('A',10,date 
'2015-01-01'),struct('B',20,date '2015-02-02')))
+  UNION ALL
+ SELECT INLINE(array(struct('C',30,date '2016-01-01'),struct('D',40,date 
'2016-02-02')));

Review Comment:
   This test is a bit redundant since the query below is more complex and 
validates the same point that two inline branches  are merged by the rule. It 
doesn't seem to add to test coverage.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to