Author: jdere
Date: Tue Sep 23 01:56:03 2014
New Revision: 1626926
URL: http://svn.apache.org/r1626926
Log:
HIVE-8052: Vectorization: min() on TimeStamp datatype fails with error "Vector
aggregate not implemented: min for type: TIMESTAMP" (Matt McCline via Jason
Dere)
Added:
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q
hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_0.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL:
http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
(original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue
Sep 23 01:56:03 2014
@@ -1712,6 +1712,9 @@ public class HiveConf extends Configurat
HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled",
true,
"This flag should be set to true to enable vectorized mode of the
reduce-side of query execution.\n" +
"The default value is true."),
+
HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled",
true,
+ "This flag should be set to true to enable vectorized mode of the
reduce-side GROUP BY query execution.\n" +
+ "The default value is true."),
HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval",
100000,
"Number of entries added to the group by aggregation hash before a
recomputation of average entry size is performed."),
HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries",
1000000,
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL:
http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Sep
23 01:56:03 2014
@@ -158,6 +158,7 @@ minitez.query.files.shared=alter_merge_2
vector_left_outer_join.q,\
vector_mapjoin_reduce.q,\
vector_string_concat.q,\
+ vectorization_0.q,\
vectorization_12.q,\
vectorization_13.q,\
vectorization_14.q,\
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
Tue Sep 23 01:56:03 2014
@@ -67,6 +67,7 @@ public class VectorExpressionDescriptor
DATE (0x040),
TIMESTAMP (0x080),
DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
+ INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value),
INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value),
STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value),
ALL_FAMILY (0xFFF);
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
Tue Sep 23 01:56:03 2014
@@ -1889,47 +1889,47 @@ public class VectorizationContext {
// TODO: And, investigate if different reduce-side versions are needed for
var* and std*, or if map-side aggregate can be used. Right now they are
conservatively
// marked map-side (HASH).
static ArrayList<AggregateDefinition> aggregatesDefinition = new
ArrayList<AggregateDefinition>() {{
- add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null,
VectorUDAFMinLong.class));
- add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFMinDouble.class));
- add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null,
VectorUDAFMinString.class));
- add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFMinDecimal.class));
- add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null,
VectorUDAFMaxLong.class));
- add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFMaxDouble.class));
- add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null,
VectorUDAFMaxString.class));
- add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFMaxDecimal.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH,
VectorUDAFCountStar.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFCount.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFCount.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFCount.class));
- add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFCount.class));
- add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null,
VectorUDAFSumLong.class));
- add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFSumDouble.class));
- add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFSumDecimal.class));
- add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFAvgLong.class));
- add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFAvgDouble.class));
- add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFAvgDecimal.class));
- add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarPopLong.class));
- add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarPopLong.class));
- add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarPopDouble.class));
- add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarPopDouble.class));
- add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFVarPopDecimal.class));
- add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFVarPopDecimal.class));
- add(new AggregateDefinition("var_samp",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarSampLong.class));
- add(new AggregateDefinition("var_samp" ,
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFVarSampDouble.class));
- add(new AggregateDefinition("var_samp" ,
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFVarSampDecimal.class));
- add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdSampLong.class));
- add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH,
VectorUDAFStdSampDouble.class));
- add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH,
VectorUDAFStdSampDecimal.class));
+ add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null,
VectorUDAFMinLong.class));
+ add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFMinDouble.class));
+ add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null,
VectorUDAFMinString.class));
+ add(new AggregateDefinition("min",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFMinDecimal.class));
+ add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null,
VectorUDAFMaxLong.class));
+ add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFMaxDouble.class));
+ add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null,
VectorUDAFMaxString.class));
+ add(new AggregateDefinition("max",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFMaxDecimal.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.NONE,
GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY,
GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null,
VectorUDAFSumLong.class));
+ add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null,
VectorUDAFSumDouble.class));
+ add(new AggregateDefinition("sum",
VectorExpressionDescriptor.ArgumentType.DECIMAL, null,
VectorUDAFSumDecimal.class));
+ add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
+ add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
+ add(new AggregateDefinition("avg",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
+ add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("variance",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_pop",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_samp",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
+ add(new AggregateDefinition("var_samp" ,
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
+ add(new AggregateDefinition("var_samp" ,
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
+ add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("std",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_pop",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
+ add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY,
GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
+ add(new AggregateDefinition("stddev_samp",
VectorExpressionDescriptor.ArgumentType.DECIMAL,
GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
}};
public VectorAggregateExpression getAggregatorExpression(AggregationDesc
desc, boolean isReduce)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Tue Sep 23 01:56:03 2014
@@ -791,6 +791,7 @@ public class Vectorizer implements Physi
boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean
isTez) {
boolean ret = false;
+ LOG.info("Validating MapWork operator " + op.getType().name());
switch (op.getType()) {
case MAPJOIN:
if (op instanceof MapJoinOperator) {
@@ -827,6 +828,7 @@ public class Vectorizer implements Physi
boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
boolean ret = false;
+ LOG.info("Validating ReduceWork operator " + op.getType().name());
switch (op.getType()) {
case EXTRACT:
ret = validateExtractOperator((ExtractOperator) op);
@@ -840,7 +842,12 @@ public class Vectorizer implements Physi
}
break;
case GROUPBY:
- ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ if (HiveConf.getBoolVar(physicalContext.getConf(),
+
HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
+ ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ } else {
+ ret = false;
+ }
break;
case FILTER:
ret = validateFilterOperator((FilterOperator) op);
@@ -1071,11 +1078,11 @@ public class Vectorizer implements Physi
VectorizationContext vc = new ValidatorVectorizationContext();
if (vc.getVectorExpression(desc, mode) == null) {
// TODO: this cannot happen - VectorizationContext throws in such
cases.
- LOG.info("getVectorExpression returned null");
+ LOG.debug("getVectorExpression returned null");
return false;
}
} catch (Exception e) {
- LOG.info("Failed to vectorize", e);
+ LOG.debug("Failed to vectorize", e);
return false;
}
return true;
@@ -1098,19 +1105,19 @@ public class Vectorizer implements Physi
if
(!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase()))
{
return false;
}
- if (aggDesc.getParameters() != null) {
- return validateExprNodeDesc(aggDesc.getParameters());
+ if (aggDesc.getParameters() != null &&
!validateExprNodeDesc(aggDesc.getParameters())) {
+ return false;
}
// See if we can vectorize the aggregation.
try {
VectorizationContext vc = new ValidatorVectorizationContext();
if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
// TODO: this cannot happen - VectorizationContext throws in such
cases.
- LOG.info("getAggregatorExpression returned null");
+ LOG.debug("getAggregatorExpression returned null");
return false;
}
} catch (Exception e) {
- LOG.info("Failed to vectorize", e);
+ LOG.debug("Failed to vectorize", e);
return false;
}
return true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q Tue Sep 23
01:56:03 2014
@@ -1,4 +1,180 @@
SET hive.vectorized.execution.enabled=true;
+
+-- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))));
+
SELECT AVG(cbigint),
(-(AVG(cbigint))),
(-6432 + AVG(cbigint)),
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q Tue
Sep 23 01:56:03 2014
@@ -122,4 +122,20 @@ SELECT
FROM date_udf_flight_orc LIMIT 10;
-- Test extracting the date part of expression that includes time
-SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
\ No newline at end of file
+SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
+
+EXPLAIN SELECT
+ min(fl_date) AS c1,
+ max(fl_date),
+ count(fl_date),
+ count(*)
+FROM date_udf_flight_orc
+ORDER BY c1;
+
+SELECT
+ min(fl_date) AS c1,
+ max(fl_date),
+ count(fl_date),
+ count(*)
+FROM date_udf_flight_orc
+ORDER BY c1;
\ No newline at end of file
Modified:
hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
Tue Sep 23 01:56:03 2014
@@ -1,6 +1,7 @@
-SET hive.vectorized.execution.enabled = true;
-
-- Test timestamp functions in vectorized mode to verify they run correctly
end-to-end.
+-- Turning on vectorization has been temporarily moved after filling the test
table
+-- due to bug HIVE-8197.
+
CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string)
STORED AS ORC;
@@ -11,6 +12,8 @@ SELECT
FROM alltypesorc
LIMIT 40;
+SET hive.vectorized.execution.enabled = true;
+
CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC;
INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1;
@@ -122,3 +125,48 @@ SELECT
second(stimestamp1)
FROM alltypesorc_wrong
ORDER BY c1;
+
+EXPLAIN SELECT
+ min(ctimestamp1),
+ max(ctimestamp1),
+ count(ctimestamp1),
+ count(*)
+FROM alltypesorc_string;
+
+SELECT
+ min(ctimestamp1),
+ max(ctimestamp1),
+ count(ctimestamp1),
+ count(*)
+FROM alltypesorc_string;
+
+-- SUM of timestamps is not vectorized reduce-side because it produces a
double instead of a long (HIVE-8211).
+EXPLAIN SELECT
+ sum(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ sum(ctimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+ avg(ctimestamp1),
+ variance(ctimestamp1),
+ var_pop(ctimestamp1),
+ var_samp(ctimestamp1),
+ std(ctimestamp1),
+ stddev(ctimestamp1),
+ stddev_pop(ctimestamp1),
+ stddev_samp(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ avg(ctimestamp1),
+ variance(ctimestamp1),
+ var_pop(ctimestamp1),
+ var_samp(ctimestamp1),
+ std(ctimestamp1),
+ stddev(ctimestamp1),
+ stddev_pop(ctimestamp1),
+ stddev_samp(ctimestamp1)
+FROM alltypesorc_string;
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out?rev=1626926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
(added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out Tue
Sep 23 01:56:03 2014
@@ -0,0 +1,1127 @@
+PREHOOK: query: -- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(ctinyint), max(ctinyint),
count(ctinyint), count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type:
tinyint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col1 (type: tinyint),
_col2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint), _col2 (type:
bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0
(type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64 62 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(ctinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-39856
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ctinyint), variance(ctinyint),
var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint),
stddev_pop(ctinyint), stddev_samp(ctinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ value expressions: _col0 (type:
struct<count:bigint,sum:double,input:tinyint>), _col1 (type:
struct<count:bigint,sum:double,variance:double>), _col2 (type:
struct<count:bigint,sum:double,variance:double>), _col3 (type:
struct<count:bigint,sum:double,variance:double>), _col4 (type:
struct<count:bigint,sum:double,variance:double>), _col5 (type:
struct<count:bigint,sum:double,variance:double>), _col6 (type:
struct<count:bigint,sum:double,variance:double>), _col7 (type:
struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1),
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4),
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double),
_col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type:
double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: double), _col4 (type: double), _col5 (type: double),
_col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double),
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type:
double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-4.344925324321378 1158.3003004768184 1158.3003004768184
1158.4265870337827 34.033811136527426 34.033811136527426
34.033811136527426 34.03566639620536
+PREHOOK: query: EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(cbigint), max(cbigint),
count(cbigint), count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type:
bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0
(type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-2147311592 2145498388 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(cbigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1698460028409
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cbigint), variance(cbigint),
var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint),
stddev_pop(cbigint), stddev_samp(cbigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ value expressions: _col0 (type:
struct<count:bigint,sum:double,input:bigint>), _col1 (type:
struct<count:bigint,sum:double,variance:double>), _col2 (type:
struct<count:bigint,sum:double,variance:double>), _col3 (type:
struct<count:bigint,sum:double,variance:double>), _col4 (type:
struct<count:bigint,sum:double,variance:double>), _col5 (type:
struct<count:bigint,sum:double,variance:double>), _col6 (type:
struct<count:bigint,sum:double,variance:double>), _col7 (type:
struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1),
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4),
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double),
_col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type:
double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: double), _col4 (type: double), _col5 (type: double),
_col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double),
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type:
double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18
2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9
1.441142951074188E9 1.4412215110214279E9
+PREHOOK: query: EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(cfloat), max(cfloat), count(cfloat),
count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: float), _col1 (type:
float), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1),
count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2
(type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: float), _col2 (type:
bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0
(type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64.0 79.553 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(cfloat)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-39479.635992884636
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cfloat), variance(cfloat),
var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat),
stddev_pop(cfloat), stddev_samp(cfloat)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE
+ value expressions: _col0 (type:
struct<count:bigint,sum:double,input:float>), _col1 (type:
struct<count:bigint,sum:double,variance:double>), _col2 (type:
struct<count:bigint,sum:double,variance:double>), _col3 (type:
struct<count:bigint,sum:double,variance:double>), _col4 (type:
struct<count:bigint,sum:double,variance:double>), _col5 (type:
struct<count:bigint,sum:double,variance:double>), _col6 (type:
struct<count:bigint,sum:double,variance:double>), _col7 (type:
struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1),
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4),
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double),
_col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type:
double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type:
double), _col3 (type: double), _col4 (type: double), _col5 (type: double),
_col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double),
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type:
double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-4.303895780321011 1163.8972588604984 1163.8972588604984
1164.0241556397025 34.115938487171924 34.115938487171924
34.115938487171924 34.11779822379666
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or
(cbigint < cdouble))) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint), cfloat (type:
float), ctinyint (type: tinyint)
+ outputColumnNames: cbigint, cfloat, ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic
stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cbigint), stddev_pop(cbigint),
var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5
+ Statistics: Num rows: 1 Data size: 20 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type:
struct<count:bigint,sum:double,input:bigint>), _col1 (type:
struct<count:bigint,sum:double,variance:double>), _col2 (type:
struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4
(type: double), _col5 (type: tinyint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1),
var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), (- _col0) (type: double),
(-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0))
(type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2
(type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 +
_col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 +
_col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4
(type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((-
(-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5)
(type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15,
_col16, _col17
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 44 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8
1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18
3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0
10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18
1.5020929380914048E17 -64 64