HIVE-13381 : Timestamp & date should have higher precedence in type hierarchy than string group (Ashutosh Chauhan via Jason Dere)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4465023 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4465023 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4465023 Branch: refs/heads/llap Commit: b44650231ad2708fa73346164ae9c329ad36d6cb Parents: 9830363 Author: Ashutosh Chauhan <hashut...@apache.org> Authored: Tue Mar 29 19:01:24 2016 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Mon Apr 4 13:11:07 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 9 +- .../ql/exec/vector/VectorizationContext.java | 12 +- .../hive/ql/exec/TestFunctionRegistry.java | 18 ++- .../exec/vector/TestVectorizationContext.java | 17 +- .../queries/clientpositive/cast_on_constant.q | 7 + .../clientpositive/cast_on_constant.q.out | 160 +++++++++++++++++++ 6 files changed, 198 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 56b96b4..1343b39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -737,7 +737,14 @@ public final class FunctionRegistry { return getTypeInfoForPrimitiveCategory( (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); } - + // timestamp/date is higher precedence than String_GROUP + if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.DATE_GROUP) { + return b; + } + // date/timestamp is higher precedence than 
String_GROUP + if (pgB == PrimitiveGrouping.STRING_GROUP && pgA == PrimitiveGrouping.DATE_GROUP) { + return a; + } // Another special case, because timestamp is not implicitly convertible to numeric types. if ((pgA == PrimitiveGrouping.NUMERIC_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP) && (pcA == PrimitiveCategory.TIMESTAMP || pcB == PrimitiveCategory.TIMESTAMP)) { http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 1eb960d..30a0f5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -155,7 +155,7 @@ public class VectorizationContext { VectorExpressionDescriptor vMap; - private List<String> initialColumnNames; + private final List<String> initialColumnNames; private List<Integer> projectedColumns; private List<String> projectionColumnNames; @@ -712,7 +712,7 @@ public class VectorizationContext { genericUdf = new GenericUDFToDate(); break; case TIMESTAMP: - genericUdf = new GenericUDFToUnixTimeStamp(); + genericUdf = new GenericUDFTimestamp(); break; case INTERVAL_YEAR_MONTH: genericUdf = new GenericUDFToIntervalYearMonth(); @@ -1329,7 +1329,7 @@ public class VectorizationContext { case INT: case LONG: return InConstantType.INT_FAMILY; - + case DATE: return InConstantType.TIMESTAMP; @@ -1339,16 +1339,16 @@ public class VectorizationContext { case FLOAT: case DOUBLE: return InConstantType.FLOAT_FAMILY; - + case STRING: case CHAR: case VARCHAR: case BINARY: return InConstantType.STRING_FAMILY; - + case DECIMAL: return InConstantType.DECIMAL; - + case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: 
http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index 6a83c32..8488c21 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -253,9 +253,13 @@ public class TestFunctionRegistry extends TestCase { TypeInfoFactory.doubleTypeInfo); comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo, - TypeInfoFactory.stringTypeInfo); + TypeInfoFactory.dateTypeInfo); comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo, - TypeInfoFactory.stringTypeInfo); + TypeInfoFactory.dateTypeInfo); + comparison(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.timestampTypeInfo); + comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.timestampTypeInfo); comparison(TypeInfoFactory.intTypeInfo, TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.doubleTypeInfo); @@ -364,15 +368,15 @@ public class TestFunctionRegistry extends TestCase { // non-qualified types should simply return the TypeInfo associated with that type assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + (PrimitiveTypeInfo) varchar10, TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING)); assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING)); 
assertEquals(TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) TypeInfoFactory.doubleTypeInfo, - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.doubleTypeInfo, + TypeInfoFactory.stringTypeInfo, PrimitiveCategory.DOUBLE)); } http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index e4c7529..bb37a04 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -23,14 +23,9 @@ import static org.junit.Assert.assertTrue; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; - -import junit.framework.Assert; import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; @@ -73,11 +68,12 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; @@ -144,13 +140,12 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; import org.junit.Test; public class TestVectorizationContext { @@ -1215,12 +1210,12 @@ public class TestVectorizationContext { children1.set(2, new ExprNodeConstantDesc("2013-11-05 00:00:00.000")); children1.set(3, new ExprNodeConstantDesc("2013-11-06 00:00:00.000")); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertEquals(FilterStringColumnBetween.class, ve.getClass()); + assertEquals(FilterTimestampColumnBetween.class, ve.getClass()); // timestamp NOT BETWEEN children1.set(0, new ExprNodeConstantDesc(new 
Boolean(true))); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertEquals(FilterStringColumnNotBetween.class, ve.getClass()); + assertEquals(FilterTimestampColumnNotBetween.class, ve.getClass()); } // Test translation of both IN filters and boolean-valued IN expressions (non-filters). @@ -1468,7 +1463,7 @@ public class TestVectorizationContext { children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprCharScalarStringGroupColumn); - + // test for VARCHAR type VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10); constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10)); http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/queries/clientpositive/cast_on_constant.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cast_on_constant.q b/ql/src/test/queries/clientpositive/cast_on_constant.q new file mode 100644 index 0000000..aabb9c6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cast_on_constant.q @@ -0,0 +1,7 @@ +create table t1(ts_field timestamp, date_field date); +explain select * from t1 where ts_field = "2016-01-23 00:00:00"; +explain select * from t1 where date_field = "2016-01-23"; +explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00'; +explain select * from t1 where date_field = date '2016-01-23'; + +drop table t1; http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/results/clientpositive/cast_on_constant.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cast_on_constant.q.out b/ql/src/test/results/clientpositive/cast_on_constant.q.out new file mode 100644 index 0000000..f8d6a0d --- /dev/null +++ b/ql/src/test/results/clientpositive/cast_on_constant.q.out @@ -0,0 +1,160 @@ +PREHOOK: query: create table t1(ts_field timestamp, date_field date) 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(ts_field timestamp, date_field date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00" +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 where date_field = "2016-01-23" +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where date_field = "2016-01-23" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (date_field = 
2016-01-23) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), 2016-01-23 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: 
explain select * from t1 where date_field = date '2016-01-23' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where date_field = date '2016-01-23' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (date_field = 2016-01-23) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), 2016-01-23 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1