[CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type
Problem: A BETWEEN query on a timestamp column returns wrong results.

Root cause: When the filter value is given in yyyy-MM-dd format instead of yyyy-MM-dd HH:mm:ss format, Spark adds a cast. In CastExpressionOptimization a SimpleDateFormat object is used to parse the filter value, and parsing fails because the filter value does not match the configured timestamp format.

Solution: Use Spark's DateTimeUtils.stringToTimestamp method, since Spark already handles this scenario.

This closes #2787


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/11bd0ade
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/11bd0ade
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/11bd0ade

Branch: refs/heads/branch-1.5
Commit: 11bd0ade93a3ac72b42068c3b57ed8bb1203ab47
Parents: 6aa2a90
Author: kumarvishal09 <[email protected]>
Authored: Fri Sep 28 18:33:29 2018 +0530
Committer: ravipesala <[email protected]>
Committed: Thu Oct 4 18:02:08 2018 +0530

----------------------------------------------------------------------
 .../src/test/resources/datedatafile.csv         |  7 ++
 .../src/test/resources/timestampdatafile.csv    |  7 ++
 ...imestampNoDictionaryColumnCastTestCase.scala | 80 ++++++++++++++++++++
 .../execution/CastExpressionOptimization.scala  | 67 +++++++++-------
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 47 +++++-------
 5 files changed, 152 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/datedatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/datedatafile.csv b/integration/spark-common-test/src/test/resources/datedatafile.csv
new file mode 100644
index 0000000..43a615d
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/datedatafile.csv
@@ -0,0 +1,7 @@
+datetype1
+2018-09-11
+2018-09-12
+2018-09-13
+2018-09-14
+2018-09-15
+2018-09-16

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/timestampdatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/timestampdatafile.csv b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
new file mode 100644
index 0000000..473f330
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
@@ -0,0 +1,7 @@
+timestamptype
+2018-09-11 00:00:00
+2018-09-12 00:00:00
+2018-09-13 00:00:00
+2018-09-14 00:00:00
+2018-09-15 00:00:00
+2018-09-16 00:00:00

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
new file mode 100644
index 0000000..41c7005
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.directdictionary
+
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test Class for detailed query on timestamp datatypes
+ */
+class TimestampNoDictionaryColumnCastTestCase extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+        CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
+
+    sql("drop table if exists timestamp_nodictionary")
+    sql("drop table if exists datetype")
+    sql(
+      """
+         CREATE TABLE IF NOT EXISTS timestamp_nodictionary
+        (timestamptype timestamp) STORED BY 'carbondata'"""
+    )
+    val csvFilePath = s"$resourcesPath/timestampdatafile.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$csvFilePath' into table timestamp_nodictionary")
+//
+    sql(
+      """
+         CREATE TABLE IF NOT EXISTS datetype
+        (datetype1 date) STORED BY 'carbondata'"""
+    )
+    val csvFilePath1 = s"$resourcesPath/datedatafile.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$csvFilePath1' into table datetype")
+  }
+
+  test("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'") {
+    checkAnswer(
+      sql("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'"),
+      Seq(Row(6)
+      )
+    )
+  }
+//
+  test("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'") {
+    checkAnswer(
+      sql("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'"),
+      Seq(Row(6)
+      )
+    )
+  }
+
+  override def afterAll {
+    sql("drop table timestamp_nodictionary")
+    sql("drop table if exists datetype")
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
index 7e61814..57fb3f0 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
@@ -29,53 +29,66 @@ import org.apache.spark.sql.FalseExpr
import org.apache.spark.sql.sources import org.apache.spark.sql.types._ import org.apache.spark.sql.CarbonExpressions.{MatchCast => Cast} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.sources.Filter +import org.apache.spark.unsafe.types.UTF8String import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties object CastExpressionOptimization { - def typeCastStringToLong(v: Any, dataType: DataType): Any = { - var parser: SimpleDateFormat = null - if (dataType == TimestampType) { - parser = new SimpleDateFormat(CarbonProperties.getInstance - .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) - } else if (dataType == DateType) { - parser = new SimpleDateFormat(CarbonProperties.getInstance - .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, - CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) - parser.setTimeZone(TimeZone.getTimeZone("GMT")) - } else { - throw new UnsupportedOperationException("Unsupported DataType being evaluated.") - } - try { - val value = parser.parse(v.toString).getTime() * 1000L - value - } catch { - case e: ParseException => + if (dataType == TimestampType || dataType == DateType) { + val value = if (dataType == TimestampType) { + DateTimeUtils.stringToTimestamp(UTF8String.fromString(v.toString)) + } else { + None + } + if (value.isDefined) { + value.get + } else { + var parser: SimpleDateFormat = null + if (dataType == TimestampType) { + parser = new SimpleDateFormat(CarbonProperties.getInstance + .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) + } else if (dataType == DateType) { + parser = new SimpleDateFormat(CarbonProperties.getInstance + .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)) + parser.setTimeZone(TimeZone.getTimeZone("GMT")) + } try { - val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") - parsenew.parse(v.toString).getTime() * 1000L + val value = parser.parse(v.toString).getTime() * 1000L + value } catch { case e: ParseException => - val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) - gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { - gmtDay.parse(v.toString).getTime() * 1000L + val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") + parsenew.parse(v.toString).getTime() * 1000L } catch { case e: ParseException => - v + val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) + gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) + try { + gmtDay.parse(v.toString).getTime() * 1000L + } catch { + case e: ParseException => + v + case e: Exception => + v + } case e: Exception => v } case e: Exception => v } - case e: Exception => - v + } + } + else { + throw new UnsupportedOperationException("Unsupported DataType being evaluated.") } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala index 3360530..3b5b5ca 100644 --- 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -756,17 +756,10 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with } test("test bloom datamap on all basic data types") { - val originTimestampFormat = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) - val originDateFormat = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.CARBON_DATE_FORMAT, - CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) - CarbonProperties.getInstance().addProperty( - CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss") + CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) CarbonProperties.getInstance().addProperty( - CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") + CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) val columnNames = "booleanField,shortField,intField,bigintField,doubleField,stringField," + "timestampField,decimalField,dateField,charField,floatField" @@ -811,17 +804,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with sql( s""" | INSERT INTO TABLE $bloomDMSampleTable - | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), - | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), - | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5), | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) """.stripMargin) sql( s""" | INSERT INTO TABLE $normalTable - | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), - | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), - | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5), | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) """.stripMargin) @@ -838,17 +831,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with sql( s""" | INSERT INTO TABLE $bloomDMSampleTable - | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), - | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), - | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5), | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) """.stripMargin) sql( s""" | INSERT INTO TABLE $normalTable - | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), - | 
(true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), - | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5), | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) """.stripMargin) @@ -866,12 +859,12 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField = 'spark'"), sql(s"SELECT * FROM $normalTable WHERE stringField = 'spark'")) checkAnswer( - sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015/7/26 12:01:06'"), - sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015/7/26 12:01:06'")) + sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015-7-26 12:01:06'"), + sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015-7-26 12:01:06'")) checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 23.23"), sql(s"SELECT * FROM $normalTable WHERE decimalField = 23.23")) - checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015/4/23'"), - sql(s"SELECT * FROM $normalTable WHERE dateField = '2015/4/23'")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015-4-23'"), + sql(s"SELECT * FROM $normalTable WHERE dateField = '2015-4-23'")) checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField = 'ccc'"), sql(s"SELECT * FROM $normalTable WHERE charField = 'ccc'")) checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 2.5"), @@ -919,10 +912,6 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 0"), sql(s"SELECT * FROM $normalTable WHERE floatField = 0")) - CarbonProperties.getInstance().addProperty( - CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, originTimestampFormat) - CarbonProperties.getInstance().addProperty( - CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat) } test("test bloom datamap on multiple columns") {
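
For readers following the change, the sketch below illustrates the parsing strategy the fix moves to. It is illustrative only, not the committed code: it assumes the Spark 2.x catalyst API, where DateTimeUtils.stringToTimestamp(UTF8String) returns an Option holding microseconds since the epoch, and the helper name toMicrosExample and its simplified fallback are invented for this example.

import java.text.{ParseException, SimpleDateFormat}

import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.unsafe.types.UTF8String

object TimestampFilterCastExample {
  // Convert a pushed-down timestamp filter literal to microseconds since the epoch.
  // DateTimeUtils.stringToTimestamp accepts both "yyyy-MM-dd" and "yyyy-MM-dd HH:mm:ss"
  // literals, so a filter such as BETWEEN '2018-09-11' AND '2018-09-16' no longer
  // falls through to the strict SimpleDateFormat path.
  def toMicrosExample(value: String, carbonTimestampFormat: String): Any = {
    DateTimeUtils.stringToTimestamp(UTF8String.fromString(value)) match {
      case Some(micros) => micros
      case None =>
        // Fall back to the configured carbon timestamp format, as the code did before.
        try {
          new SimpleDateFormat(carbonTimestampFormat).parse(value).getTime * 1000L
        } catch {
          case _: ParseException => value // leave unparseable literals untouched
        }
    }
  }
}

With the default format "yyyy-MM-dd HH:mm:ss", toMicrosExample("2018-09-11", ...) resolves through stringToTimestamp, whereas SimpleDateFormat alone throws a ParseException for the day-only literal, which is what produced the wrong BETWEEN results before this fix.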
