Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 8652eee83 -> c9a9a6af1
[CARBONDATA-250] Filter result is incorrect when Double data type values 0.0 and -0.0 are used.

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7e814077
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7e814077
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7e814077

Branch: refs/heads/master
Commit: 7e81407714ad8878e50b075df3283ea3984271e5
Parents: 8652eee
Author: sujith71955 <sujithchacko.2...@gmail.com>
Authored: Sun Sep 18 04:01:10 2016 +0530
Committer: Venkata Ramana G <ramana.gollam...@huawei.com>
Committed: Sun Sep 18 04:41:15 2016 +0530

----------------------------------------------------------------------
 .../conditional/EqualToExpression.java        |  3 ++-
 .../carbondata/scan/filter/FilterUtil.java    | 21 +++++++++++++++++++-
 .../test/resources/Test_Data1_Logrithmic.csv  |  3 +++
 .../GrtLtFilterProcessorTestCase.scala        | 11 ++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
index 12a3e32..8f7fa0a 100644
--- a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.scan.expression.Expression;
 import org.apache.carbondata.scan.expression.ExpressionResult;
 import org.apache.carbondata.scan.expression.exception.FilterIllegalMemberException;
 import org.apache.carbondata.scan.expression.exception.FilterUnsupportedException;
+import org.apache.carbondata.scan.filter.FilterUtil;
 import org.apache.carbondata.scan.filter.intf.ExpressionType;
 import org.apache.carbondata.scan.filter.intf.RowIntf;

@@ -78,7 +79,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
         result = val1.getInt().equals(val2.getInt());
         break;
       case DOUBLE:
-        result = val1.getDouble().equals(val2.getDouble());
+        result = FilterUtil.nanSafeEqualsDoubles(val1.getDouble(), val2.getDouble());
         break;
       case TIMESTAMP:
         result = val1.getTime().equals(val2.getTime());

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
index 71ac1bf..b7cacb1 100644
--- a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
@@ -1390,6 +1390,26 @@
   }

   /**
+   * This method compares two double values for equality. It preserves the
+   * 0.0 and -0.0 equality semantics of the == operator, and it also preserves
+   * the NaN equality check of java.lang.Double.equals().
+   *
+   * @param d1 first double value for the equality check
+   * @param d2 second double value for the equality check
+   * @return the result of comparing the two double values
+   */
+  public static boolean nanSafeEqualsDoubles(Double d1, Double d2) {
+    boolean xIsNan = Double.isNaN(d1);
+    boolean yIsNan = Double.isNaN(d2);
+    return (xIsNan && yIsNan) || (d1.doubleValue() == d2.doubleValue());
+  }
+
+  /**
    * This method will prepare the list with all unknown expressions
    *
    * @param expression
@@ -1406,5 +1426,4 @@
       getUnknownExpressionsList(child, lst);
     }
   }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
new file mode 100644
index 0000000..0f0312d
--- /dev/null
+++ b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
@@ -0,0 +1,3 @@
+c1_int,c2_Bigint,c3_Decimal,c4_double,c5_string,c6_Timestamp,c7_Datatype_Desc
+2147483646,9223372036854775807,0.12345678900987654321123456789012345638,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data.
Loaded data can be deleted based on load time or a specific load can be undone it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is fu,2017-07-01 12:07:28,Max_range_values-1
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format.
This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query.
Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is f,2017-07-01 12:07:28,Max_range_values-2

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
index 5278344..b33b65f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
@@ -38,6 +38,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table if exists a12")
     sql("drop table if exists a12_allnull")
     sql("drop table if exists a12_no_null")
+    sql("drop table if exists Test_Boundary1")

     sql(
       "create table a12(empid String,ename String,sal double,deptno int,mgr string,gender string," +
@@ -53,6 +54,7 @@
       " string," +
       "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
     )
+    sql("create table Test_Boundary1 (c1_int int,c2_Bigint Bigint,c3_Decimal Decimal(38,38),c4_double double,c5_string string,c6_Timestamp Timestamp,c7_Datatype_Desc string) STORED BY 'org.apache.carbondata.format'")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
     val basePath = new File(this.getClass.getResource("/").getPath + "/../../")
@@ -77,6 +79,9 @@
         'QUOTECHAR'='"')"""
         .stripMargin
     )
+
+    sql(
+      s"LOAD DATA INPATH './src/test/resources/Test_Data1_Logrithmic.csv' INTO table Test_Boundary1 OPTIONS('DELIMITER'=',','QUOTECHAR'='','FILEHEADER'='')")
   }
   //mixed value test
   test("Less Than Filter") {
@@ -99,6 +104,12 @@
       Seq(Row(3))
     )
   }
+  test("0.0 and -0.0 equality check for double data type applying log function") {
+    checkAnswer(
+      sql("select log(c4_double,1) from Test_Boundary1 where log(c4_double,1)= -0.0"),
+      Seq(Row(0.0),Row(0.0))
+    )
+  }
   test("Greater Than equal to Filter") {
     sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'").show()
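
----------------------------------------------------------------------
The semantics the fix relies on can be checked in isolation. For doubles, == and java.lang.Double.equals() disagree in exactly two cases: 0.0 == -0.0 is true while Double.valueOf(0.0).equals(-0.0) is false, and Double.NaN == Double.NaN is false while equals() returns true. nanSafeEqualsDoubles combines the two behaviours. Below is a minimal standalone sketch, not part of the commit; the class name NanSafeEqualsDemo is hypothetical and the helper simply mirrors the logic added to FilterUtil.

// Illustration only: contrasts == with java.lang.Double.equals() for +/-0.0
// and NaN, and shows the combined comparison the fix uses.
public class NanSafeEqualsDemo {

  // Same logic as FilterUtil.nanSafeEqualsDoubles in this commit: two NaNs
  // compare equal (like Double.equals), and 0.0 equals -0.0 (like ==).
  static boolean nanSafeEqualsDoubles(double d1, double d2) {
    return (Double.isNaN(d1) && Double.isNaN(d2)) || d1 == d2;
  }

  public static void main(String[] args) {
    System.out.println(0.0 == -0.0);                                   // true
    System.out.println(Double.valueOf(0.0).equals(-0.0));              // false
    System.out.println(Double.NaN == Double.NaN);                      // false
    System.out.println(Double.valueOf(Double.NaN).equals(Double.NaN)); // true
    System.out.println(nanSafeEqualsDoubles(0.0, -0.0));               // true
    System.out.println(nanSafeEqualsDoubles(Double.NaN, Double.NaN));  // true
  }
}

This is also what the new test exercises: log(c4_double,1) evaluates to zero (the logarithm of 1 in any base), and with the old Double.equals() comparison the filter literal -0.0 failed to match rows whose computed value was 0.0.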