Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 8652eee83 -> c9a9a6af1
[CARBONDATA-250] Filter result is incorrect when Double data type values 0.0 and -0.0 are used.

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7e814077
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7e814077
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7e814077

Branch: refs/heads/master
Commit: 7e81407714ad8878e50b075df3283ea3984271e5
Parents: 8652eee
Author: sujith71955 <sujithchacko.2...@gmail.com>
Authored: Sun Sep 18 04:01:10 2016 +0530
Committer: Venkata Ramana G <ramana.gollam...@huawei.com>
Committed: Sun Sep 18 04:41:15 2016 +0530

----------------------------------------------------------------------
 .../conditional/EqualToExpression.java        |  3 ++-
 .../carbondata/scan/filter/FilterUtil.java    | 21 +++++++++++++++++++-
 .../test/resources/Test_Data1_Logrithmic.csv  |  3 +++
 .../GrtLtFilterProcessorTestCase.scala        | 11 ++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
index 12a3e32..8f7fa0a 100644
--- a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.scan.expression.Expression;
 import org.apache.carbondata.scan.expression.ExpressionResult;
 import org.apache.carbondata.scan.expression.exception.FilterIllegalMemberException;
 import org.apache.carbondata.scan.expression.exception.FilterUnsupportedException;
+import org.apache.carbondata.scan.filter.FilterUtil;
 import org.apache.carbondata.scan.filter.intf.ExpressionType;
 import org.apache.carbondata.scan.filter.intf.RowIntf;

@@ -78,7 +79,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
         result = val1.getInt().equals(val2.getInt());
         break;
       case DOUBLE:
-        result = val1.getDouble().equals(val2.getDouble());
+        result = FilterUtil.nanSafeEqualsDoubles(val1.getDouble(), val2.getDouble());
         break;
       case TIMESTAMP:
         result = val1.getTime().equals(val2.getTime());

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
index 71ac1bf..b7cacb1 100644
--- a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
@@ -1390,6 +1390,26 @@
   }

   /**
+   * This method compares two double values for equality. It preserves the
+   * 0.0 and -0.0 equality semantics of the == operator, and it also preserves
+   * the NaN equality check of java.lang.Double.equals().
+   *
+   * @param d1 first double value for the equality check
+   * @param d2 second double value for the equality check
+   * @return the result of comparing the two double values
+   */
+  public static boolean nanSafeEqualsDoubles(Double d1, Double d2) {
+    boolean xIsNan = Double.isNaN(d1);
+    boolean yIsNan = Double.isNaN(d2);
+    return (xIsNan && yIsNan) || (d1.doubleValue() == d2.doubleValue());
+  }
+
+  /**
    * This method will prepare the list with all unknown expressions
    *
    * @param expression
@@ -1406,5 +1426,4 @@
       getUnknownExpressionsList(child, lst);
     }
   }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
new file mode 100644
index 0000000..0f0312d
--- /dev/null
+++ b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
@@ -0,0 +1,3 @@
+c1_int,c2_Bigint,c3_Decimal,c4_double,c5_string,c6_Timestamp,c7_Datatype_Desc
+2147483646,9223372036854775807,0.12345678900987654321123456789012345638,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data.
Loaded data can be deleted based on load time or a specific load can be undone it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is fu,2017-07-01 12:07:28,Max_range_values-1
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format.
This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query.
Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is f,2017-07-01 12:07:28,Max_range_values-2

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7e814077/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
index 5278344..b33b65f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
@@ -38,6 +38,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table if exists a12")
     sql("drop table if exists a12_allnull")
     sql("drop table if exists a12_no_null")
+    sql("drop table if exists Test_Boundary1")

     sql(
       "create table a12(empid String,ename String,sal double,deptno int,mgr string,gender string," +
@@ -53,6 +54,7 @@
       " string," +
       "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
     )
+    sql("create table Test_Boundary1 (c1_int int,c2_Bigint Bigint,c3_Decimal Decimal(38,38),c4_double double,c5_string string,c6_Timestamp Timestamp,c7_Datatype_Desc string) STORED BY 'org.apache.carbondata.format'")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
     val basePath = new File(this.getClass.getResource("/").getPath + "/../../")
@@ -77,6 +79,9 @@
         'QUOTECHAR'='"')"""
         .stripMargin
     )
+
+    sql(
+      s"LOAD DATA INPATH './src/test/resources/Test_Data1_Logrithmic.csv' INTO table Test_Boundary1 OPTIONS('DELIMITER'=',','QUOTECHAR'='','FILEHEADER'='')")
   }
   //mixed value test
   test("Less Than Filter") {
@@ -99,6 +104,12 @@
       Seq(Row(3))
     )
   }
+  test("0.0 and -0.0 equality check for double data type applying log function") {
+    checkAnswer(
+      sql("select log(c4_double,1) from Test_Boundary1 where log(c4_double,1)= -0.0"),
+      Seq(Row(0.0),Row(0.0))
+    )
+  }
   test("Greater Than equal to Filter") {
     sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'").show()
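
----------------------------------------------------------------------
The semantics the fix relies on can be checked in isolation. For doubles, == and java.lang.Double.equals() disagree in exactly two cases: 0.0 == -0.0 is true while Double.valueOf(0.0).equals(-0.0) is false, and Double.NaN == Double.NaN is false while equals() returns true. nanSafeEqualsDoubles combines the two behaviours. Below is a minimal standalone sketch, not part of the commit; the class name NanSafeEqualsDemo is hypothetical and the helper simply mirrors the logic added to FilterUtil.

// Illustration only: contrasts == with java.lang.Double.equals() for +/-0.0
// and NaN, and shows the combined comparison the fix uses.
public class NanSafeEqualsDemo {

  // Same logic as FilterUtil.nanSafeEqualsDoubles in this commit: two NaNs
  // compare equal (like Double.equals), and 0.0 equals -0.0 (like ==).
  static boolean nanSafeEqualsDoubles(double d1, double d2) {
    return (Double.isNaN(d1) && Double.isNaN(d2)) || d1 == d2;
  }

  public static void main(String[] args) {
    System.out.println(0.0 == -0.0);                                   // true
    System.out.println(Double.valueOf(0.0).equals(-0.0));              // false
    System.out.println(Double.NaN == Double.NaN);                      // false
    System.out.println(Double.valueOf(Double.NaN).equals(Double.NaN)); // true
    System.out.println(nanSafeEqualsDoubles(0.0, -0.0));               // true
    System.out.println(nanSafeEqualsDoubles(Double.NaN, Double.NaN));  // true
  }
}

This is also what the new test exercises: log(c4_double,1) evaluates to zero (the logarithm of 1 in any base), and with the old Double.equals() comparison the filter literal -0.0 failed to match rows whose computed value was 0.0.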