This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 3a6e4a4  [CARBONDATA-4173][CARBONDATA-4174] Fix inverted index query issue and handle exception for desc column
3a6e4a4 is described below
commit 3a6e4a436f7f6235555f0a9bbe9f88213e2dd82b
Author: ShreelekhyaG <[email protected]>
AuthorDate: Thu Apr 22 20:23:37 2021 +0530
[CARBONDATA-4173][CARBONDATA-4174] Fix inverted index query issue and handle exception for desc column
Why is this PR needed?
After creating an inverted index on a dimension column, some filter queries return incorrect results.
In DESCRIBE COLUMN, the case where a non-existing child is given for a higher-level column needs to be handled instead of throwing an unhandled exception.
What changes were proposed in this PR?
While sorting byte arrays for the inverted index, the compareTo method of ByteArrayColumnWithRowId is used. It always skipped the first two bytes, so dictionary-encoded values were compared on the last byte only. Changed the comparison to cover the entire byte array when the column is dictionary encoded.
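For illustration only, a minimal standalone sketch of the comparison issue (not CarbonData code: java.util.Arrays stands in for UnsafeComparer, and the 3-byte dictionary keys are made-up sample values):

import java.util.Arrays;

public class CompareSketch {
  // Compare a sub-range of two byte arrays given as (offset, length) pairs.
  static int compareRange(byte[] a, int aOff, int aLen, byte[] b, int bOff, int bLen) {
    return Arrays.compare(a, aOff, aOff + aLen, b, bOff, bOff + bLen);
  }

  public static void main(String[] args) {
    // Hypothetical 3-byte dictionary keys of two different values.
    byte[] key1 = {0, 1, 5};
    byte[] key2 = {0, 2, 5};
    // Old behaviour: the first 2 bytes are skipped, so only the last byte is
    // compared and the keys wrongly look equal -> prints 0.
    System.out.println(compareRange(key1, 2, key1.length - 2, key2, 2, key2.length - 2));
    // Fixed behaviour for dictionary-encoded columns: compare the whole array,
    // so key1 sorts before key2 -> prints a negative value.
    System.out.println(compareRange(key1, 0, key1.length, key2, 0, key2.length));
  }
}

(Arrays.compare over a range requires Java 9+; the production code keeps using UnsafeComparer, only the compared offsets change.)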
Handled the exception for non-existing child columns in DESCRIBE COLUMN and added test cases.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #4124
---
.../columnar/ByteArrayBlockIndexerStorage.java | 10 ++-----
.../columnar/ByteArrayColumnWithRowId.java | 14 +++++++--
.../table/CarbonDescribeFormattedCommand.scala | 11 +++++--
.../dataload/TestNoInvertedIndexLoadAndQuery.scala | 34 ++++++++++++++++++++++
.../describeTable/TestDescribeTable.scala | 22 +++++++++++++-
5 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
index b65ac52..ffad4da 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
@@ -47,14 +47,8 @@ public class ByteArrayBlockIndexerStorage extends BlockIndexerStorage<byte[][]>
private ByteArrayColumnWithRowId[] createColumnWithRowId(byte[][] dataPage,
boolean isNoDictionary) {
ByteArrayColumnWithRowId[] columnWithIndexes = new ByteArrayColumnWithRowId[dataPage.length];
- if (isNoDictionary) {
- for (short i = 0; i < columnWithIndexes.length; i++) {
- columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i);
- }
- } else {
- for (short i = 0; i < columnWithIndexes.length; i++) {
- columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i);
- }
+ for (short i = 0; i < columnWithIndexes.length; i++) {
+ columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i, isNoDictionary);
}
return columnWithIndexes;
}
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
index 946aa6b..5141226 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
@@ -26,9 +26,12 @@ public class ByteArrayColumnWithRowId implements Comparable<ByteArrayColumnWithR
private short rowId;
- ByteArrayColumnWithRowId(byte[] column, short rowId) {
+ private boolean isNoDictionary;
+
+ ByteArrayColumnWithRowId(byte[] column, short rowId, boolean isNoDictionary) {
this.column = column;
this.rowId = rowId;
+ this.isNoDictionary = isNoDictionary;
}
public byte[] getColumn() {
@@ -41,8 +44,13 @@ public class ByteArrayColumnWithRowId implements Comparable<ByteArrayColumnWithR
@Override
public int compareTo(ByteArrayColumnWithRowId o) {
- return UnsafeComparer.INSTANCE
- .compareTo(column, 2, column.length - 2, o.column, 2, o.column.length - 2);
+ if (isNoDictionary) {
+ return UnsafeComparer.INSTANCE
+ .compareTo(column, 2, column.length - 2, o.column, 2, o.column.length - 2);
+ } else {
+ return UnsafeComparer.INSTANCE
+ .compareTo(column, 0, column.length, o.column, 0, o.column.length);
+ }
}
@Override
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
index f85bb1f..64ac71e 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
@@ -412,7 +412,7 @@ case class CarbonDescribeColumnCommand(
val nextField = inputFieldsIterator.next()
// child of an array can be only 'item'
if (!nextField.equalsIgnoreCase("item")) {
- throw handleException(nextField, currField.name, carbonTable.getTableName)
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = StructField("item",
currField.dataType.asInstanceOf[ArrayType].elementType)
@@ -433,7 +433,7 @@ case class CarbonDescribeColumnCommand(
.find(_.name.equalsIgnoreCase(nextField))
// verify if the input child name exists in the schema
if (!nextCurrField.isDefined) {
- throw handleException(nextField, currField.name, carbonTable.getTableName)
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = nextCurrField.get
@@ -455,7 +455,7 @@ case class CarbonDescribeColumnCommand(
val nextCurrField = nextField match {
case "key" => StructField("key", children.keyType)
case "value" => StructField("value", children.valueType)
- case _ => throw handleException(nextField, currField.name, carbonTable.getTableName)
+ case _ => throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = nextCurrField
@@ -469,6 +469,11 @@ case class CarbonDescribeColumnCommand(
results ++= Seq(("key", children.keyType.simpleString, "null"),
("value", children.valueType.simpleString, "null"))
} else {
+ if (inputFieldsIterator.hasNext) {
+ val nextField = inputFieldsIterator.next().toLowerCase()
+ // throw exception as no children present to display.
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
+ }
results = Seq((inputColumn,
currField.dataType.typeName,
currField.getComment().getOrElse("null")))
}
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
index 8d92e05..6cc0f66 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
@@ -286,6 +286,38 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
.contains(Encoding.INVERTED_INDEX))
}
+ test("inverted index with dimension column in INVERTED_INDEX and test filter query") {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ "true")
+ sql("drop table if exists indexFormat")
+ sql(
+ "CREATE TABLE indexFormat (CUST_ID INT,CUST_NAME string,ACTIVE_EMUI_VERSION string," +
+ "DOB timestamp,DOJ timestamp,BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint," +
+ "DECIMAL_COLUMN1 DECIMAL(30, 10),DECIMAL_COLUMN2 DECIMAL(36, 10),Double_COLUMN1 double, " +
+ "Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED AS carbondata TBLPROPERTIES(" +
+ "'TABLE_BLOCKSIZE'='256 MB', 'sort_columns'='CUST_NAME, ACTIVE_EMUI_VERSION', " +
+ "'inverted_index'='CUST_NAME, ACTIVE_EMUI_VERSION', 'local_dictionary_enable'='true', " +
+ "'local_dictionary_exclude'='ACTIVE_EMUI_VERSION')")
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data_2000.csv' INTO " +
+ "TABLE indexFormat OPTIONS('DELIMITER'=',', " +
+ "'BAD_RECORDS_LOGGER_ENABLE'='FALSE', 'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID," +
+ "CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1," +
+ "DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1')")
+ val carbonTable = CarbonEnv.getCarbonTable(Some("default"), "indexFormat")(sqlContext
+ .sparkSession)
+ assert(carbonTable.getColumnByName("CUST_NAME").getColumnSchema.getEncodingList
+ .contains(Encoding.INVERTED_INDEX))
+ assert(carbonTable.getColumnByName("ACTIVE_EMUI_VERSION").getColumnSchema.getEncodingList
+ .contains(Encoding.INVERTED_INDEX))
+ checkAnswer(sql("select CUST_NAME from indexFormat where CUST_NAME='CUST_NAME_00004'"),
+ Seq(Row("CUST_NAME_00004")))
+ sql("drop table if exists indexFormat")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
+ }
+
test("test same column configured in inverted and no inverted index") {
sql("drop table if exists index1")
val exception = intercept[MalformedCarbonCommandException] {
@@ -307,6 +339,8 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
sql("drop table if exists index1")
sql("drop table if exists index2")
sql("drop table if exists indexFormat")
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
index 8b9bbc9..e7e8ecb 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
@@ -205,7 +205,27 @@ class TestDescribeTable extends QueryTest with BeforeAndAfterAll {
exception2 = intercept[MalformedCarbonCommandException](sql(
"describe column MAC.one on complexcarbontable"))
assert(exception2.getMessage.contains(
- "one is invalid child name for column mac of table: complexcarbontable"))
+ "one is invalid child name for column MAC of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column deviceInformationId.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column deviceInformationId of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column mobile.imei.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column mobile.imei of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column MAC.item.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column MAC.item of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column channelsId.key.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column channelsId.key of table: complexcarbontable"))
}
test("test describe short table format") {