This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 3a6e4a4  [CARBONDATA-4173][CARBONDATA-4174] Fix inverted index query issue and handle exception for desc column
3a6e4a4 is described below
commit 3a6e4a436f7f6235555f0a9bbe9f88213e2dd82b
Author: ShreelekhyaG <[email protected]>
AuthorDate: Thu Apr 22 20:23:37 2021 +0530
[CARBONDATA-4173][CARBONDATA-4174] Fix inverted index query issue and handle exception for desc column
Why is this PR needed?
After creating an inverted index on a dimension column, some filter queries return incorrect results.
In DESCRIBE COLUMN, the case where a non-existing child is given for a higher-level column needs to be handled instead of throwing an unhandled exception.
What changes were proposed in this PR?
While sorting byte arrays for the inverted index, the compareTo method of ByteArrayColumnWithRowId is used. It always skipped the first two bytes, so dictionary-encoded values were compared on the last byte only. Changed the comparison to cover the entire byte array when the column is dictionary encoded.
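For illustration only, a minimal standalone sketch of the comparison issue (not CarbonData code: java.util.Arrays stands in for UnsafeComparer, and the 3-byte dictionary keys are made-up sample values):

import java.util.Arrays;

public class CompareSketch {
  // Compare a sub-range of two byte arrays given as (offset, length) pairs.
  static int compareRange(byte[] a, int aOff, int aLen, byte[] b, int bOff, int bLen) {
    return Arrays.compare(a, aOff, aOff + aLen, b, bOff, bOff + bLen);
  }

  public static void main(String[] args) {
    // Hypothetical 3-byte dictionary keys of two different values.
    byte[] key1 = {0, 1, 5};
    byte[] key2 = {0, 2, 5};
    // Old behaviour: the first 2 bytes are skipped, so only the last byte is
    // compared and the keys wrongly look equal -> prints 0.
    System.out.println(compareRange(key1, 2, key1.length - 2, key2, 2, key2.length - 2));
    // Fixed behaviour for dictionary-encoded columns: compare the whole array,
    // so key1 sorts before key2 -> prints a negative value.
    System.out.println(compareRange(key1, 0, key1.length, key2, 0, key2.length));
  }
}

(Arrays.compare over a range requires Java 9+; the production code keeps using UnsafeComparer, only the compared offsets change.)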
Handled the exception for non-existing child columns in DESCRIBE COLUMN and added test cases.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #4124
---
.../columnar/ByteArrayBlockIndexerStorage.java | 10 ++-----
.../columnar/ByteArrayColumnWithRowId.java | 14 +++++++--
.../table/CarbonDescribeFormattedCommand.scala | 11 +++++--
.../dataload/TestNoInvertedIndexLoadAndQuery.scala | 34 ++++++++++++++++++++++
.../describeTable/TestDescribeTable.scala | 22 +++++++++++++-
5 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
index b65ac52..ffad4da 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayBlockIndexerStorage.java
@@ -47,14 +47,8 @@ public class ByteArrayBlockIndexerStorage extends BlockIndexerStorage<byte[][]>
private ByteArrayColumnWithRowId[] createColumnWithRowId(byte[][] dataPage,
boolean isNoDictionary) {
ByteArrayColumnWithRowId[] columnWithIndexes = new ByteArrayColumnWithRowId[dataPage.length];
- if (isNoDictionary) {
- for (short i = 0; i < columnWithIndexes.length; i++) {
- columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i);
- }
- } else {
- for (short i = 0; i < columnWithIndexes.length; i++) {
- columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i);
- }
+ for (short i = 0; i < columnWithIndexes.length; i++) {
+ columnWithIndexes[i] = new ByteArrayColumnWithRowId(dataPage[i], i, isNoDictionary);
}
return columnWithIndexes;
}
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
index 946aa6b..5141226 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ByteArrayColumnWithRowId.java
@@ -26,9 +26,12 @@ public class ByteArrayColumnWithRowId implements Comparable<ByteArrayColumnWithR
private short rowId;
- ByteArrayColumnWithRowId(byte[] column, short rowId) {
+ private boolean isNoDictionary;
+
+ ByteArrayColumnWithRowId(byte[] column, short rowId, boolean isNoDictionary) {
this.column = column;
this.rowId = rowId;
+ this.isNoDictionary = isNoDictionary;
}
public byte[] getColumn() {
@@ -41,8 +44,13 @@ public class ByteArrayColumnWithRowId implements Comparable<ByteArrayColumnWithR
@Override
public int compareTo(ByteArrayColumnWithRowId o) {
- return UnsafeComparer.INSTANCE
- .compareTo(column, 2, column.length - 2, o.column, 2, o.column.length - 2);
+ if (isNoDictionary) {
+ return UnsafeComparer.INSTANCE
+ .compareTo(column, 2, column.length - 2, o.column, 2, o.column.length - 2);
+ } else {
+ return UnsafeComparer.INSTANCE
+ .compareTo(column, 0, column.length, o.column, 0, o.column.length);
+ }
}
@Override
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
index f85bb1f..64ac71e 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonDescribeFormattedCommand.scala
@@ -412,7 +412,7 @@ case class CarbonDescribeColumnCommand(
val nextField = inputFieldsIterator.next()
// child of an array can be only 'item'
if (!nextField.equalsIgnoreCase("item")) {
- throw handleException(nextField, currField.name, carbonTable.getTableName)
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = StructField("item",
currField.dataType.asInstanceOf[ArrayType].elementType)
@@ -433,7 +433,7 @@ case class CarbonDescribeColumnCommand(
.find(_.name.equalsIgnoreCase(nextField))
// verify if the input child name exists in the schema
if (!nextCurrField.isDefined) {
- throw handleException(nextField, currField.name, carbonTable.getTableName)
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = nextCurrField.get
@@ -455,7 +455,7 @@ case class CarbonDescribeColumnCommand(
val nextCurrField = nextField match {
case "key" => StructField("key", children.keyType)
case "value" => StructField("value", children.valueType)
- case _ => throw handleException(nextField, currField.name, carbonTable.getTableName)
+ case _ => throw handleException(nextField, inputColumn, carbonTable.getTableName)
}
// make the child type as current field to describe further nested types.
currField = nextCurrField
@@ -469,6 +469,11 @@ case class CarbonDescribeColumnCommand(
results ++= Seq(("key", children.keyType.simpleString, "null"),
("value", children.valueType.simpleString, "null"))
} else {
+ if (inputFieldsIterator.hasNext) {
+ val nextField = inputFieldsIterator.next().toLowerCase()
+ // throw exception as no children present to display.
+ throw handleException(nextField, inputColumn, carbonTable.getTableName)
+ }
results = Seq((inputColumn,
currField.dataType.typeName,
currField.getComment().getOrElse("null")))
}
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
index 8d92e05..6cc0f66 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala
@@ -286,6 +286,38 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
.contains(Encoding.INVERTED_INDEX))
}
+ test("inverted index with dimension column in INVERTED_INDEX and test filter query") {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ "true")
+ sql("drop table if exists indexFormat")
+ sql(
+ "CREATE TABLE indexFormat (CUST_ID INT,CUST_NAME string,ACTIVE_EMUI_VERSION string," +
+ "DOB timestamp,DOJ timestamp,BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint," +
+ "DECIMAL_COLUMN1 DECIMAL(30, 10),DECIMAL_COLUMN2 DECIMAL(36, 10),Double_COLUMN1 double, " +
+ "Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED AS carbondata TBLPROPERTIES(" +
+ "'TABLE_BLOCKSIZE'='256 MB', 'sort_columns'='CUST_NAME, ACTIVE_EMUI_VERSION', " +
+ "'inverted_index'='CUST_NAME, ACTIVE_EMUI_VERSION', 'local_dictionary_enable'='true', " +
+ "'local_dictionary_exclude'='ACTIVE_EMUI_VERSION')")
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/restructure/data_2000.csv' INTO " +
+ "TABLE indexFormat OPTIONS('DELIMITER'=',', " +
+ "'BAD_RECORDS_LOGGER_ENABLE'='FALSE', 'BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID," +
+ "CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1," +
+ "DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1')")
+ val carbonTable = CarbonEnv.getCarbonTable(Some("default"), "indexFormat")(sqlContext
+ .sparkSession)
+ assert(carbonTable.getColumnByName("CUST_NAME").getColumnSchema.getEncodingList
+ .contains(Encoding.INVERTED_INDEX))
+ assert(carbonTable.getColumnByName("ACTIVE_EMUI_VERSION").getColumnSchema.getEncodingList
+ .contains(Encoding.INVERTED_INDEX))
+ checkAnswer(sql("select CUST_NAME from indexFormat where CUST_NAME='CUST_NAME_00004'"),
+ Seq(Row("CUST_NAME_00004")))
+ sql("drop table if exists indexFormat")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
+ }
+
test("test same column configured in inverted and no inverted index") {
sql("drop table if exists index1")
val exception = intercept[MalformedCarbonCommandException] {
@@ -307,6 +339,8 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
+ CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
sql("drop table if exists index1")
sql("drop table if exists index2")
sql("drop table if exists indexFormat")
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
index 8b9bbc9..e7e8ecb 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
@@ -205,7 +205,27 @@ class TestDescribeTable extends QueryTest with BeforeAndAfterAll {
exception2 = intercept[MalformedCarbonCommandException](sql(
"describe column MAC.one on complexcarbontable"))
assert(exception2.getMessage.contains(
- "one is invalid child name for column mac of table: complexcarbontable"))
+ "one is invalid child name for column MAC of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column deviceInformationId.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column deviceInformationId of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column mobile.imei.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column mobile.imei of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column MAC.item.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column MAC.item of table: complexcarbontable"))
+
+ exception2 = intercept[MalformedCarbonCommandException](sql(
+ "describe column channelsId.key.x on complexcarbontable"))
+ assert(exception2.getMessage.contains(
+ "x is invalid child name for column channelsId.key of table: complexcarbontable"))
}
test("test describe short table format") {