This is an automated email from the ASF dual-hosted git repository.
akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new c4d3d39 [CARBONDATA-4005] SI with cache level blocklet issue
c4d3d39 is described below
commit c4d3d39d44a7ebab3153bc60ad363c37631c5672
Author: ShreelekhyaG <[email protected]>
AuthorDate: Tue Sep 22 18:34:28 2020 +0530
[CARBONDATA-4005] SI with cache level blocklet issue
Why is this PR needed?
A select query on a secondary index (SI) column returns an empty result set
after the cache level is changed to BLOCKLET.
What changes were proposed in this PR?
When CACHE_LEVEL = BLOCKLET, the blockId path contains both the block id and
the blocklet id. getShortBlockId now checks for the file separator that
precedes the blocklet id and removes only the compressor name, leaving the
blocklet id in place.
Added examples to the test cases.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3951
---
.../scan/executor/impl/AbstractQueryExecutor.java | 6 +---
.../carbondata/core/util/path/CarbonTablePath.java | 40 ++++++++--------------
.../carbondata/core/util/CarbonUtilTest.java | 7 +++-
.../secondaryindex/TestSIWithSecondryIndex.scala | 20 +++++++++++
4 files changed, 42 insertions(+), 31 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index 978e8e4..3822f3e 100644
---
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -451,11 +451,7 @@ public abstract class AbstractQueryExecutor<E> implements
QueryExecutor<E> {
.getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath,
segment.getSegmentNo(),
queryModel.getTable().getTableInfo().isTransactionalTable(),
isStandardTable, queryModel.getTable().isHivePartitionTable());
- if (!isStandardTable) {
-
blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockIdForPartitionTable(blockId));
- } else {
- blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId));
- }
+ blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId));
blockExecutionInfo.setDeleteDeltaFilePath(deleteDeltaFiles);
blockExecutionInfo.setStartBlockletIndex(0);
blockExecutionInfo.setNumberOfBlockletToScan(numberOfBlockletToScan);
diff --git
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
index c89448c..1b43acc 100644
---
a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
+++
b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java
@@ -648,33 +648,23 @@ public class CarbonTablePath {
.replace(DATA_PART_PREFIX, "").replace(CARBON_DATA_EXT, "");
// to remove compressor name
if (!blockId.equalsIgnoreCase(blockIdWithCompressorName)) {
- int index = blockIdWithCompressorName.lastIndexOf(".");
- if (index != -1) {
- String replace =
-
blockIdWithCompressorName.replace(blockIdWithCompressorName.substring(index),
"");
- return replace;
- } else {
- return blockIdWithCompressorName;
- }
- } else {
- return blockIdWithCompressorName;
- }
- }
-
- /**
- * This method will remove strings in path and return short block id
- *
- * @param blockId
- * @return shortBlockId
- */
- public static String getShortBlockIdForPartitionTable(String blockId) {
- String blockIdWithCompressorName = blockId.replace(DATA_PART_PREFIX, "")
- .replace(BATCH_PREFIX,
CarbonCommonConstants.UNDERSCORE).replace(CARBON_DATA_EXT, "");
- // to remove compressor name
- if (!blockId.equalsIgnoreCase(blockIdWithCompressorName)) {
int index = blockIdWithCompressorName.lastIndexOf(POINT);
+ int fileSeperatorIndex =
blockIdWithCompressorName.lastIndexOf(File.separator);
if (index != -1) {
- return
blockIdWithCompressorName.replace(blockIdWithCompressorName.substring(index),
"");
+ String modifiedBlockId;
+ if (index > fileSeperatorIndex) {
+ // Default case when path ends with compressor name.
+ // Example: 0/0-0_0-0-0-1600789595862.snappy
+ modifiedBlockId =
+
blockIdWithCompressorName.replace(blockIdWithCompressorName.substring(index),
"");
+ } else {
+ // in case of CACHE_LEVEL = BLOCKLET, blockId path contains both
block id and blocklet id
+ // so check for next file seperator and remove compressor name.
+ // Example: 0/0-0_0-0-0-1600789595862.snappy/0
+ modifiedBlockId = blockIdWithCompressorName
+ .replace(blockIdWithCompressorName.substring(index,
fileSeperatorIndex), "");
+ }
+ return modifiedBlockId;
} else {
return blockIdWithCompressorName;
}
diff --git
a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
index 14388ca..485a8fa 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
@@ -927,7 +927,12 @@ public class CarbonUtilTest {
String blockId =
"Part0/Segment_0/part-0-0_batchno0-0-0-1597409791503.snappy.carbondata";
Assert.assertEquals(CarbonTablePath.getShortBlockId(blockId),
"0/0-0_0-0-0-1597409791503");
blockId =
"c3=aa/part-0-100100000100001_batchno0-0-0-1597411003332.snappy.carbondata";
-
Assert.assertEquals(CarbonTablePath.getShortBlockIdForPartitionTable(blockId),
"c3=aa/0-100100000100001_0-0-0-1597411003332");
+ Assert.assertEquals(CarbonTablePath.getShortBlockId(blockId),
"c3=aa/0-100100000100001_0-0-0-1597411003332");
+ // CACHE_LEVEL = BLOCKLET case
+ blockId =
"Part0/Segment_0/part-0-0_batchno0-0-0-1597409791503.snappy.carbondata/0";
+ Assert.assertEquals(CarbonTablePath.getShortBlockId(blockId),
"0/0-0_0-0-0-1597409791503/0");
+ blockId =
"c3=aa/part-0-100100000100001_batchno0-0-0-1597411003332.snappy.carbondata/0";
+ Assert.assertEquals(CarbonTablePath.getShortBlockId(blockId),
"c3=aa/0-100100000100001_0-0-0-1597411003332/0");
// external segment case
blockId =
"#home#root1#Projects#carbondata#integration#spark#target#warehouse#addsegtest#/Segment_2/part-0-0_batchno0-0-1-1597411388431.snappy.carbondata";
Assert.assertEquals(CarbonTablePath.getShortBlockId(blockId),
"#home#root1#Projects#carbondata#integration#spark#target#warehouse#addsegtest#/2/0-0_0-0-1-1597411388431");
diff --git
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondryIndex.scala
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondryIndex.scala
index 26ad960..cb5c73f 100644
---
a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondryIndex.scala
+++
b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithSecondryIndex.scala
@@ -198,6 +198,26 @@ class TestSIWithSecondryIndex extends QueryTest with
BeforeAndAfterAll {
checkAnswer(sql("select * from maintableeee where c>1"),
Seq(Row("k","x",2)))
}
+ test("test secondary index with cache_level as blocklet") {
+ sql("drop table if exists maintable")
+ sql("create table maintable (a string,b string,c int) STORED AS
carbondata")
+ sql("insert into maintable values('k','x',2)")
+ sql("create index indextable on table maintable(b) AS 'carbondata'")
+ sql("ALTER TABLE maintable SET TBLPROPERTIES('CACHE_LEVEL'='BLOCKLET')")
+ checkAnswer(sql("select * from maintable where b='x'"),
Seq(Row("k","x",2)))
+ sql("drop table maintable")
+ }
+
+ test("test secondary index with cache_level as blocklet on partitioned
table") {
+ sql("drop table if exists partitionTable")
+ sql("create table partitionTable (a string,b string) partitioned by (c
int) STORED AS carbondata")
+ sql("insert into partitionTable values('k','x',2)")
+ sql("create index indextable on table partitionTable(b) AS 'carbondata'")
+ sql("ALTER TABLE partitionTable SET
TBLPROPERTIES('CACHE_LEVEL'='BLOCKLET')")
+ checkAnswer(sql("select * from partitionTable where b='x'"),
Seq(Row("k","x",2)))
+ sql("drop table partitionTable")
+ }
+
test("validate column_meta_cache and cache_level on SI table") {
sql("drop table if exists column_meta_cache")
sql("create table column_meta_cache(c1 String, c2 String, c3 int, c4
double) STORED AS carbondata")