This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 56ba03c [CARBONDATA-3979] Added Hive local dictionary support example
56ba03c is described below
commit 56ba03cf460ec39c62d44cb4e399e400a4b548d1
Author: ShreelekhyaG <[email protected]>
AuthorDate: Mon Sep 7 13:00:05 2020 +0530
[CARBONDATA-3979] Added Hive local dictionary support example
Why is this PR needed?
To verify local dictionary support in Hive.
What changes were proposed in this PR?
Added an example in the HiveExample class.
Added a test case in HiveCarbonTest.
Refactored the local dictionary verification functions into a shared CarbonTestUtil class.
This closes #3914
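For context, a minimal sketch of how a test can drive the refactored utility (class and method names are the ones introduced in core's CarbonTestUtil below; the store path and expected values are placeholders):

    import java.util.ArrayList;

    import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
    import org.apache.carbondata.core.util.CarbonTestUtil;

    public class LocalDictionaryCheckSketch {
      public static void main(String[] args) throws Exception {
        // Placeholder: directory that contains the table's .carbondata fact files
        String storePath = "/tmp/warehouse/hive_carbon_table/";
        // Raw dimension chunks of block 0, read from the first fact file found
        ArrayList<DimensionRawColumnChunk> chunks = CarbonTestUtil.getDimRawChunk(storePath, 0);
        // True if any blocklet in the list carries a local dictionary
        System.out.println(CarbonTestUtil.checkForLocalDictionary(chunks));
        // True only if every expected value resolves through the decoded local dictionary
        String[] expected = new String[]{"hive", "impala", "flink", "spark"};
        System.out.println(CarbonTestUtil.validateDictionary(chunks.get(0), expected));
      }
    }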
---
.../carbondata/core/util/CarbonTestUtil.java | 161 +++++++++++++++++++++
.../carbondata/core/util/CarbonTestUtil.java | 37 -----
docs/hive-guide.md | 5 +
.../apache/carbondata/examples/HiveExample.scala | 31 ++++
.../org/apache/carbondata/hive/HiveCarbonTest.java | 16 ++
.../LoadTableWithLocalDictionaryTestCase.scala | 141 ++----------------
.../TestCreateDDLForComplexMapType.scala | 11 --
.../TestNonTransactionalCarbonTable.scala | 125 +---------------
...onTransactionalCarbonTableWithComplexType.scala | 4 +-
.../LocalDictionarySupportLoadTableTest.scala | 151 +++----------------
10 files changed, 257 insertions(+), 425 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonTestUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonTestUtil.java
new file mode 100644
index 0000000..b7b2a9c
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonTestUtil.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.FileReader;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
+import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory;
+import org.apache.carbondata.core.datastore.chunk.reader.dimension.v3.DimensionChunkReaderV3;
+import org.apache.carbondata.core.datastore.compression.CompressorFactory;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.LazyColumnPage;
+import org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder;
+import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta;
+import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+import org.apache.carbondata.core.metadata.ValueEncoderMeta;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.format.LocalDictionaryChunk;
+
+public class CarbonTestUtil {
+
+ public static ValueEncoderMeta createValueEncoderMeta() {
+ ColumnarFormatVersion version =
+ CarbonProperties.getInstance().getFormatVersion();
+
+ switch (version) {
+ case V3:
+ return new ColumnPageEncoderMeta();
+ default:
+      throw new UnsupportedOperationException("unsupported version: " + version);
+ }
+ }
+
+  /**
+   * Returns the dimension raw column chunks of the given block index, read from the first
+   * carbondata fact file found under the store path.
+   * @param storePath store path of the table
+   * @param blockindex index of the block whose dimension chunks are to be read
+   * @return dimensionRawColumnChunks
+   */
+  public static ArrayList<DimensionRawColumnChunk> getDimRawChunk(String storePath,
+      Integer blockindex) throws IOException {
+    CarbonFile[] dataFiles = FileFactory.getCarbonFile(storePath).listFiles(new CarbonFileFilter() {
+      @Override
+      public boolean accept(CarbonFile file) {
+        return file.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
+      }
+    });
+ ArrayList<DimensionRawColumnChunk> dimensionRawColumnChunks =
+ read(dataFiles[0].getAbsolutePath(), blockindex);
+ return dimensionRawColumnChunks;
+ }
+
+  public static ArrayList<DimensionRawColumnChunk> read(String filePath, Integer blockIndex)
+      throws IOException {
+    File carbonDataFiles = new File(filePath);
+    ArrayList<DimensionRawColumnChunk> dimensionRawColumnChunks = new ArrayList<>();
+ long offset = carbonDataFiles.length();
+ DataFileFooterConverterV3 converter = new DataFileFooterConverterV3();
+    FileReader fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath));
+    long actualOffset = fileReader.readLong(carbonDataFiles.getAbsolutePath(), offset - 8);
+    TableBlockInfo blockInfo =
+        new TableBlockInfo(carbonDataFiles.getAbsolutePath(), actualOffset, "0", new String[] {},
+            carbonDataFiles.length(), ColumnarFormatVersion.V3, null);
+ DataFileFooter dataFileFooter = converter.readDataFileFooter(blockInfo);
+ List<BlockletInfo> blockletList = dataFileFooter.getBlockletList();
+ ListIterator<BlockletInfo> iterator = blockletList.listIterator();
+ while (iterator.hasNext()) {
+      DimensionChunkReaderV3 dimensionColumnChunkReader =
+          (DimensionChunkReaderV3) CarbonDataReaderFactory.getInstance()
+              .getDimensionColumnChunkReader(ColumnarFormatVersion.V3, iterator.next(),
+                  carbonDataFiles.getAbsolutePath(), false);
+      dimensionRawColumnChunks
+          .add(dimensionColumnChunkReader.readRawDimensionChunk(fileReader, blockIndex));
+ }
+ return dimensionRawColumnChunks;
+ }
+
+  public static Boolean validateDictionary(DimensionRawColumnChunk rawColumnPage, String[] data)
+      throws IOException {
+    LocalDictionaryChunk local_dictionary = rawColumnPage.getDataChunkV3().local_dictionary;
+    if (null != local_dictionary) {
+      String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
+          rawColumnPage.getDataChunkV3().getData_chunk_list().get(0).getChunk_meta());
+ List<org.apache.carbondata.format.Encoding> encodings =
+ local_dictionary.getDictionary_meta().encoders;
+ DefaultEncodingFactory encodingFactory =
+ (DefaultEncodingFactory) DefaultEncodingFactory.getInstance();
+      ColumnPageDecoder decoder = encodingFactory
+          .createDecoder(encodings, local_dictionary.getDictionary_meta().getEncoder_meta(),
+              compressorName);
+      LazyColumnPage dictionaryPage = (LazyColumnPage) decoder
+          .decode(local_dictionary.getDictionary_data(), 0,
+              local_dictionary.getDictionary_data().length);
+      HashMap<DictionaryByteArrayWrapper, Integer> dictionaryMap = new HashMap<>();
+ BitSet usedDictionaryValues = BitSet.valueOf(
+ CompressorFactory.getInstance().getCompressor(compressorName)
+ .unCompressByte(local_dictionary.getDictionary_values()));
+ int index = 0;
+ int i = usedDictionaryValues.nextSetBit(0);
+ while (i >= 0) {
+        dictionaryMap.put(new DictionaryByteArrayWrapper(dictionaryPage.getBytes(index)), i);
+ i = usedDictionaryValues.nextSetBit(i + 1);
+ index += 1;
+ }
+ for (i = 0; i < data.length; i++) {
+        if (null == dictionaryMap.get(new DictionaryByteArrayWrapper(
+            data[i].getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET))))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
+ public static Boolean checkForLocalDictionary(
+ List<DimensionRawColumnChunk> dimensionRawColumnChunks) {
+ Boolean isLocalDictionaryGenerated = false;
+    ListIterator<DimensionRawColumnChunk> iterator = dimensionRawColumnChunks.listIterator();
+ while (iterator.hasNext()) {
+ if (iterator.next().getDataChunkV3().isSetLocal_dictionary()) {
+ isLocalDictionaryGenerated = true;
+ }
+ }
+ return isLocalDictionaryGenerated;
+ }
+}
diff --git a/core/src/test/java/org/apache/carbondata/core/util/CarbonTestUtil.java b/core/src/test/java/org/apache/carbondata/core/util/CarbonTestUtil.java
deleted file mode 100644
index 6080d7e..0000000
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonTestUtil.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.core.util;
-
-import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta;
-import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
-import org.apache.carbondata.core.metadata.ValueEncoderMeta;
-
-public class CarbonTestUtil {
-
- public static ValueEncoderMeta createValueEncoderMeta() {
- ColumnarFormatVersion version =
- CarbonProperties.getInstance().getFormatVersion();
-
- switch (version) {
- case V3:
- return new ColumnPageEncoderMeta();
- default:
-      throw new UnsupportedOperationException("unsupported version: " + version);
- }
- }
-}
diff --git a/docs/hive-guide.md b/docs/hive-guide.md
index b6629c3..0fa820c 100644
--- a/docs/hive-guide.md
+++ b/docs/hive-guide.md
@@ -119,5 +119,10 @@ select * from hive_carbon order by id;
### Note
- Partition table support is not handled
+ - Because carbon is currently implemented as a non-native Hive table, the user has to add the `storage_handler` information in tblproperties if the table has to be accessed from Hive. Once the tblproperties have been updated, the user will no longer be able to perform certain operations like alter, update/delete, etc., in both Spark and Hive.
+
+ ```
+ alter table <tableName> set tblproperties('storage_handler'='org.apache.carbondata.hive.CarbonStorageHandler');
+```
diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/HiveExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
index 0e827e6..8646f29 100644
--- a/examples/spark/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
+++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/HiveExample.scala
@@ -100,6 +100,13 @@ object HiveExample {
"insert into complexMap
values(map('Manish','Nalla','Shardul','Singh','Vishal','Kumar'," +
"'EmptyVal','','NullVal', 'null'))")
+ sparkSession.sql("""DROP TABLE IF EXISTS
carbonLocalDictionary""".stripMargin)
+
+ sparkSession.sql(
+ s"create table carbonLocalDictionary(id int, name string, scale decimal,
country string, " +
+ s"salary double) stored as carbondata TBLPROPERTIES
('local_dictionary_enable'='true'," +
+ s"'local_dictionary_include'='name') ")
+
// delete the already existing lock on metastore so that new derby instance
// for HiveServer can run on the same metastore
checkAndDeleteDBLock
@@ -238,6 +245,30 @@ object HiveExample {
s"$outOfOrderColFetched")
assert(outOfOrderColFetched == 4)
+ println(" ********** Local Dictionary details from Describe Formatted
**********")
+
+ val resultDictQuery = statement.executeQuery("describe formatted
carbonLocalDictionary")
+
+ var dataType = ""
+
+ while (resultDictQuery.next) {
+ dataType = dataType + resultDictQuery.getString("data_type")
+ if (resultDictQuery.getString("data_type") != null &&
+
resultDictQuery.getString("data_type").contains("local_dictionary_enable")) {
+ println(s"| " + resultDictQuery.getString("data_type") + " | " + "|
" +
+ resultDictQuery.getString("comment") + " |")
+ assert(resultDictQuery.getString("comment").contains("true"))
+ }
+ if (resultDictQuery.getString("data_type") != null &&
+
resultDictQuery.getString("data_type").contains("local_dictionary_include")) {
+ println(s"| " + resultDictQuery.getString("data_type") + " | " + "|
" +
+ resultDictQuery.getString("comment") + " |")
+ assert(resultDictQuery.getString("comment").contains("name"))
+ }
+ }
+ assert(dataType.contains("local_dictionary_enable"))
+ assert(dataType.contains("local_dictionary_include"))
+
val resultAggQuery = statement
.executeQuery(
"SELECT min(c3_Decimal) as min, max(c3_Decimal) as max, " +
diff --git a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
index 6a11c86..0188c64 100644
--- a/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
+++ b/integration/hive/src/test/java/org/apache/carbondata/hive/HiveCarbonTest.java
@@ -16,12 +16,16 @@
*/
package org.apache.carbondata.hive;
+import java.io.File;
import java.sql.ResultSet;
import java.sql.Statement;
+import java.util.ArrayList;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -74,6 +78,18 @@ public class HiveCarbonTest extends HiveTestUtils {
}
@Test
+ public void verifyLocalDictionaryValues() throws Exception {
+ statement.execute("drop table if exists hive_carbon_table");
+ statement.execute("CREATE TABLE hive_carbon_table(shortField SMALLINT ,
intField INT, bigintField BIGINT , doubleField DOUBLE, stringField STRING,
timestampField TIMESTAMP, decimalField DECIMAL(18,2), dateField DATE, charField
CHAR(5), floatField FLOAT) stored by
'org.apache.carbondata.hive.CarbonStorageHandler' TBLPROPERTIES
('local_dictionary_enable'='true','local_dictionary_include'='stringField')");
+ statement.execute("insert into hive_carbon_table select * from
hive_table");
+ File rootPath = new File(HiveTestUtils.class.getResource("/").getPath() +
"../../../..");
+ String storePath = rootPath.getAbsolutePath() +
"/integration/hive/target/warehouse/warehouse/hive_carbon_table/";
+ ArrayList<DimensionRawColumnChunk> dimRawChunk =
CarbonTestUtil.getDimRawChunk(storePath, 0);
+ String[] dictionaryData = new String[]{"hive", "impala", "flink", "spark"};
+ assert(CarbonTestUtil.validateDictionary(dimRawChunk.get(0),
dictionaryData));
+ }
+
+ @Test
public void verifyInsertIntoSelectOperation() throws Exception {
statement.execute("drop table if exists hive_carbon_table1");
statement.execute("CREATE TABLE hive_carbon_table1(id INT, name STRING,
scale DECIMAL, country STRING, salary DOUBLE) stored by
'org.apache.carbondata.hive.CarbonStorageHandler'");
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/LoadTableWithLocalDictionaryTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/LoadTableWithLocalDictionaryTestCase.scala
index 4c00cc2..604cae0 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/LoadTableWithLocalDictionaryTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/LoadTableWithLocalDictionaryTestCase.scala
@@ -25,18 +25,8 @@ import scala.collection.JavaConverters._
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll
-import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper
import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.datastore.block.TableBlockInfo
-import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk
-import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory
-import org.apache.carbondata.core.datastore.chunk.reader.dimension.v3.DimensionChunkReaderV3
-import org.apache.carbondata.core.datastore.compression.CompressorFactory
-import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter}
-import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
-import org.apache.carbondata.core.metadata.ColumnarFormatVersion
-import org.apache.carbondata.core.util.{CarbonMetadataUtil, CarbonProperties, DataFileFooterConverterV3}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonTestUtil}
class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfterAll {
@@ -59,7 +49,7 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_threshold'='2001','local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test successful local dictionary generation") {
@@ -68,21 +58,21 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='true','local_dictionary_threshold'='9001','local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test successful local dictionary generation for default configs") {
sql("drop table if exists local2")
sql("CREATE TABLE local2(name string) STORED AS carbondata
tblproperties('local_dictionary_enable'='true')")
sql("load data inpath '" + file1 + "' into table local2
OPTIONS('header'='false')")
- assert(checkForLocalDictionary(getDimRawChunk(0)))
+
assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath,
0)))
}
test("test local dictionary generation for local dictioanry include") {
sql("drop table if exists local2")
sql("CREATE TABLE local2(name string) STORED AS carbondata ")
sql("load data inpath '" + file1 + "' into table local2
OPTIONS('header'='false')")
- assert(!checkForLocalDictionary(getDimRawChunk(0)))
+
assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath,
0)))
}
test("test local dictionary generation for local dictioanry exclude") {
@@ -91,7 +81,7 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='true')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation when it is disabled") {
@@ -100,7 +90,7 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='false','local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation for invalid threshold
configurations") {
@@ -109,7 +99,7 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest
with BeforeAndAfter
"CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
"('local_dictionary_enable'='true','local_dictionary_include'='name','local_dictionary_threshold'='300000')")
sql("load data inpath '" + file1 + "' into table local2
OPTIONS('header'='false')")
- assert(checkForLocalDictionary(getDimRawChunk(0)))
+
assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath,
0)))
}
test("test local dictionary generation for include and exclude") {
@@ -118,8 +108,8 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
     "CREATE TABLE local2(name string, age string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='true','local_dictionary_include'='name', 'local_dictionary_exclude'='age')")
   sql("insert into table local2 values('vishal', '30')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
-    assert(!checkForLocalDictionary(getDimRawChunk(1)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 1)))
}
test("test local dictionary generation for complex type") {
@@ -130,9 +120,9 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
   sql("load data inpath '" + file2 +
     "' into table local2 OPTIONS('header'='false','COMPLEX_DELIMITER_LEVEL_1'='$', " +
     "'COMPLEX_DELIMITER_LEVEL_2'=':')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
-    assert(checkForLocalDictionary(getDimRawChunk(1)))
-    assert(checkForLocalDictionary(getDimRawChunk(2)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 1)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 2)))
}
test("test local dictionary data validation") {
@@ -153,10 +143,10 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
   sql("drop table if exists local2")
   sql("CREATE TABLE local2(name string) STORED AS carbondata tblproperties('local_dictionary_enable'='true')")
   sql("load data inpath '" + resourcesPath + "/localdictionary.csv" + "' into table local2")
- val dimRawChunk = getDimRawChunk(0)
+ val dimRawChunk = CarbonTestUtil.getDimRawChunk(storePath, 0)
val dictionaryData = Array("vishal", "kumar", "akash", "praveen", "brijoo")
try
- assert(validateDictionary(dimRawChunk.get(0), dictionaryData))
+      assert(CarbonTestUtil.validateDictionary(dimRawChunk.get(0), dictionaryData))
catch {
case e: Exception =>
assert(false)
@@ -218,105 +208,4 @@ class LoadTableWithLocalDictionaryTestCase extends QueryTest with BeforeAndAfter
if (file.exists) file.delete
}
- private def checkForLocalDictionary(dimensionRawColumnChunks: util
- .List[DimensionRawColumnChunk]): Boolean = {
- var isLocalDictionaryGenerated = false
- import scala.collection.JavaConverters._
- for (dimensionRawColumnChunk <- dimensionRawColumnChunks.asScala) {
- if (dimensionRawColumnChunk.getDataChunkV3
- .isSetLocal_dictionary) {
- isLocalDictionaryGenerated = true
- }
- }
- isLocalDictionaryGenerated
- }
-
- /**
-   * this method returns true if local dictionary is created for all the blocklets or not
- *
- * @return
- */
-  private def getDimRawChunk(blockindex: Int): util.ArrayList[DimensionRawColumnChunk] = {
- val dataFiles = FileFactory.getCarbonFile(storePath)
- .listFiles(new CarbonFileFilter() {
- override def accept(file: CarbonFile): Boolean = {
- if (file.getName
- .endsWith(CarbonCommonConstants.FACT_FILE_EXT)) {
- true
- } else {
- false
- }
- }
- })
- val dimensionRawColumnChunks = read(dataFiles(0).getAbsolutePath,
- blockindex)
- dimensionRawColumnChunks
- }
-
- private def read(filePath: String, blockIndex: Int) = {
- val carbonDataFiles = new File(filePath)
- val dimensionRawColumnChunks = new
- util.ArrayList[DimensionRawColumnChunk]
- val offset = carbonDataFiles.length
- val converter = new DataFileFooterConverterV3
-    val fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath))
-    val actualOffset = fileReader.readLong(carbonDataFiles.getAbsolutePath, offset - 8)
- val blockInfo = new TableBlockInfo(carbonDataFiles.getAbsolutePath,
- actualOffset,
- "0",
- new Array[String](0),
- carbonDataFiles.length,
- ColumnarFormatVersion.V3,
- null)
- val dataFileFooter = converter.readDataFileFooter(blockInfo)
- val blockletList = dataFileFooter.getBlockletList
- import scala.collection.JavaConverters._
- for (blockletInfo <- blockletList.asScala) {
- val dimensionColumnChunkReader =
- CarbonDataReaderFactory
- .getInstance
- .getDimensionColumnChunkReader(ColumnarFormatVersion.V3,
- blockletInfo,
- carbonDataFiles.getAbsolutePath,
- false).asInstanceOf[DimensionChunkReaderV3]
- dimensionRawColumnChunks
-        .add(dimensionColumnChunkReader.readRawDimensionChunk(fileReader, blockIndex))
- }
- dimensionRawColumnChunks
- }
-
- private def validateDictionary(rawColumnPage: DimensionRawColumnChunk,
- data: Array[String]): Boolean = {
- val local_dictionary = rawColumnPage.getDataChunkV3.local_dictionary
- if (null != local_dictionary) {
- val compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
- rawColumnPage.getDataChunkV3.getData_chunk_list.get(0).getChunk_meta)
- val encodings = local_dictionary.getDictionary_meta.encoders
- val encoderMetas = local_dictionary.getDictionary_meta.getEncoder_meta
- val encodingFactory = DefaultEncodingFactory.getInstance
-      val decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName)
-      val dictionaryPage = decoder
-        .decode(local_dictionary.getDictionary_data, 0, local_dictionary.getDictionary_data.length)
- val dictionaryMap = new util.HashMap[DictionaryByteArrayWrapper, Integer]
- val usedDictionaryValues = util.BitSet
- .valueOf(CompressorFactory.getInstance.getCompressor(compressorName)
- .unCompressByte(local_dictionary.getDictionary_values))
- var index = 0
- var i = usedDictionaryValues.nextSetBit(0)
- while ( { i >= 0 }) {
- dictionaryMap
-        .put(new DictionaryByteArrayWrapper(dictionaryPage.getBytes({ index += 1; index - 1 })),
- i)
- i = usedDictionaryValues.nextSetBit(i + 1)
- }
- for (i <- data.indices) {
-      if (null == dictionaryMap.get(new DictionaryByteArrayWrapper(data(i).getBytes))) {
- return false
- }
- }
- return true
- }
- false
- }
- // scalastyle:on lineLength println
}
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateDDLForComplexMapType.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateDDLForComplexMapType.scala
index 6fd082e..ac9b031 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateDDLForComplexMapType.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateDDLForComplexMapType.scala
@@ -18,7 +18,6 @@
package org.apache.carbondata.spark.testsuite.createTable
import java.io.{BufferedWriter, File, FileWriter}
-import java.util
import scala.collection.JavaConverters._
@@ -28,8 +27,6 @@ import org.apache.spark.sql.{AnalysisException, Row}
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll
-import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk
-
class TestCreateDDLForComplexMapType extends QueryTest with BeforeAndAfterAll {
private val conf: Configuration = new Configuration(false)
@@ -38,14 +35,6 @@ class TestCreateDDLForComplexMapType extends QueryTest with BeforeAndAfterAll {
val path = s"$rootPath/integration/spark/src/test/resources/maptest2.csv"
- private def checkForLocalDictionary(dimensionRawColumnChunks: util
- .List[DimensionRawColumnChunk]): Boolean = {
- var isLocalDictionaryGenerated = false
- isLocalDictionaryGenerated = dimensionRawColumnChunks.asScala
- .filter(dimensionRawColumnChunk => dimensionRawColumnChunk.getDataChunkV3
- .isSetLocal_dictionary).size > 0
- isLocalDictionaryGenerated
- }
def createCSVFile(): Unit = {
val out = new BufferedWriter(new FileWriter(path));
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 253d577..c7e59dd 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -40,20 +40,12 @@ import org.junit.Assert
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
-import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper
import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.datastore.block.TableBlockInfo
-import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk
-import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory
-import org.apache.carbondata.core.datastore.chunk.reader.dimension.v3.DimensionChunkReaderV3
-import org.apache.carbondata.core.datastore.compression.CompressorFactory
-import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter}
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
-import org.apache.carbondata.core.metadata.ColumnarFormatVersion
import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field}
import org.apache.carbondata.core.reader.CarbonFooterReaderV3
-import org.apache.carbondata.core.util.{CarbonMetadataUtil, CarbonProperties, CarbonUtil, DataFileFooterConverterV3}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonTestUtil, CarbonUtil}
import org.apache.carbondata.core.util.path.CarbonTablePath
import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
import org.apache.carbondata.sdk.file._
@@ -2425,7 +2417,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath).writtenBy("TestNonTransactionalCarbonTable")
  generateCarbonData(builder)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("DROP TABLE IF EXISTS sdkTable")
sql(
s"""CREATE EXTERNAL TABLE sdkTable STORED AS carbondata LOCATION
@@ -2452,7 +2444,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath).writtenBy("TestNonTransactionalCarbonTable")
  generateCarbonData(builder)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("DROP TABLE IF EXISTS sdkTable")
sql(
s"""CREATE EXTERNAL TABLE sdkTable STORED AS carbondata LOCATION
@@ -2470,7 +2462,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath).writtenBy("TestNonTransactionalCarbonTable")
  generateCarbonData(builder)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(!testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("DROP TABLE IF EXISTS sdkTable")
sql(
s"""CREATE EXTERNAL TABLE sdkTable STORED AS carbondata LOCATION
@@ -2488,7 +2480,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath).writtenBy("TestNonTransactionalCarbonTable")
  generateCarbonData(builder)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("DROP TABLE IF EXISTS sdkTable")
sql(
s"""CREATE EXTERNAL TABLE sdkTable STORED AS carbondata LOCATION
@@ -2496,7 +2488,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  FileUtils.deleteDirectory(new File(writerPath))
  sql("insert into sdkTable select 's1','s2',23 ")
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
val df = sql("describe formatted sdkTable")
checkExistence(df, true, "Local Dictionary Enabled true")
checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(1)))
@@ -2532,7 +2524,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
  .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath).writtenBy("TestNonTransactionalCarbonTable")
  generateCarbonData(builder)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0,writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("DROP TABLE IF EXISTS sdkTable")
sql(
s"""CREATE EXTERNAL TABLE sdkTable STORED AS carbondata LOCATION
@@ -2631,105 +2623,4 @@ object testUtil{
writer.close()
}
}
-
- /**
-   * this method returns true if local dictionary is created for all the blocklets or not
-   *
-   * @return
-   */
-  def getDimRawChunk(blockindex: Int,storePath :String): util.ArrayList[DimensionRawColumnChunk] = {
- val dataFiles = FileFactory.getCarbonFile(storePath)
- .listFiles(new CarbonFileFilter() {
- override def accept(file: CarbonFile): Boolean = {
- if (file.getName
- .endsWith(CarbonCommonConstants.FACT_FILE_EXT)) {
- true
- } else {
- false
- }
- }
- })
- val dimensionRawColumnChunks = read(dataFiles(0).getAbsolutePath,
- blockindex)
- dimensionRawColumnChunks
- }
-
- def read(filePath: String, blockIndex: Int) = {
- val carbonDataFiles = new File(filePath)
- val dimensionRawColumnChunks = new
- util.ArrayList[DimensionRawColumnChunk]
- val offset = carbonDataFiles.length
- val converter = new DataFileFooterConverterV3
-    val fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath))
-    val actualOffset = fileReader.readLong(carbonDataFiles.getAbsolutePath, offset - 8)
- val blockInfo = new TableBlockInfo(carbonDataFiles.getAbsolutePath,
- actualOffset,
- "0",
- new Array[String](0),
- carbonDataFiles.length,
- ColumnarFormatVersion.V3,
- null)
- val dataFileFooter = converter.readDataFileFooter(blockInfo)
- val blockletList = dataFileFooter.getBlockletList.asScala
- for (blockletInfo <- blockletList) {
- val dimensionColumnChunkReader =
- CarbonDataReaderFactory
- .getInstance
- .getDimensionColumnChunkReader(ColumnarFormatVersion.V3,
- blockletInfo,
- carbonDataFiles.getAbsolutePath,
- false).asInstanceOf[DimensionChunkReaderV3]
- dimensionRawColumnChunks
-      .add(dimensionColumnChunkReader.readRawDimensionChunk(fileReader, blockIndex))
- }
- dimensionRawColumnChunks
- }
-
- def validateDictionary(rawColumnPage: DimensionRawColumnChunk,
- data: Array[String]): Boolean = {
- val local_dictionary = rawColumnPage.getDataChunkV3.local_dictionary
- if (null != local_dictionary) {
- val compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
- rawColumnPage.getDataChunkV3.getData_chunk_list.get(0).getChunk_meta)
- val encodings = local_dictionary.getDictionary_meta.encoders
- val encoderMetas = local_dictionary.getDictionary_meta.getEncoder_meta
- val encodingFactory = DefaultEncodingFactory.getInstance
-    val decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName)
-    val dictionaryPage = decoder
-      .decode(local_dictionary.getDictionary_data, 0, local_dictionary.getDictionary_data.length)
- val dictionaryMap = new util.HashMap[DictionaryByteArrayWrapper, Integer]
- val usedDictionaryValues = util.BitSet
- .valueOf(CompressorFactory.getInstance.getCompressor(compressorName)
- .unCompressByte(local_dictionary.getDictionary_values))
- var index = 0
- var i = usedDictionaryValues.nextSetBit(0)
- while ( { i >= 0 }) {
- dictionaryMap
-        .put(new DictionaryByteArrayWrapper(dictionaryPage.getBytes({ index += 1; index - 1 })),
- i)
- i = usedDictionaryValues.nextSetBit(i + 1)
- }
- for (i <- data.indices) {
-      if (null == dictionaryMap.get(new DictionaryByteArrayWrapper(data(i).getBytes))) {
- return false
- }
- }
- return true
- }
- false
- }
-
- def checkForLocalDictionary(dimensionRawColumnChunks: util
- .List[DimensionRawColumnChunk]): Boolean = {
- var isLocalDictionaryGenerated = false
- import scala.collection.JavaConversions._
- for (dimensionRawColumnChunk <- dimensionRawColumnChunks) {
- if (dimensionRawColumnChunk.getDataChunkV3
- .isSetLocal_dictionary) {
- isLocalDictionaryGenerated = true
- }
- }
- isLocalDictionaryGenerated
- }
-
}
\ No newline at end of file
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithComplexType.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithComplexType.scala
index 6bec8ea..8ee50a2 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithComplexType.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithComplexType.scala
@@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonTestUtil}
import org.apache.carbondata.sdk.file.CarbonWriter
class TestNonTransactionalCarbonTableWithComplexType extends QueryTest with BeforeAndAfterAll {
@@ -231,7 +231,7 @@ class TestNonTransactionalCarbonTableWithComplexType extends QueryTest with Befo
     s"""CREATE EXTERNAL TABLE localComplex STORED AS carbondata LOCATION
     |'$writerPath' """.stripMargin)
  assert(FileFactory.getCarbonFile(writerPath).exists())
-    assert(testUtil.checkForLocalDictionary(testUtil.getDimRawChunk(0, writerPath)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(writerPath, 0)))
sql("describe formatted localComplex").collect
val descLoc = sql("describe formatted localComplex").collect
  descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match {
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala
index b6e801c..9825728 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala
@@ -27,18 +27,8 @@ import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll
-import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper
import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.datastore.block.TableBlockInfo
-import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk
-import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory
-import org.apache.carbondata.core.datastore.chunk.reader.dimension.v3.DimensionChunkReaderV3
-import org.apache.carbondata.core.datastore.compression.CompressorFactory
-import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter}
-import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
-import org.apache.carbondata.core.metadata.ColumnarFormatVersion
-import org.apache.carbondata.core.util.{CarbonMetadataUtil, CarbonProperties, DataFileFooterConverterV3}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonTestUtil}
class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterAll {
@@ -61,7 +51,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_threshold'='2001','local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test successful local dictionary generation") {
@@ -70,7 +60,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "tblproperties('local_dictionary_enable'='true','local_dictionary_threshold'='9001'," +
     "'local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test successful local dictionary generation for default configs") {
@@ -78,7 +68,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
   sql("CREATE TABLE local2(name string) STORED AS carbondata " +
     "tblproperties('local_dictionary_enable'='true')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation for local dictionary include") {
@@ -86,7 +76,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
   sql("CREATE TABLE local2(name string) STORED AS carbondata " +
     "TBLPROPERTIES ('local_dictionary_enable'='true', 'local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation for local dictioanry exclude") {
@@ -95,7 +85,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='true')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation when it is disabled") {
@@ -104,7 +94,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "CREATE TABLE local2(name string) STORED AS carbondata tblproperties" +
     "('local_dictionary_enable'='false','local_dictionary_include'='name')")
   sql("load data inpath '" + file1 + "' into table local2 OPTIONS('header'='false')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
}
test("test local dictionary generation for invalid threshold
configurations") {
@@ -114,7 +104,7 @@ class LocalDictionarySupportLoadTableTest extends QueryTest
with BeforeAndAfterA
"tblproperties('local_dictionary_enable'='true','local_dictionary_include'='name',"
+
"'local_dictionary_threshold'='300000')")
sql("load data inpath '" + file1 + "' into table local2
OPTIONS('header'='false')")
- assert(checkForLocalDictionary(getDimRawChunk(0)))
+
assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath,
0)))
}
test("test local dictionary generation for include and exclude") {
@@ -123,8 +113,8 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "tblproperties('local_dictionary_enable'='true','local_dictionary_include'='name', " +
     "'local_dictionary_exclude'='age')")
   sql("insert into table local2 values('vishal', '30')")
-    assert(checkForLocalDictionary(getDimRawChunk(0)))
-    assert(!checkForLocalDictionary(getDimRawChunk(1)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 1)))
}
test("test local dictionary generation for complex type") {
@@ -134,9 +124,9 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
   sql("load data inpath '" + file2 +
     "' into table local2 OPTIONS('header'='false','COMPLEX_DELIMITER_LEVEL_1'='$', " +
     "'COMPLEX_DELIMITER_LEVEL_2'=':')")
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
-    assert(checkForLocalDictionary(getDimRawChunk(1)))
-    assert(checkForLocalDictionary(getDimRawChunk(2)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 1)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 2)))
}
test("test local dictionary generation for map type") {
@@ -148,10 +138,10 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
     "map('Manish','Gupta','Manish','Nalla','Shardul','Singh','Vishal','Kumar'))")
   checkAnswer(sql("select * from local2"), Seq(
     Row(Map("Manish" -> "Nalla", "Shardul" -> "Singh", "Vishal" -> "Kumar"))))
-    assert(!checkForLocalDictionary(getDimRawChunk(0)))
-    assert(!checkForLocalDictionary(getDimRawChunk(1)))
-    assert(checkForLocalDictionary(getDimRawChunk(2)))
-    assert(checkForLocalDictionary(getDimRawChunk(3)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 0)))
+    assert(!CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 1)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 2)))
+    assert(CarbonTestUtil.checkForLocalDictionary(CarbonTestUtil.getDimRawChunk(storePath, 3)))
}
test("test local dictionary data validation") {
@@ -174,10 +164,10 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
   sql("CREATE TABLE local2(name string) STORED AS carbondata " +
     "tblproperties('local_dictionary_enable'='true')")
   sql("load data inpath '" + resourcesPath + "/localdictionary.csv" + "' into table local2")
- val dimRawChunk = getDimRawChunk(0)
+ val dimRawChunk = CarbonTestUtil.getDimRawChunk(storePath, 0)
val dictionaryData = Array("vishal", "kumar", "akash", "praveen", "brijoo")
try
- assert(validateDictionary(dimRawChunk.get(0), dictionaryData))
+      assert(CarbonTestUtil.validateDictionary(dimRawChunk.get(0), dictionaryData))
catch {
case e: Exception =>
assert(false)
@@ -242,107 +232,4 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA
val file: File = new File(fileName)
if (file.exists) file.delete
}
-
- private def checkForLocalDictionary(dimensionRawColumnChunks: util
- .List[DimensionRawColumnChunk]): Boolean = {
- var isLocalDictionaryGenerated = false
- import scala.collection.JavaConverters._
- for (dimensionRawColumnChunk <- dimensionRawColumnChunks.asScala) {
- if (dimensionRawColumnChunk.getDataChunkV3
- .isSetLocal_dictionary) {
- isLocalDictionaryGenerated = true
- }
- }
- isLocalDictionaryGenerated
- }
-
- /**
-   * this method returns true if local dictionary is created for all the blocklets or not
- *
- * @return
- */
-  private def getDimRawChunk(blockindex: Int): util.ArrayList[DimensionRawColumnChunk] = {
- val dataFiles = FileFactory.getCarbonFile(storePath)
- .listFiles(new CarbonFileFilter() {
- override def accept(file: CarbonFile): Boolean = {
- if (file.getName
- .endsWith(CarbonCommonConstants.FACT_FILE_EXT)) {
- true
- } else {
- false
- }
- }
- })
- val dimensionRawColumnChunks = read(dataFiles(0).getAbsolutePath,
- blockindex)
- dimensionRawColumnChunks
- }
-
- private def read(filePath: String, blockIndex: Int) = {
- val carbonDataFiles = new File(filePath)
- val dimensionRawColumnChunks = new
- util.ArrayList[DimensionRawColumnChunk]
- val offset = carbonDataFiles.length
- val converter = new DataFileFooterConverterV3
-    val fileReader = FileFactory.getFileHolder(FileFactory.getFileType(filePath))
-    val actualOffset = fileReader.readLong(carbonDataFiles.getAbsolutePath, offset - 8)
- val blockInfo = new TableBlockInfo(carbonDataFiles.getAbsolutePath,
- actualOffset,
- "0",
- new Array[String](0),
- carbonDataFiles.length,
- ColumnarFormatVersion.V3,
- null)
- val dataFileFooter = converter.readDataFileFooter(blockInfo)
- val blockletList = dataFileFooter.getBlockletList
- import scala.collection.JavaConverters._
- for (blockletInfo <- blockletList.asScala) {
- val dimensionColumnChunkReader =
- CarbonDataReaderFactory
- .getInstance
- .getDimensionColumnChunkReader(ColumnarFormatVersion.V3,
- blockletInfo,
- carbonDataFiles.getAbsolutePath,
- false).asInstanceOf[DimensionChunkReaderV3]
- dimensionRawColumnChunks
-      .add(dimensionColumnChunkReader.readRawDimensionChunk(fileReader, blockIndex))
- }
- dimensionRawColumnChunks
- }
-
- private def validateDictionary(rawColumnPage: DimensionRawColumnChunk,
- data: Array[String]): Boolean = {
- val local_dictionary = rawColumnPage.getDataChunkV3.local_dictionary
- if (null != local_dictionary) {
- val compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(
- rawColumnPage.getDataChunkV3.getData_chunk_list.get(0).getChunk_meta)
- val encodings = local_dictionary.getDictionary_meta.encoders
- val encoderMetas = local_dictionary.getDictionary_meta.getEncoder_meta
- val encodingFactory = DefaultEncodingFactory.getInstance
-    val decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName)
-    val dictionaryPage = decoder
-      .decode(local_dictionary.getDictionary_data, 0, local_dictionary.getDictionary_data.length)
- val dictionaryMap = new
- util.HashMap[DictionaryByteArrayWrapper, Integer]
- val usedDictionaryValues = util.BitSet
- .valueOf(CompressorFactory.getInstance.getCompressor(compressorName)
- .unCompressByte(local_dictionary.getDictionary_values))
- var index = 0
- var i = usedDictionaryValues.nextSetBit(0)
- while ( { i >= 0 }) {
- dictionaryMap
-        .put(new DictionaryByteArrayWrapper(dictionaryPage.getBytes({ index += 1; index - 1 })),
- i)
- i = usedDictionaryValues.nextSetBit(i + 1)
- }
- for (i <- data.indices) {
-      if (null == dictionaryMap.get(new DictionaryByteArrayWrapper(data(i).getBytes))) {
- return false
- }
- }
- return true
- }
- false
- }
-
}