[CARBONDATA-1273] String datatype will be no dictionary column by default. This closes #1144
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/191dfb35 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/191dfb35 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/191dfb35 Branch: refs/heads/datamap Commit: 191dfb3513f54ae22a0766e7c00e66e43eb42cb4 Parents: 0481340 Author: QiangCai <[email protected]> Authored: Thu Jul 6 10:41:55 2017 +0800 Committer: jackylk <[email protected]> Committed: Tue Jul 11 16:28:17 2017 +0800 ---------------------------------------------------------------------- .../core/constants/CarbonCommonConstants.java | 23 --- .../carbondata/core/util/CarbonProperties.java | 39 ---- .../InsertIntoCarbonTableTestCase.scala | 2 +- .../dataload/TestBatchSortDataLoad.scala | 5 +- .../dataload/TestLoadDataUseAllDictionary.scala | 1 + .../partition/TestDDLForPartitionTable.scala | 5 +- .../testsuite/sortcolumns/TestSortColumns.scala | 4 +- .../sortcolumns/TestSortColumnsWithUnsafe.scala | 6 +- .../spark/rdd/CarbonGlobalDictionaryRDD.scala | 143 ++++++-------- .../spark/util/GlobalDictionaryUtil.scala | 24 +-- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 16 +- .../spark/util/AllDictionaryTestCase.scala | 7 +- .../AutoHighCardinalityIdentifyTestCase.scala | 148 -------------- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- ...GlobalDictionaryUtilConcurrentTestCase.scala | 3 +- .../apache/spark/sql/TestCarbonSqlParser.scala | 191 ++++++------------- .../spark/util/AllDictionaryTestCase.scala | 5 +- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- .../sortdata/SortTempFileChunkHolder.java | 2 +- .../store/writer/AbstractFactDataWriter.java | 4 +- 20 files changed, 158 insertions(+), 478 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 208bab8..6393131 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -870,29 +870,6 @@ public final class CarbonCommonConstants { public static final String NO_INVERTED_INDEX = "no_inverted_index"; /** - * this variable is to enable/disable identify high cardinality during first data loading - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_IDENTIFY_ENABLE = "high.cardinality.identify.enable"; - public static final String HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT = "true"; - - /** - * threshold of high cardinality - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_THRESHOLD = "high.cardinality.threshold"; - public static final String HIGH_CARDINALITY_THRESHOLD_DEFAULT = "1000000"; - public static final int HIGH_CARDINALITY_THRESHOLD_MIN = 10000; - - /** - * percentage of cardinality in row count - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_IN_ROW_COUNT_PERCENTAGE = - "high.cardinality.row.count.percentage"; - public static final String HIGH_CARDINALITY_IN_ROW_COUNT_PERCENTAGE_DEFAULT = "80"; - - /** * 16 mb size */ public static final long CARBON_16MB = 16 * 1024 * 1024; http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index c1e70ff..d14b7ab 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -90,8 +90,6 @@ public final class CarbonProperties { validateNumCores(); validateNumCoresBlockSort(); validateSortSize(); - validateHighCardinalityIdentify(); - validateHighCardinalityThreshold(); validateCarbonDataFileVersion(); validateExecutorStartUpTime(); validatePrefetchBufferSize(); @@ -329,43 +327,6 @@ public final class CarbonProperties { } } - private void validateHighCardinalityIdentify() { - String highcardIdentifyStr = carbonProperties - .getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT); - try { - Boolean.parseBoolean(highcardIdentifyStr); - } catch (NumberFormatException e) { - LOGGER.info("The high cardinality identify value \"" + highcardIdentifyStr - + "\" is invalid. Using the default value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT); - } - } - - private void validateHighCardinalityThreshold() { - String highcardThresholdStr = carbonProperties - .getProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - try { - int highcardThreshold = Integer.parseInt(highcardThresholdStr); - if (highcardThreshold < CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN) { - LOGGER.info("The high cardinality threshold value \"" + highcardThresholdStr - + "\" is invalid. 
Using the min value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN + ""); - } - } catch (NumberFormatException e) { - LOGGER.info("The high cardinality threshold value \"" + highcardThresholdStr - + "\" is invalid. Using the default value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - } - } - /** * Below method will be used to validate the data file version parameter * if parameter is invalid current version will be set http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala index c968672..7bd9094 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala @@ -39,7 +39,7 @@ class InsertIntoCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { checkAnswer( sql("select 
imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_oper atorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription from THive order by imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Late 
st_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription"), sql("select imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_oper atorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription from TCarbon order by 
imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,La test_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription") - ) + ) CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timeStampPropOrig) } test("insert from hive-sum expression") { http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala index d53b5e5..c5a6a27 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala +++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala @@ -29,7 +29,6 @@ import org.apache.carbondata.core.util.CarbonProperties class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { var filePath: String = _ - def buildTestData() = { filePath = s"${integrationPath}/spark-common-test/target/big.csv" val file = new File(filePath) @@ -79,6 +78,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load1(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load1 " + @@ -160,6 +160,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load3(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load3 " + @@ -182,6 +183,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load4(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load4 " ) @@ -202,6 +204,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load6(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA 
LOCAL INPATH '$filePath' into table carbon_load6 " ) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala index d6deb89..170c41b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala @@ -30,6 +30,7 @@ class TestLoadDataUseAllDictionary extends QueryTest with BeforeAndAfterAll{ (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED BY 'carbondata' + TBLPROPERTIES('dictionary_include'='country,name,phonetype,serialname') """) } http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala index 9ad5959..5b77a24 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala @@ 
-106,9 +106,8 @@ class TestDDLForPartitionTable extends QueryTest with BeforeAndAfterAll { assert(partitionInfo != null) assert(partitionInfo.getColumnSchemaList.get(0).getColumnName.equalsIgnoreCase("workgroupcategory")) assert(partitionInfo.getColumnSchemaList.get(0).getDataType == DataType.STRING) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.size == 2) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(0) == Encoding.DICTIONARY) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(1) == Encoding.INVERTED_INDEX) + assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.size == 1) + assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(0) == Encoding.INVERTED_INDEX) assert(partitionInfo.getPartitionType == PartitionType.LIST) assert(partitionInfo.getListInfo.size == 3) assert(partitionInfo.getListInfo.get(0).size == 1) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala index 01c8021..e25ebb8 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala @@ -72,7 +72,7 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll { } test("create table with dictionary sort_columns") { - sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname 
String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname')") + sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable2 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") checkAnswer(sql("select empname from sorttable2"),sql("select empname from origintable1")) } @@ -178,7 +178,7 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll { } test("filter on sort_columns include no-dictionary, direct-dictionary and dictioanry") { - sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname')") + sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable6 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") // no dictionary checkAnswer(sql("select * 
from sorttable6 where workgroupcategory = 1"), sql("select * from origintable1 where workgroupcategory = 1 order by doj")) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala index 7ffa13f..f45f931 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala @@ -35,11 +35,11 @@ class TestSortColumnsWithUnsafe extends QueryTest with BeforeAndAfterAll { test("create table with no dictionary sort_columns") { sql("CREATE TABLE sorttable1 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empno')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") - checkAnswer(sql("select empno from sorttable1"), sql("select empno from sorttable1 order by empno")) + checkAnswer(sql("select empno from sorttable1"), sql("select empno from origintable1 order by empno")) } test("create table with dictionary sort_columns") { - sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname 
String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname')") + sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable2 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") checkAnswer(sql("select empname from sorttable2"),sql("select empname from origintable1")) } @@ -145,7 +145,7 @@ class TestSortColumnsWithUnsafe extends QueryTest with BeforeAndAfterAll { } test("filter on sort_columns include no-dictionary, direct-dictionary and dictioanry") { - sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname')") + sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable6 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") // no 
dictionary checkAnswer(sql("select * from sorttable6 where workgroupcategory = 1"), sql("select * from origintable1 where workgroupcategory = 1 order by doj")) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala index d0f9362..a7b8143 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala @@ -153,8 +153,6 @@ case class DictionaryLoadModel(table: CarbonTableIdentifier, isComplexes: Array[Boolean], primDimensions: Array[CarbonDimension], delimiters: Array[String], - highCardIdentifyEnable: Boolean, - highCardThreshold: Int, columnIdentifier: Array[ColumnIdentifier], isFirstLoad: Boolean, hdfsTempLocation: String, @@ -329,18 +327,17 @@ class CarbonBlockDistinctValuesCombineRDD( class CarbonGlobalDictionaryGenerateRDD( prev: RDD[(Int, ColumnDistinctValues)], model: DictionaryLoadModel) - extends CarbonRDD[(Int, String, Boolean)](prev) { + extends CarbonRDD[(Int, String)](prev) { override def getPartitions: Array[Partition] = firstParent[(Int, ColumnDistinctValues)].partitions override def internalCompute(split: Partition, - context: TaskContext): Iterator[(Int, String, Boolean)] = { + context: TaskContext): Iterator[(Int, String)] = { val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION, model.hdfsLocation) val status = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS - var isHighCardinalityColumn = false - 
val iter = new Iterator[(Int, String, Boolean)] { + val iter = new Iterator[(Int, String)] { var dictionaryForDistinctValueLookUp: Dictionary = _ var dictionaryForSortIndexWriting: Dictionary = _ var dictionaryForDistinctValueLookUpCleared: Boolean = false @@ -375,90 +372,70 @@ class CarbonGlobalDictionaryGenerateRDD( val distinctValueList = rddIter.next()._2 valuesBuffer ++= distinctValueList.values rowCount += distinctValueList.rowCount - // check high cardinality - if (model.isFirstLoad && model.highCardIdentifyEnable - && !model.isComplexes(split.index) - && model.primDimensions(split.index).isColumnar) { - isHighCardinalityColumn = GlobalDictionaryUtil.isHighCardinalityColumn( - valuesBuffer.size, model) - if (isHighCardinalityColumn) { - break - } - } } } val combineListTime = System.currentTimeMillis() - t1 - if (isHighCardinalityColumn) { - LOGGER.info(s"column ${ model.table.getTableUniqueName }." + - s"${ - model.primDimensions(split.index) - .getColName - } is high cardinality column") + isDictionaryLocked = dictLock.lockWithRetries() + if (isDictionaryLocked) { + logInfo(s"Successfully able to get the dictionary lock for ${ + model.primDimensions(split.index).getColName + }") } else { - isDictionaryLocked = dictLock.lockWithRetries() - if (isDictionaryLocked) { - logInfo(s"Successfully able to get the dictionary lock for ${ + sys.error(s"Dictionary file ${ model.primDimensions(split.index).getColName - }") - } else { - sys - .error(s"Dictionary file ${ - model.primDimensions(split.index).getColName - } is locked for updation. 
Please try after some time") - } - val t2 = System.currentTimeMillis - val fileType = FileFactory.getFileType(model.dictFilePaths(split.index)) - model.dictFileExists(split.index) = FileFactory - .isFileExist(model.dictFilePaths(split.index), fileType) - dictionaryForDistinctValueLookUp = if (model.dictFileExists(split.index)) { - CarbonLoaderUtil.getDictionary(model.table, - model.columnIdentifier(split.index), - model.hdfsLocation, - model.primDimensions(split.index).getDataType - ) - } else { - null - } - val dictCacheTime = System.currentTimeMillis - t2 - val t3 = System.currentTimeMillis() - val dictWriteTask = new DictionaryWriterTask(valuesBuffer, - dictionaryForDistinctValueLookUp, - model.table, + } is locked for updation. Please try after some time") + } + val t2 = System.currentTimeMillis + val fileType = FileFactory.getFileType(model.dictFilePaths(split.index)) + val isDictFileExists = FileFactory.isFileExist(model.dictFilePaths(split.index), fileType) + dictionaryForDistinctValueLookUp = if (isDictFileExists) { + CarbonLoaderUtil.getDictionary(model.table, model.columnIdentifier(split.index), model.hdfsLocation, - model.primDimensions(split.index).getColumnSchema, - model.dictFileExists(split.index) + model.primDimensions(split.index).getDataType ) - // execute dictionary writer task to get distinct values - val distinctValues = dictWriteTask.execute() - val dictWriteTime = System.currentTimeMillis() - t3 - val t4 = System.currentTimeMillis() - // if new data came than rewrite sort index file - if (distinctValues.size() > 0) { - val sortIndexWriteTask = new SortIndexWriterTask(model.table, - model.columnIdentifier(split.index), - model.primDimensions(split.index).getDataType, - model.hdfsLocation, - dictionaryForDistinctValueLookUp, - distinctValues) - sortIndexWriteTask.execute() - } - val sortIndexWriteTime = System.currentTimeMillis() - t4 - CarbonTimeStatisticsFactory.getLoadStatisticsInstance.recordDicShuffleAndWriteTime() - // After sortIndex 
writing, update dictionaryMeta - dictWriteTask.updateMetaData() - // clear the value buffer after writing dictionary data - valuesBuffer.clear - CarbonUtil.clearDictionaryCache(dictionaryForDistinctValueLookUp) - dictionaryForDistinctValueLookUpCleared = true - LOGGER.info(s"\n columnName: ${ model.primDimensions(split.index).getColName }" + - s"\n columnId: ${ model.primDimensions(split.index).getColumnId }" + - s"\n new distinct values count: ${ distinctValues.size() }" + - s"\n combine lists: $combineListTime" + - s"\n create dictionary cache: $dictCacheTime" + - s"\n sort list, distinct and write: $dictWriteTime" + - s"\n write sort info: $sortIndexWriteTime") + } else { + null + } + val dictCacheTime = System.currentTimeMillis - t2 + val t3 = System.currentTimeMillis() + val dictWriteTask = new DictionaryWriterTask(valuesBuffer, + dictionaryForDistinctValueLookUp, + model.table, + model.columnIdentifier(split.index), + model.hdfsLocation, + model.primDimensions(split.index).getColumnSchema, + isDictFileExists + ) + // execute dictionary writer task to get distinct values + val distinctValues = dictWriteTask.execute() + val dictWriteTime = System.currentTimeMillis() - t3 + val t4 = System.currentTimeMillis() + // if new data came than rewrite sort index file + if (distinctValues.size() > 0) { + val sortIndexWriteTask = new SortIndexWriterTask(model.table, + model.columnIdentifier(split.index), + model.primDimensions(split.index).getDataType, + model.hdfsLocation, + dictionaryForDistinctValueLookUp, + distinctValues) + sortIndexWriteTask.execute() } + val sortIndexWriteTime = System.currentTimeMillis() - t4 + CarbonTimeStatisticsFactory.getLoadStatisticsInstance.recordDicShuffleAndWriteTime() + // After sortIndex writing, update dictionaryMeta + dictWriteTask.updateMetaData() + // clear the value buffer after writing dictionary data + valuesBuffer.clear + CarbonUtil.clearDictionaryCache(dictionaryForDistinctValueLookUp) + dictionaryForDistinctValueLookUpCleared 
= true + LOGGER.info(s"\n columnName: ${ model.primDimensions(split.index).getColName }" + + s"\n columnId: ${ model.primDimensions(split.index).getColumnId }" + + s"\n new distinct values count: ${ distinctValues.size() }" + + s"\n combine lists: $combineListTime" + + s"\n create dictionary cache: $dictCacheTime" + + s"\n sort list, distinct and write: $dictWriteTime" + + s"\n write sort info: $sortIndexWriteTime") } catch { case ex: Exception => LOGGER.error(ex) @@ -492,8 +469,8 @@ class CarbonGlobalDictionaryGenerateRDD( } } - override def next(): (Int, String, Boolean) = { - (split.index, status, isHighCardinalityColumn) + override def next(): (Int, String) = { + (split.index, status) } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index bf41dca..e1c564d 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -278,11 +278,6 @@ object GlobalDictionaryUtil { } } - def isHighCardinalityColumn(columnCardinality: Int, - model: DictionaryLoadModel): Boolean = { - columnCardinality > model.highCardThreshold - } - /** * create a instance of DictionaryLoadModel * @@ -319,14 +314,6 @@ object GlobalDictionaryUtil { val lockType = CarbonProperties.getInstance .getProperty(CarbonCommonConstants.LOCK_TYPE, CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS) val zookeeperUrl = CarbonProperties.getInstance.getProperty(CarbonCommonConstants.ZOOKEEPER_URL) - // load high cardinality identify configure - val 
highCardIdentifyEnable = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT).toBoolean - val highCardThreshold = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT).toInt - val serializationNullFormat = carbonLoadModel.getSerializationNullFormat.split(CarbonCommonConstants.COMMA, 2)(1) // get load count @@ -342,8 +329,6 @@ object GlobalDictionaryUtil { isComplexes.toArray, primDimensions, carbonLoadModel.getDelimiters, - highCardIdentifyEnable, - highCardThreshold, columnIdentifier, carbonLoadModel.getLoadMetadataDetails.size() == 0, hdfsTempLocation, @@ -394,9 +379,8 @@ object GlobalDictionaryUtil { private def checkStatus(carbonLoadModel: CarbonLoadModel, sqlContext: SQLContext, model: DictionaryLoadModel, - status: Array[(Int, String, Boolean)]) = { + status: Array[(Int, String)]) = { var result = false - val noDictionaryColumns = new ArrayBuffer[CarbonDimension] val tableName = model.table.getTableName status.foreach { x => val columnName = model.primDimensions(x._1).getColName @@ -404,12 +388,6 @@ object GlobalDictionaryUtil { result = true LOGGER.error(s"table:$tableName column:$columnName generate global dictionary file failed") } - if (x._3) { - noDictionaryColumns += model.primDimensions(x._1) - } - } - if (noDictionaryColumns.nonEmpty) { - updateTableMetadataFunc(carbonLoadModel, sqlContext, model, noDictionaryColumns.toArray) } if (result) { LOGGER.error("generate global dictionary files failed") http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala 
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala index 7bf9765..47d282b 100644 --- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala +++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala @@ -572,6 +572,8 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { // All excluded cols should be there in create table cols if (tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).isDefined) { + LOGGER.warn("dictionary_exclude option was deprecated, " + + "by default string column does not use global dictionary.") dictExcludeCols = tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).get.split(',').map(_.trim) dictExcludeCols @@ -617,19 +619,17 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { } } - // by default consider all String cols as dims and if any dictionary exclude is present then + // by default consider all String cols as dims and if any dictionary include isn't present then // add it to noDictionaryDims list. 
consider all dictionary excludes/include cols as dims fields.foreach { field => - if (dictExcludeCols.toSeq.exists(x => x.equalsIgnoreCase(field.column))) { - val dataType = DataTypeUtil.getDataType(field.dataType.get.toUpperCase()) - if (dataType != DataType.TIMESTAMP && dataType != DataType.DATE) { - noDictionaryDims :+= field.column - } - dimFields += field - } else if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) { + if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) { dimFields += field } else if (isDetectAsDimentionDatatype(field.dataType.get)) { dimFields += field + // consider all String cols as noDicitonaryDims by default + if (DataType.STRING.getName.equalsIgnoreCase(field.dataType.get)) { + noDictionaryDims :+= field.column + } } else if (sortKeyDimsTmp.exists(x => x.equalsIgnoreCase(field.column))) { noDictionaryDims :+= field.column dimFields += field http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala index a2d40b5..32f2ed1 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala @@ -16,8 +16,6 @@ */ package org.apache.carbondata.spark.util -import java.io.File - import org.apache.spark.sql.common.util.QueryTest import org.apache.spark.sql.{CarbonEnv, CarbonRelation} import org.scalatest.BeforeAndAfterAll @@ -83,7 +81,8 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { try { sql( "CREATE TABLE IF NOT EXISTS sample (id STRING, name STRING, city STRING, " + - "age INT) STORED 
BY 'org.apache.carbondata.format'" + "age INT) STORED BY 'org.apache.carbondata.format' " + + "tblproperties('dictionary_include'='city')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -96,7 +95,7 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { "ActiveProvince: string, Activecity: string, ActiveDistrict: string, ActiveStreet: " + "string>>, proddate struct<productionDate: string,activeDeactivedate: array<string>>, " + "gamePointId INT,contractNumber INT) STORED BY 'org.apache.carbondata.format'" + - "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize')" + "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize', 'dictionary_include'='channelsId')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala deleted file mode 100644 index 598a90e..0000000 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.spark.util - -import java.io.{BufferedWriter, File, FileWriter} -import java.util.Random - -import org.apache.spark.sql.common.util.QueryTest -import org.apache.spark.sql.{CarbonEnv, CarbonRelation} -import org.scalatest.BeforeAndAfterAll - -import org.apache.carbondata.core.metadata.encoder.Encoding -import org.apache.carbondata.core.metadata.schema.table.CarbonTable -import org.apache.carbondata.core.util.path.CarbonStorePath -import org.apache.carbondata.core.metadata.CarbonTableIdentifier -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} -import org.apache.carbondata.processing.model.{CarbonDataLoadSchema, CarbonLoadModel} - -/** - * Test Case for org.apache.carbondata.spark.util.GlobalDictionaryUtil - */ -class AutoHighCardinalityIdentifyTestCase extends QueryTest with BeforeAndAfterAll { - - var filePath: String = _ - - def buildCarbonLoadModel(relation: CarbonRelation, - filePath: String, - header: String): CarbonLoadModel = { - val carbonLoadModel = new CarbonLoadModel - carbonLoadModel.setTableName(relation.tableMeta.carbonTableIdentifier.getDatabaseName) - carbonLoadModel.setDatabaseName(relation.tableMeta.carbonTableIdentifier.getTableName) - // carbonLoadModel.setSchema(relation.tableMeta.schema) - val table = relation.tableMeta.carbonTable - val carbonSchema = new CarbonDataLoadSchema(table) - carbonLoadModel.setDatabaseName(table.getDatabaseName) - carbonLoadModel.setTableName(table.getFactTableName) - 
carbonLoadModel.setCarbonDataLoadSchema(carbonSchema) - carbonLoadModel.setFactFilePath(filePath) - carbonLoadModel.setCsvHeader(header) - carbonLoadModel.setCsvDelimiter(",") - carbonLoadModel.setComplexDelimiterLevel1("\\$") - carbonLoadModel.setComplexDelimiterLevel2("\\:") - carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) - carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel)) - carbonLoadModel - } - - override def beforeAll { - buildTestData - buildTable - } - - override def afterAll { - sql("drop table if exists highcard") - sql("drop table if exists colgrp_highcard") - } - - def buildTestData() = { - filePath = s"${integrationPath}/spark/target/highcarddata.csv" - val file = new File(filePath) - val writer = new BufferedWriter(new FileWriter(file)) - writer.write("hc1,c2,c3") - writer.newLine() - var i = 0 - val random = new Random - for(i <- 0 until 2000000) { - writer.write("a" + i + "," + - "b" + i%1000 + "," + - i%1000000 + "\n") - if ( i % 10000 == 0) { - writer.flush() - } - } - writer.close - } - - def buildTable() = { - try { - sql("drop table if exists highcard") - sql("""create table if not exists highcard - (hc1 string, c2 string, c3 int) - STORED BY 'org.apache.carbondata.format'""") - } catch { - case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) - } - } - - def relation(tableName: String): CarbonRelation = { - CarbonEnv.get.carbonMetastore - .lookupRelation1(Option(CarbonCommonConstants.DATABASE_DEFAULT_NAME), - tableName)(sqlContext) - .asInstanceOf[CarbonRelation] - } - - private def checkDictFile(table: CarbonTable) = { - val tableIdentifier = new CarbonTableIdentifier(table.getDatabaseName, - table.getFactTableName, "1") - val carbonTablePath = CarbonStorePath.getCarbonTablePath(storeLocation, - tableIdentifier) - val newHc1 = 
table.getDimensionByName("highcard", "hc1") - val newC2 = table.getDimensionByName("highcard", "c2") - val dictFileHc1 = carbonTablePath.getDictionaryFilePath(newHc1.getColumnId) - val dictFileC2 = carbonTablePath.getDictionaryFilePath(newC2.getColumnId) - assert(!CarbonUtil.isFileExists(dictFileHc1)) - assert(CarbonUtil.isFileExists(dictFileC2)) - } - - private def checkMetaData(oldTable: CarbonTable, newTable: CarbonTable) = { - val oldHc1 = oldTable.getDimensionByName("highcard", "hc1") - val oldc2 = oldTable.getDimensionByName("highcard", "c2") - val newHc1 = newTable.getDimensionByName("highcard", "hc1") - val newC2 = newTable.getDimensionByName("highcard", "c2") - assert(oldHc1.hasEncoding(Encoding.DICTIONARY)) - assert(oldc2.hasEncoding(Encoding.DICTIONARY)) - assert(!newHc1.hasEncoding(Encoding.DICTIONARY)) - assert(newC2.hasEncoding(Encoding.DICTIONARY)) - } - - test("auto identify high cardinality column in first load #396") { - val oldTable = relation("highcard").tableMeta.carbonTable - sql(s"LOAD DATA LOCAL INPATH '$filePath' into table highcard") - val newTable = relation("highcard").tableMeta.carbonTable - sql(s"select count(hc1) from highcard") - - // check dictionary file - checkDictFile(newTable) - // check the meta data - checkMetaData(oldTable, newTable) - } - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 4505429..7cfb06d 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ 
b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -74,7 +74,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -84,7 +84,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll sql("""CREATE TABLE verticalDelimitedTable (deviceInformationId int, channelsId string,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala index 9e0f851..ec033d7 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala @@ -76,7 +76,8 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft def buildTable() = { try { sql( - "CREATE TABLE IF NOT EXISTS employee 
(empid STRING) STORED BY 'org.apache.carbondata.format'") + "CREATE TABLE IF NOT EXISTS employee (empid STRING) STORED BY 'org.apache.carbondata.format' " + + "tblproperties('dictionary_include'='empid')") } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) } http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala index b8f0a7c..f72bbb3 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala @@ -28,16 +28,6 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants */ private class TestCarbonSqlParserStub extends CarbonSqlParser { - //val parser:CarbonSqlDDLParser = new CarbonSqlDDLParser() - - def updateColumnGroupsInFieldTest(fields: Seq[Field], tableProperties: Map[String, String]): Seq[String] = { - - var (dims, msrs, noDictionaryDims, sortkey) = extractDimAndMsrFields(fields, tableProperties) - - updateColumnGroupsInField(tableProperties, - noDictionaryDims, msrs, dims) - } - def extractDimAndMsrFieldsTest(fields: Seq[Field], tableProperties: Map[String, String]): (Seq[Field], Seq[Field], Seq[String], Seq[String]) = { extractDimAndMsrFields(fields, tableProperties) @@ -78,113 +68,6 @@ class TestCarbonSqlParser extends QueryTest { fields } - // Testing the column group Splitting method. 
- test("Test-updateColumnGroupsInField") { - val colGroupStr = "(col2,col3),(col5,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col5,col6")) - assert(colgrps.lift(2).get.equalsIgnoreCase("col7,col8")) - - } - test("Test-updateColumnGroupsInField_disordered") { - val colGroupStr = "(col5,col6),(col2,col3),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - var fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col5,col6")) - assert(colgrps.lift(2).get.equalsIgnoreCase("col7,col8")) - - } - test("Test-ColumnGroupsInvalidField_Shouldnotallow") { - val colGroupStr = "(col1,col2),(col10,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-MeasureInColumnGroup_ShouldNotAllow") { - //col1 is measure - val colGroupStr = "(col1,col2),(col5,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - 
test("Test-NoDictionaryInColumnGroup_ShouldNotAllow") { - //col5 is no dictionary - val colGroupStr = "(col2,col3),(col5,col6),(col7,col8)" - val noDictStr = "col5" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr, CarbonCommonConstants.DICTIONARY_EXCLUDE -> noDictStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-SameColumnInDifferentGroup_ShouldNotAllow") { - val colGroupStr = "(col2,col3),(col5,col6),(col6,col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - - test("Test-ColumnAreNotTogetherAsInSchema_ShouldNotAllow") { - val colGroupStr = "(col2,col3),(col5,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-ColumnInColumnGroupAreShuffledButInSequence") { - val colGroupStr = "(col2,col3),(col7,col8,col6)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col6,col7,col8")) - } - // Testing the column group Splitting method with empty table properties so null will be returned. 
- test("Test-Empty-updateColumnGroupsInField") { - val tableProperties = Map("" -> "") - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, Map()) - //assert( rtn === 1) - assert(null == colgrps) - } - // Testing the extracting of Dims and no Dictionary test("Test-extractDimColsAndNoDictionaryFields") { val tableProperties = Map(CarbonCommonConstants.DICTIONARY_EXCLUDE -> "col2", CarbonCommonConstants.DICTIONARY_INCLUDE -> "col4") @@ -203,8 +86,15 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //No dictionary column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.size == 7) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) + } test("Test-DimAndMsrColsWithNoDictionaryFields1") { @@ -220,8 +110,14 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 7) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) //check 
msr assert(msrCols.size == 1) @@ -241,7 +137,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 0) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -263,8 +165,14 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 7) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 0) @@ -283,8 +191,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col3")) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + 
assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -304,8 +217,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -325,8 +243,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -349,9 +272,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //below column names will be available in noDictionary list - assert(noDictionary.size == 2) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + 
assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 0) @@ -369,8 +296,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(1).get.column.equalsIgnoreCase("col2")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -390,7 +322,6 @@ class TestCarbonSqlParser extends QueryTest { } - } http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala index 23800ee..c584fc7 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala @@ -83,7 +83,8 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { try { sql( "CREATE TABLE IF NOT EXISTS sample (id STRING, name STRING, city STRING, " + - "age INT) STORED BY 'org.apache.carbondata.format'" + "age INT) STORED BY 'org.apache.carbondata.format' " + + "TBLPROPERTIES('dictionary_include'='city')" ) } catch { case ex: Throwable => 
LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -96,7 +97,7 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { "ActiveProvince: string, Activecity: string, ActiveDistrict: string, ActiveStreet: " + "string>>, proddate struct<productionDate: string,activeDeactivedate: array<string>>, " + "gamePointId INT,contractNumber INT) STORED BY 'org.apache.carbondata.format'" + - "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize')" + "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize', 'dictionary_include'='channelsId')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 5f7ff2e..9c576ec 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -78,7 +78,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -89,7 +89,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll """CREATE TABLE verticalDelimitedTable 
(deviceInformationId int, channelsId string,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java index ce7b321..1bcbaa8 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java @@ -313,7 +313,7 @@ public class SortTempFileChunkHolder implements Comparable<SortTempFileChunkHold Object[] holder = new Object[3]; int index = 0; int nonDicIndex = 0; - int[] dim = new int[this.dimensionCount]; + int[] dim = new int[this.dimensionCount - this.noDictionaryCount]; byte[][] nonDicArray = new byte[this.noDictionaryCount + this.complexDimensionCount][]; Object[] measures = new Object[this.measureCount]; try { http://git-wip-us.apache.org/repos/asf/carbondata/blob/191dfb35/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 5d9e081..1bbe98c 100644 --- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -603,9 +603,9 @@ public abstract class AbstractFactDataWriter<T> implements CarbonFactDataWriter< short numberOfByteToStorLength = 2; // as length part is removed, so each no dictionary value index // needs to be reshuffled by 2 bytes - int NumberOfNoDictSortColumns = + int numberOfNoDictSortColumns = dataWriterVo.getSegmentProperties().getNumberOfNoDictSortColumns(); - for (int i = 0; i < NumberOfNoDictSortColumns; i++) { + for (int i = 0; i < numberOfNoDictSortColumns; i++) { output.putShort((short) (buffer.getShort() - numberOfByteToStorLength)); } // copy the data part
