[CARBONDATA-1273] String datatype will be a no-dictionary column by default. This closes #1144
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/01589684 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/01589684 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/01589684 Branch: refs/heads/master Commit: 01589684f2804527872a791c8a0058581e1bc760 Parents: 0934e44 Author: QiangCai <[email protected]> Authored: Thu Jul 6 10:41:55 2017 +0800 Committer: Raghunandan S <[email protected]> Committed: Tue Jul 25 17:29:25 2017 +0800 ---------------------------------------------------------------------- .../core/constants/CarbonCommonConstants.java | 23 --- .../carbondata/core/util/CarbonProperties.java | 37 ---- .../InsertIntoCarbonTableTestCase.scala | 2 +- .../dataload/TestBatchSortDataLoad.scala | 5 +- .../dataload/TestLoadDataUseAllDictionary.scala | 1 + .../partition/TestDDLForPartitionTable.scala | 5 +- .../testsuite/sortcolumns/TestSortColumns.scala | 4 +- .../sortcolumns/TestSortColumnsWithUnsafe.scala | 6 +- .../spark/rdd/CarbonGlobalDictionaryRDD.scala | 143 ++++++-------- .../spark/util/GlobalDictionaryUtil.scala | 24 +-- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 16 +- .../spark/util/AllDictionaryTestCase.scala | 7 +- .../AutoHighCardinalityIdentifyTestCase.scala | 147 -------------- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- ...GlobalDictionaryUtilConcurrentTestCase.scala | 3 +- .../apache/spark/sql/TestCarbonSqlParser.scala | 191 ++++++------------- .../spark/util/AllDictionaryTestCase.scala | 5 +- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- .../sortdata/SortTempFileChunkHolder.java | 2 +- .../store/writer/AbstractFactDataWriter.java | 4 +- 20 files changed, 158 insertions(+), 475 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index dfc2153..c105cb0 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -884,29 +884,6 @@ public final class CarbonCommonConstants { public static final String NO_INVERTED_INDEX = "no_inverted_index"; /** - * this variable is to enable/disable identify high cardinality during first data loading - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_IDENTIFY_ENABLE = "high.cardinality.identify.enable"; - public static final String HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT = "true"; - - /** - * threshold of high cardinality - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_THRESHOLD = "high.cardinality.threshold"; - public static final String HIGH_CARDINALITY_THRESHOLD_DEFAULT = "1000000"; - public static final int HIGH_CARDINALITY_THRESHOLD_MIN = 10000; - - /** - * percentage of cardinality in row count - */ - @CarbonProperty - public static final String HIGH_CARDINALITY_IN_ROW_COUNT_PERCENTAGE = - "high.cardinality.row.count.percentage"; - public static final String HIGH_CARDINALITY_IN_ROW_COUNT_PERCENTAGE_DEFAULT = "80"; - - /** * 16 mb size */ public static final long CARBON_16MB = 16 * 1024 * 1024; http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index c9dd1ec..3cd0f68 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -92,8 +92,6 @@ public final class CarbonProperties { validateNumCores(); validateNumCoresBlockSort(); validateSortSize(); - validateHighCardinalityIdentify(); - validateHighCardinalityThreshold(); validateCarbonDataFileVersion(); validateExecutorStartUpTime(); validatePrefetchBufferSize(); @@ -449,41 +447,6 @@ public final class CarbonProperties { } } - private void validateHighCardinalityIdentify() { - String highcardIdentifyStr = - carbonProperties.getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE); - boolean validateBoolean = CarbonUtil.validateBoolean(highcardIdentifyStr); - if (!validateBoolean) { - LOGGER.info("The high cardinality identify value \"" + highcardIdentifyStr - + "\" is invalid. Using the default value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT); - } - } - - private void validateHighCardinalityThreshold() { - String highcardThresholdStr = carbonProperties - .getProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - try { - int highcardThreshold = Integer.parseInt(highcardThresholdStr); - if (highcardThreshold < CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN) { - LOGGER.info("The high cardinality threshold value \"" + highcardThresholdStr - + "\" is invalid. Using the min value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_MIN + ""); - } - } catch (NumberFormatException e) { - LOGGER.info("The high cardinality threshold value \"" + highcardThresholdStr - + "\" is invalid. 
Using the default value \"" - + CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - carbonProperties.setProperty(CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT); - } - } - /** * Below method will be used to validate the data file version parameter * if parameter is invalid current version will be set http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala index b9284e9..f62b7c7 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala @@ -38,7 +38,7 @@ class InsertIntoCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { checkAnswer( sql("select 
imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription from THive order by imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,
Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription"), sql("select imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription from TCarbon order by 
imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription") - ) + ) CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timeStampPropOrig) } test("insert from hive-sum expression") { http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala index 0ac689b..77ad466 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala +++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestBatchSortDataLoad.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.test.util.QueryTest class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { var filePath: String = _ - def buildTestData() = { filePath = s"${integrationPath}/spark-common-test/target/big.csv" val file = new File(filePath) @@ -78,6 +77,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load1(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load1 " + @@ -159,6 +159,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load3(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load3 " + @@ -181,6 +182,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load4(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$filePath' into table carbon_load4 " ) @@ -201,6 +203,7 @@ class TestBatchSortDataLoad extends QueryTest with BeforeAndAfterAll { | CREATE TABLE carbon_load6(c1 string, c2 string, c3 string, c4 string, c5 string, | c6 string, c7 int, c8 int, c9 int, c10 int) | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('dictionary_include'='c1,c2,c3,c4,c5,c6') """.stripMargin) sql(s"LOAD DATA LOCAL INPATH 
'$filePath' into table carbon_load6 " ) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala index c8d47e9..8e16ba9 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataUseAllDictionary.scala @@ -29,6 +29,7 @@ class TestLoadDataUseAllDictionary extends QueryTest with BeforeAndAfterAll{ (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) STORED BY 'carbondata' + TBLPROPERTIES('dictionary_include'='country,name,phonetype,serialname') """) } http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala index eb155d4..de2f42f 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala @@ -105,9 +105,8 
@@ class TestDDLForPartitionTable extends QueryTest with BeforeAndAfterAll { assert(partitionInfo != null) assert(partitionInfo.getColumnSchemaList.get(0).getColumnName.equalsIgnoreCase("workgroupcategory")) assert(partitionInfo.getColumnSchemaList.get(0).getDataType == DataType.STRING) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.size == 2) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(0) == Encoding.DICTIONARY) - assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(1) == Encoding.INVERTED_INDEX) + assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.size == 1) + assert(partitionInfo.getColumnSchemaList.get(0).getEncodingList.get(0) == Encoding.INVERTED_INDEX) assert(partitionInfo.getPartitionType == PartitionType.LIST) assert(partitionInfo.getListInfo.size == 3) assert(partitionInfo.getListInfo.get(0).size == 1) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala index b039a01..2704d23 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumns.scala @@ -71,7 +71,7 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll { } test("create table with dictionary sort_columns") { - sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode 
int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname')") + sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable2 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") checkAnswer(sql("select empname from sorttable2"),sql("select empname from origintable1")) } @@ -177,7 +177,7 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll { } test("filter on sort_columns include no-dictionary, direct-dictionary and dictioanry") { - sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname')") + sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable6 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") // no dictionary checkAnswer(sql("select * from sorttable6 where 
workgroupcategory = 1"), sql("select * from origintable1 where workgroupcategory = 1 order by doj")) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala index 0e0811a..e98b701 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/sortcolumns/TestSortColumnsWithUnsafe.scala @@ -34,11 +34,11 @@ class TestSortColumnsWithUnsafe extends QueryTest with BeforeAndAfterAll { test("create table with no dictionary sort_columns") { sql("CREATE TABLE sorttable1 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empno')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable1 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") - checkAnswer(sql("select empno from sorttable1"), sql("select empno from sorttable1 order by empno")) + checkAnswer(sql("select empno from sorttable1"), sql("select empno from origintable1 order by empno")) } test("create table with dictionary sort_columns") { - sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, 
deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname')") + sql("CREATE TABLE sorttable2 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable2 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") checkAnswer(sql("select empname from sorttable2"),sql("select empname from origintable1")) } @@ -144,7 +144,7 @@ class TestSortColumnsWithUnsafe extends QueryTest with BeforeAndAfterAll { } test("filter on sort_columns include no-dictionary, direct-dictionary and dictioanry") { - sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname')") + sql("CREATE TABLE sorttable6 (empno int, empname String, designation String, doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,salary int) STORED BY 'org.apache.carbondata.format' tblproperties('sort_columns'='workgroupcategory, doj, empname', 'dictionary_include'='empname')") sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE sorttable6 OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '\"')""") // no dictionary 
checkAnswer(sql("select * from sorttable6 where workgroupcategory = 1"), sql("select * from origintable1 where workgroupcategory = 1 order by doj")) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala index d0f9362..a7b8143 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala @@ -153,8 +153,6 @@ case class DictionaryLoadModel(table: CarbonTableIdentifier, isComplexes: Array[Boolean], primDimensions: Array[CarbonDimension], delimiters: Array[String], - highCardIdentifyEnable: Boolean, - highCardThreshold: Int, columnIdentifier: Array[ColumnIdentifier], isFirstLoad: Boolean, hdfsTempLocation: String, @@ -329,18 +327,17 @@ class CarbonBlockDistinctValuesCombineRDD( class CarbonGlobalDictionaryGenerateRDD( prev: RDD[(Int, ColumnDistinctValues)], model: DictionaryLoadModel) - extends CarbonRDD[(Int, String, Boolean)](prev) { + extends CarbonRDD[(Int, String)](prev) { override def getPartitions: Array[Partition] = firstParent[(Int, ColumnDistinctValues)].partitions override def internalCompute(split: Partition, - context: TaskContext): Iterator[(Int, String, Boolean)] = { + context: TaskContext): Iterator[(Int, String)] = { val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION, model.hdfsLocation) val status = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS - var isHighCardinalityColumn = false - val iter = 
new Iterator[(Int, String, Boolean)] { + val iter = new Iterator[(Int, String)] { var dictionaryForDistinctValueLookUp: Dictionary = _ var dictionaryForSortIndexWriting: Dictionary = _ var dictionaryForDistinctValueLookUpCleared: Boolean = false @@ -375,90 +372,70 @@ class CarbonGlobalDictionaryGenerateRDD( val distinctValueList = rddIter.next()._2 valuesBuffer ++= distinctValueList.values rowCount += distinctValueList.rowCount - // check high cardinality - if (model.isFirstLoad && model.highCardIdentifyEnable - && !model.isComplexes(split.index) - && model.primDimensions(split.index).isColumnar) { - isHighCardinalityColumn = GlobalDictionaryUtil.isHighCardinalityColumn( - valuesBuffer.size, model) - if (isHighCardinalityColumn) { - break - } - } } } val combineListTime = System.currentTimeMillis() - t1 - if (isHighCardinalityColumn) { - LOGGER.info(s"column ${ model.table.getTableUniqueName }." + - s"${ - model.primDimensions(split.index) - .getColName - } is high cardinality column") + isDictionaryLocked = dictLock.lockWithRetries() + if (isDictionaryLocked) { + logInfo(s"Successfully able to get the dictionary lock for ${ + model.primDimensions(split.index).getColName + }") } else { - isDictionaryLocked = dictLock.lockWithRetries() - if (isDictionaryLocked) { - logInfo(s"Successfully able to get the dictionary lock for ${ + sys.error(s"Dictionary file ${ model.primDimensions(split.index).getColName - }") - } else { - sys - .error(s"Dictionary file ${ - model.primDimensions(split.index).getColName - } is locked for updation. 
Please try after some time") - } - val t2 = System.currentTimeMillis - val fileType = FileFactory.getFileType(model.dictFilePaths(split.index)) - model.dictFileExists(split.index) = FileFactory - .isFileExist(model.dictFilePaths(split.index), fileType) - dictionaryForDistinctValueLookUp = if (model.dictFileExists(split.index)) { - CarbonLoaderUtil.getDictionary(model.table, - model.columnIdentifier(split.index), - model.hdfsLocation, - model.primDimensions(split.index).getDataType - ) - } else { - null - } - val dictCacheTime = System.currentTimeMillis - t2 - val t3 = System.currentTimeMillis() - val dictWriteTask = new DictionaryWriterTask(valuesBuffer, - dictionaryForDistinctValueLookUp, - model.table, + } is locked for updation. Please try after some time") + } + val t2 = System.currentTimeMillis + val fileType = FileFactory.getFileType(model.dictFilePaths(split.index)) + val isDictFileExists = FileFactory.isFileExist(model.dictFilePaths(split.index), fileType) + dictionaryForDistinctValueLookUp = if (isDictFileExists) { + CarbonLoaderUtil.getDictionary(model.table, model.columnIdentifier(split.index), model.hdfsLocation, - model.primDimensions(split.index).getColumnSchema, - model.dictFileExists(split.index) + model.primDimensions(split.index).getDataType ) - // execute dictionary writer task to get distinct values - val distinctValues = dictWriteTask.execute() - val dictWriteTime = System.currentTimeMillis() - t3 - val t4 = System.currentTimeMillis() - // if new data came than rewrite sort index file - if (distinctValues.size() > 0) { - val sortIndexWriteTask = new SortIndexWriterTask(model.table, - model.columnIdentifier(split.index), - model.primDimensions(split.index).getDataType, - model.hdfsLocation, - dictionaryForDistinctValueLookUp, - distinctValues) - sortIndexWriteTask.execute() - } - val sortIndexWriteTime = System.currentTimeMillis() - t4 - CarbonTimeStatisticsFactory.getLoadStatisticsInstance.recordDicShuffleAndWriteTime() - // After sortIndex 
writing, update dictionaryMeta - dictWriteTask.updateMetaData() - // clear the value buffer after writing dictionary data - valuesBuffer.clear - CarbonUtil.clearDictionaryCache(dictionaryForDistinctValueLookUp) - dictionaryForDistinctValueLookUpCleared = true - LOGGER.info(s"\n columnName: ${ model.primDimensions(split.index).getColName }" + - s"\n columnId: ${ model.primDimensions(split.index).getColumnId }" + - s"\n new distinct values count: ${ distinctValues.size() }" + - s"\n combine lists: $combineListTime" + - s"\n create dictionary cache: $dictCacheTime" + - s"\n sort list, distinct and write: $dictWriteTime" + - s"\n write sort info: $sortIndexWriteTime") + } else { + null + } + val dictCacheTime = System.currentTimeMillis - t2 + val t3 = System.currentTimeMillis() + val dictWriteTask = new DictionaryWriterTask(valuesBuffer, + dictionaryForDistinctValueLookUp, + model.table, + model.columnIdentifier(split.index), + model.hdfsLocation, + model.primDimensions(split.index).getColumnSchema, + isDictFileExists + ) + // execute dictionary writer task to get distinct values + val distinctValues = dictWriteTask.execute() + val dictWriteTime = System.currentTimeMillis() - t3 + val t4 = System.currentTimeMillis() + // if new data came than rewrite sort index file + if (distinctValues.size() > 0) { + val sortIndexWriteTask = new SortIndexWriterTask(model.table, + model.columnIdentifier(split.index), + model.primDimensions(split.index).getDataType, + model.hdfsLocation, + dictionaryForDistinctValueLookUp, + distinctValues) + sortIndexWriteTask.execute() } + val sortIndexWriteTime = System.currentTimeMillis() - t4 + CarbonTimeStatisticsFactory.getLoadStatisticsInstance.recordDicShuffleAndWriteTime() + // After sortIndex writing, update dictionaryMeta + dictWriteTask.updateMetaData() + // clear the value buffer after writing dictionary data + valuesBuffer.clear + CarbonUtil.clearDictionaryCache(dictionaryForDistinctValueLookUp) + dictionaryForDistinctValueLookUpCleared 
= true + LOGGER.info(s"\n columnName: ${ model.primDimensions(split.index).getColName }" + + s"\n columnId: ${ model.primDimensions(split.index).getColumnId }" + + s"\n new distinct values count: ${ distinctValues.size() }" + + s"\n combine lists: $combineListTime" + + s"\n create dictionary cache: $dictCacheTime" + + s"\n sort list, distinct and write: $dictWriteTime" + + s"\n write sort info: $sortIndexWriteTime") } catch { case ex: Exception => LOGGER.error(ex) @@ -492,8 +469,8 @@ class CarbonGlobalDictionaryGenerateRDD( } } - override def next(): (Int, String, Boolean) = { - (split.index, status, isHighCardinalityColumn) + override def next(): (Int, String) = { + (split.index, status) } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index bf41dca..e1c564d 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -278,11 +278,6 @@ object GlobalDictionaryUtil { } } - def isHighCardinalityColumn(columnCardinality: Int, - model: DictionaryLoadModel): Boolean = { - columnCardinality > model.highCardThreshold - } - /** * create a instance of DictionaryLoadModel * @@ -319,14 +314,6 @@ object GlobalDictionaryUtil { val lockType = CarbonProperties.getInstance .getProperty(CarbonCommonConstants.LOCK_TYPE, CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS) val zookeeperUrl = CarbonProperties.getInstance.getProperty(CarbonCommonConstants.ZOOKEEPER_URL) - // load high cardinality identify configure - val 
highCardIdentifyEnable = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, - CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT).toBoolean - val highCardThreshold = CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD, - CarbonCommonConstants.HIGH_CARDINALITY_THRESHOLD_DEFAULT).toInt - val serializationNullFormat = carbonLoadModel.getSerializationNullFormat.split(CarbonCommonConstants.COMMA, 2)(1) // get load count @@ -342,8 +329,6 @@ object GlobalDictionaryUtil { isComplexes.toArray, primDimensions, carbonLoadModel.getDelimiters, - highCardIdentifyEnable, - highCardThreshold, columnIdentifier, carbonLoadModel.getLoadMetadataDetails.size() == 0, hdfsTempLocation, @@ -394,9 +379,8 @@ object GlobalDictionaryUtil { private def checkStatus(carbonLoadModel: CarbonLoadModel, sqlContext: SQLContext, model: DictionaryLoadModel, - status: Array[(Int, String, Boolean)]) = { + status: Array[(Int, String)]) = { var result = false - val noDictionaryColumns = new ArrayBuffer[CarbonDimension] val tableName = model.table.getTableName status.foreach { x => val columnName = model.primDimensions(x._1).getColName @@ -404,12 +388,6 @@ object GlobalDictionaryUtil { result = true LOGGER.error(s"table:$tableName column:$columnName generate global dictionary file failed") } - if (x._3) { - noDictionaryColumns += model.primDimensions(x._1) - } - } - if (noDictionaryColumns.nonEmpty) { - updateTableMetadataFunc(carbonLoadModel, sqlContext, model, noDictionaryColumns.toArray) } if (result) { LOGGER.error("generate global dictionary files failed") http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala 
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala index 3704f8a..a309beb 100644 --- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala +++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala @@ -573,6 +573,8 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { // All excluded cols should be there in create table cols if (tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).isDefined) { + LOGGER.warn("dictionary_exclude option was deprecated, " + + "by default string column does not use global dictionary.") dictExcludeCols = tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).get.split(',').map(_.trim) dictExcludeCols @@ -618,19 +620,17 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { } } - // by default consider all String cols as dims and if any dictionary exclude is present then + // by default consider all String cols as dims and if any dictionary include isn't present then // add it to noDictionaryDims list. 
consider all dictionary excludes/include cols as dims fields.foreach { field => - if (dictExcludeCols.toSeq.exists(x => x.equalsIgnoreCase(field.column))) { - val dataType = DataTypeUtil.getDataType(field.dataType.get.toUpperCase()) - if (dataType != DataType.TIMESTAMP && dataType != DataType.DATE) { - noDictionaryDims :+= field.column - } - dimFields += field - } else if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) { + if (dictIncludeCols.exists(x => x.equalsIgnoreCase(field.column))) { dimFields += field } else if (isDetectAsDimentionDatatype(field.dataType.get)) { dimFields += field + // consider all String cols as noDicitonaryDims by default + if (DataType.STRING.getName.equalsIgnoreCase(field.dataType.get)) { + noDictionaryDims :+= field.column + } } else if (sortKeyDimsTmp.exists(x => x.equalsIgnoreCase(field.column))) { noDictionaryDims :+= field.column dimFields += field http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala index c149152..6ed826f 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala @@ -16,8 +16,6 @@ */ package org.apache.carbondata.spark.util -import java.io.File - import org.apache.spark.sql.test.util.QueryTest import org.apache.spark.sql.{CarbonEnv, CarbonRelation} import org.scalatest.BeforeAndAfterAll @@ -82,7 +80,8 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { try { sql( "CREATE TABLE IF NOT EXISTS sample (id STRING, name STRING, city STRING, " + - "age INT) STORED BY 
'org.apache.carbondata.format'" + "age INT) STORED BY 'org.apache.carbondata.format' " + + "tblproperties('dictionary_include'='city')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -95,7 +94,7 @@ class AllDictionaryTestCase extends QueryTest with BeforeAndAfterAll { "ActiveProvince: string, Activecity: string, ActiveDistrict: string, ActiveStreet: " + "string>>, proddate struct<productionDate: string,activeDeactivedate: array<string>>, " + "gamePointId INT,contractNumber INT) STORED BY 'org.apache.carbondata.format'" + - "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize')" + "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize', 'dictionary_include'='channelsId')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala deleted file mode 100644 index 7842b11..0000000 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/AutoHighCardinalityIdentifyTestCase.scala +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.spark.util - -import java.io.{BufferedWriter, File, FileWriter} -import java.util.Random - -import org.apache.spark.sql.test.util.QueryTest -import org.apache.spark.sql.{CarbonEnv, CarbonRelation} -import org.scalatest.BeforeAndAfterAll -import org.apache.carbondata.core.metadata.encoder.Encoding -import org.apache.carbondata.core.metadata.schema.table.CarbonTable -import org.apache.carbondata.core.util.path.CarbonStorePath -import org.apache.carbondata.core.metadata.CarbonTableIdentifier -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} -import org.apache.carbondata.processing.model.{CarbonDataLoadSchema, CarbonLoadModel} - -/** - * Test Case for org.apache.carbondata.spark.util.GlobalDictionaryUtil - */ -class AutoHighCardinalityIdentifyTestCase extends QueryTest with BeforeAndAfterAll { - - var filePath: String = _ - - def buildCarbonLoadModel(relation: CarbonRelation, - filePath: String, - header: String): CarbonLoadModel = { - val carbonLoadModel = new CarbonLoadModel - carbonLoadModel.setTableName(relation.tableMeta.carbonTableIdentifier.getDatabaseName) - carbonLoadModel.setDatabaseName(relation.tableMeta.carbonTableIdentifier.getTableName) - // carbonLoadModel.setSchema(relation.tableMeta.schema) - val table = relation.tableMeta.carbonTable - val carbonSchema = new CarbonDataLoadSchema(table) - carbonLoadModel.setDatabaseName(table.getDatabaseName) - carbonLoadModel.setTableName(table.getFactTableName) - 
carbonLoadModel.setCarbonDataLoadSchema(carbonSchema) - carbonLoadModel.setFactFilePath(filePath) - carbonLoadModel.setCsvHeader(header) - carbonLoadModel.setCsvDelimiter(",") - carbonLoadModel.setComplexDelimiterLevel1("\\$") - carbonLoadModel.setComplexDelimiterLevel2("\\:") - carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty( - CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, - CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)) - carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel)) - carbonLoadModel - } - - override def beforeAll { - buildTestData - buildTable - } - - override def afterAll { - sql("drop table if exists highcard") - sql("drop table if exists colgrp_highcard") - } - - def buildTestData() = { - filePath = s"${integrationPath}/spark/target/highcarddata.csv" - val file = new File(filePath) - val writer = new BufferedWriter(new FileWriter(file)) - writer.write("hc1,c2,c3") - writer.newLine() - var i = 0 - val random = new Random - for(i <- 0 until 2000000) { - writer.write("a" + i + "," + - "b" + i%1000 + "," + - i%1000000 + "\n") - if ( i % 10000 == 0) { - writer.flush() - } - } - writer.close - } - - def buildTable() = { - try { - sql("drop table if exists highcard") - sql("""create table if not exists highcard - (hc1 string, c2 string, c3 int) - STORED BY 'org.apache.carbondata.format'""") - } catch { - case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) - } - } - - def relation(tableName: String): CarbonRelation = { - CarbonEnv.get.carbonMetastore - .lookupRelation1(Option(CarbonCommonConstants.DATABASE_DEFAULT_NAME), - tableName)(sqlContext) - .asInstanceOf[CarbonRelation] - } - - private def checkDictFile(table: CarbonTable) = { - val tableIdentifier = new CarbonTableIdentifier(table.getDatabaseName, - table.getFactTableName, "1") - val carbonTablePath = CarbonStorePath.getCarbonTablePath(storeLocation, - tableIdentifier) - val newHc1 = 
table.getDimensionByName("highcard", "hc1") - val newC2 = table.getDimensionByName("highcard", "c2") - val dictFileHc1 = carbonTablePath.getDictionaryFilePath(newHc1.getColumnId) - val dictFileC2 = carbonTablePath.getDictionaryFilePath(newC2.getColumnId) - assert(!CarbonUtil.isFileExists(dictFileHc1)) - assert(CarbonUtil.isFileExists(dictFileC2)) - } - - private def checkMetaData(oldTable: CarbonTable, newTable: CarbonTable) = { - val oldHc1 = oldTable.getDimensionByName("highcard", "hc1") - val oldc2 = oldTable.getDimensionByName("highcard", "c2") - val newHc1 = newTable.getDimensionByName("highcard", "hc1") - val newC2 = newTable.getDimensionByName("highcard", "c2") - assert(oldHc1.hasEncoding(Encoding.DICTIONARY)) - assert(oldc2.hasEncoding(Encoding.DICTIONARY)) - assert(!newHc1.hasEncoding(Encoding.DICTIONARY)) - assert(newC2.hasEncoding(Encoding.DICTIONARY)) - } - - test("auto identify high cardinality column in first load #396") { - val oldTable = relation("highcard").tableMeta.carbonTable - sql(s"LOAD DATA LOCAL INPATH '$filePath' into table highcard") - val newTable = relation("highcard").tableMeta.carbonTable - sql(s"select count(hc1) from highcard") - - // check dictionary file - checkDictFile(newTable) - // check the meta data - checkMetaData(oldTable, newTable) - } - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 706d9d6..027086e 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ 
b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -73,7 +73,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -83,7 +83,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll sql("""CREATE TABLE verticalDelimitedTable (deviceInformationId int, channelsId string,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala index e95b9cd..a35f88b 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala @@ -76,7 +76,8 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft def buildTable() = { try { sql( - "CREATE TABLE IF NOT EXISTS employee 
(empid STRING) STORED BY 'org.apache.carbondata.format'") + "CREATE TABLE IF NOT EXISTS employee (empid STRING) STORED BY 'org.apache.carbondata.format' " + + "tblproperties('dictionary_include'='empid')") } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) } http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala index e79aa80..13d4167 100644 --- a/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala +++ b/integration/spark/src/test/scala/org/apache/spark/sql/TestCarbonSqlParser.scala @@ -28,16 +28,6 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants */ private class TestCarbonSqlParserStub extends CarbonSqlParser { - //val parser:CarbonSqlDDLParser = new CarbonSqlDDLParser() - - def updateColumnGroupsInFieldTest(fields: Seq[Field], tableProperties: Map[String, String]): Seq[String] = { - - var (dims, msrs, noDictionaryDims, sortkey) = extractDimAndMsrFields(fields, tableProperties) - - updateColumnGroupsInField(tableProperties, - noDictionaryDims, msrs, dims) - } - def extractDimAndMsrFieldsTest(fields: Seq[Field], tableProperties: Map[String, String]): (Seq[Field], Seq[Field], Seq[String], Seq[String]) = { extractDimAndMsrFields(fields, tableProperties) @@ -78,113 +68,6 @@ class TestCarbonSqlParser extends QueryTest { fields } - // Testing the column group Splitting method. 
- test("Test-updateColumnGroupsInField") { - val colGroupStr = "(col2,col3),(col5,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col5,col6")) - assert(colgrps.lift(2).get.equalsIgnoreCase("col7,col8")) - - } - test("Test-updateColumnGroupsInField_disordered") { - val colGroupStr = "(col5,col6),(col2,col3),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - var fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col5,col6")) - assert(colgrps.lift(2).get.equalsIgnoreCase("col7,col8")) - - } - test("Test-ColumnGroupsInvalidField_Shouldnotallow") { - val colGroupStr = "(col1,col2),(col10,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-MeasureInColumnGroup_ShouldNotAllow") { - //col1 is measure - val colGroupStr = "(col1,col2),(col5,col6),(col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - 
test("Test-NoDictionaryInColumnGroup_ShouldNotAllow") { - //col5 is no dictionary - val colGroupStr = "(col2,col3),(col5,col6),(col7,col8)" - val noDictStr = "col5" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr, CarbonCommonConstants.DICTIONARY_EXCLUDE -> noDictStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-SameColumnInDifferentGroup_ShouldNotAllow") { - val colGroupStr = "(col2,col3),(col5,col6),(col6,col7,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - - test("Test-ColumnAreNotTogetherAsInSchema_ShouldNotAllow") { - val colGroupStr = "(col2,col3),(col5,col8)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - try { - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(false) - } catch { - case e: Exception => assert(true) - } - } - test("Test-ColumnInColumnGroupAreShuffledButInSequence") { - val colGroupStr = "(col2,col3),(col7,col8,col6)" - val tableProperties = Map(CarbonCommonConstants.COLUMN_GROUPS -> colGroupStr) - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - - val colgrps = stub.updateColumnGroupsInFieldTest(fields, tableProperties) - assert(colgrps.lift(0).get.equalsIgnoreCase("col2,col3")) - assert(colgrps.lift(1).get.equalsIgnoreCase("col6,col7,col8")) - } - // Testing the column group Splitting method with empty table properties so null will be returned. 
- test("Test-Empty-updateColumnGroupsInField") { - val tableProperties = Map("" -> "") - val fields: Seq[Field] = loadAllFields - val stub = new TestCarbonSqlParserStub() - val colgrps = stub.updateColumnGroupsInFieldTest(fields, Map()) - //assert( rtn === 1) - assert(null == colgrps) - } - // Testing the extracting of Dims and no Dictionary test("Test-extractDimColsAndNoDictionaryFields") { val tableProperties = Map(CarbonCommonConstants.DICTIONARY_EXCLUDE -> "col2", CarbonCommonConstants.DICTIONARY_INCLUDE -> "col4") @@ -203,8 +86,15 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //No dictionary column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.size == 7) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) + } test("Test-DimAndMsrColsWithNoDictionaryFields1") { @@ -220,8 +110,14 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 7) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) //check 
msr assert(msrCols.size == 1) @@ -241,7 +137,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 0) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -263,8 +165,14 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 7) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(6).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 0) @@ -283,8 +191,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col3")) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + 
assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -304,8 +217,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -325,8 +243,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(2).get.column.equalsIgnoreCase("col3")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col3")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -349,9 +272,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(3).get.column.equalsIgnoreCase("col4")) //below column names will be available in noDictionary list - assert(noDictionary.size == 2) + assert(noDictionary.size == 6) assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + 
assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 0) @@ -369,8 +296,13 @@ class TestCarbonSqlParser extends QueryTest { assert(dimCols.lift(1).get.column.equalsIgnoreCase("col2")) //below column names will be available in noDictionary list - assert(noDictionary.size == 1) - assert(noDictionary.lift(0).get.equalsIgnoreCase("col2")) + assert(noDictionary.size == 6) + assert(noDictionary.lift(0).get.equalsIgnoreCase("col1")) + assert(noDictionary.lift(1).get.equalsIgnoreCase("col2")) + assert(noDictionary.lift(2).get.equalsIgnoreCase("col5")) + assert(noDictionary.lift(3).get.equalsIgnoreCase("col6")) + assert(noDictionary.lift(4).get.equalsIgnoreCase("col7")) + assert(noDictionary.lift(5).get.equalsIgnoreCase("col8")) //check msr assert(msrCols.size == 1) @@ -390,7 +322,6 @@ class TestCarbonSqlParser extends QueryTest { } - } http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala index 89bf839..6afa25b 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/AllDictionaryTestCase.scala @@ -81,7 +81,8 @@ class AllDictionaryTestCase extends Spark2QueryTest with BeforeAndAfterAll { try { sql( "CREATE TABLE IF NOT EXISTS sample (id STRING, name STRING, city STRING, " + - "age INT) STORED BY 'org.apache.carbondata.format'" + "age INT) STORED BY 'org.apache.carbondata.format' " + + "TBLPROPERTIES('dictionary_include'='city')" ) } catch { case ex: Throwable => 
LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -94,7 +95,7 @@ class AllDictionaryTestCase extends Spark2QueryTest with BeforeAndAfterAll { "ActiveProvince: string, Activecity: string, ActiveDistrict: string, ActiveStreet: " + "string>>, proddate struct<productionDate: string,activeDeactivedate: array<string>>, " + "gamePointId INT,contractNumber INT) STORED BY 'org.apache.carbondata.format'" + - "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize')" + "TBLPROPERTIES('DICTIONARY_EXCLUDE'='ROMSize', 'dictionary_include'='channelsId')" ) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala index 7350e2d..7f6e88a 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala @@ -76,7 +76,7 @@ class ExternalColumnDictionaryTestCase extends Spark2QueryTest with BeforeAndAft proddate struct<productionDate:string,activeDeactivedate:array<string>>, gamePointId double,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) @@ -87,7 +87,7 @@ class ExternalColumnDictionaryTestCase extends Spark2QueryTest with BeforeAndAft """CREATE TABLE verticalDelimitedTable 
(deviceInformationId int, channelsId string,contractNumber double) STORED BY 'org.apache.carbondata.format' - TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId') + TBLPROPERTIES('DICTIONARY_INCLUDE' = 'deviceInformationId, channelsId') """) } catch { case ex: Throwable => LOGGER.error(ex.getMessage + "\r\n" + ex.getStackTraceString) http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java index ce7b321..1bcbaa8 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java @@ -313,7 +313,7 @@ public class SortTempFileChunkHolder implements Comparable<SortTempFileChunkHold Object[] holder = new Object[3]; int index = 0; int nonDicIndex = 0; - int[] dim = new int[this.dimensionCount]; + int[] dim = new int[this.dimensionCount - this.noDictionaryCount]; byte[][] nonDicArray = new byte[this.noDictionaryCount + this.complexDimensionCount][]; Object[] measures = new Object[this.measureCount]; try { http://git-wip-us.apache.org/repos/asf/carbondata/blob/01589684/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 5d9e081..1bbe98c 100644 --- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -603,9 +603,9 @@ public abstract class AbstractFactDataWriter<T> implements CarbonFactDataWriter< short numberOfByteToStorLength = 2; // as length part is removed, so each no dictionary value index // needs to be reshuffled by 2 bytes - int NumberOfNoDictSortColumns = + int numberOfNoDictSortColumns = dataWriterVo.getSegmentProperties().getNumberOfNoDictSortColumns(); - for (int i = 0; i < NumberOfNoDictSortColumns; i++) { + for (int i = 0; i < numberOfNoDictSortColumns; i++) { output.putShort((short) (buffer.getShort() - numberOfByteToStorLength)); } // copy the data part
