Repository: carbondata Updated Branches: refs/heads/master cfb8ed9f5 -> 687118a1c
[CARBONDATA-2155][CARBONDATA-2152] [Presto] Fixed IS NULL not working correctly on string datatype with dictionary include in presto Fixed IS NULL not working correctly on string datatype with dictionary_include in presto integration, Fixed Min function working incorrectly for string type with dictionary include in presto integration This closes #2152 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/687118a1 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/687118a1 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/687118a1 Branch: refs/heads/master Commit: 687118a1cf7113aed4718bad3c87d8bed1fd49af Parents: cfb8ed9 Author: anubhav100 <anubhav.ta...@knoldus.in> Authored: Mon Apr 9 18:15:31 2018 +0530 Committer: chenliang613 <chenliang...@huawei.com> Committed: Thu Apr 12 15:07:31 2018 +0800 ---------------------------------------------------------------------- .../CarbonDictionaryDecodeReadSupport.scala | 7 +- .../presto/src/test/resources/alldatatype.csv | 1 + .../integrationtest/PrestoAllDataTypeTest.scala | 59 ++++++++++++-- .../presto/util/CarbonDataStoreCreator.scala | 82 ++++++++++++-------- 4 files changed, 110 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala ---------------------------------------------------------------------- diff --git a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala index 9c05177..82cdf3a 100644 --- a/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala +++ 
b/integration/presto/src/main/scala/org/apache/carbondata/presto/CarbonDictionaryDecodeReadSupport.scala @@ -93,7 +93,12 @@ class CarbonDictionaryDecodeReadSupport[T] extends CarbonReadSupport[T] { while (chunks.hasNext) { { val value: Array[Byte] = chunks.next - sliceArray(count) = wrappedBuffer(value, 0, value.length) + if(count ==1) { + sliceArray(count) = null + } + else { + sliceArray(count) = wrappedBuffer(value, 0, value.length) + } count += 1 } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/resources/alldatatype.csv ---------------------------------------------------------------------- diff --git a/integration/presto/src/test/resources/alldatatype.csv b/integration/presto/src/test/resources/alldatatype.csv index 7201542..0fa46aa 100644 --- a/integration/presto/src/test/resources/alldatatype.csv +++ b/integration/presto/src/test/resources/alldatatype.csv @@ -9,3 +9,4 @@ ID,date,country,name,phonetype,serialname,salary,bonus,monthlyBonus,dob,shortfie 8,2015-07-30,china,geetika,phone1848,ASD57308,15007.500,500.88,200.97,2008-09-21 11:10:06,10,true 9,2015-07-18,china,ravindra,phone706,ASD86717,15008.00,700.999,45.25,2009-06-19 15:10:06,1,true 9,2015/07/18,china,jitesh,phone706,ASD86717,15008.00,500.414,11.655,2001-08-29 13:09:03,12,true + http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala ---------------------------------------------------------------------- diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala index 49da227..ce17682 100644 --- a/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala +++ 
b/integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoAllDataTypeTest.scala @@ -34,6 +34,34 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { + "../../../..").getCanonicalPath private val storePath = s"$rootPath/integration/presto/target/store" + + // Table schema: + // +-------------+----------------+-------------+------------+ + // | Column name | Data type | Column type | Dictionary | + // +-------------+----------------+--------------+-----------+ + // | id | string | dimension | yes | + // +-------------+----------------+-------------+------------+ + // | date | date | dimension | yes | + // +-------------+----------------+-------------+------------+ + // | country | string | dimension | yes | + // +-------------+----------------+-------------+------------- + // | name | string | dimension | yes | + // +-------------+----------------+-------------+------------- + // | phonetype | string | dimension | yes | + // +-------------+----------------+-------------+------------- + // | serialname | string | dimension | true | + // +-------------+----------------+-------------+------------- + // | bonus |short decimal | measure | false | + // +-------------+----------------+-------------+------------- + // | monthlyBonus| longdecimal | measure | false | + // +-------------+----------------+-------------+------------- + // | dob | timestamp | dimension | true | + // +-------------+----------------+-------------+------------+ + // | shortField | shortfield | measure | true | + // +-------------+----------------+-------------+------------- + // |isCurrentEmp | boolean | measure | true | + // +-------------+----------------+-------------+------------+ + override def beforeAll: Unit = { import org.apache.carbondata.presto.util.CarbonDataStoreCreator CarbonDataStoreCreator @@ -50,7 +78,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { test("test the result for count(*) in presto") { val 
actualResult: List[Map[String, Any]] = PrestoServer .executeQuery("SELECT COUNT(*) AS RESULT FROM TESTDB.TESTTABLE ") - val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 10)) + val expectedResult: List[Map[String, Any]] = List(Map("RESULT" -> 11)) assert(actualResult.equals(expectedResult)) } test("test the result for count() clause with distinct operator in presto") { @@ -160,7 +188,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { Map("NAME" -> "liang"), Map("NAME" -> "prince"), Map("NAME" -> "ravindra"), - Map("NAME" -> "sahil")) + Map("NAME" -> "sahil"), + Map("NAME" -> null) + ) assert(actualResult.equals(expectedResult)) } test("select DATE type with order by clause") { @@ -175,7 +205,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { Map("DATE" -> "2015-07-28"), Map("DATE" -> "2015-07-29"), Map("DATE" -> "2015-07-30"), - Map("DATE" -> null)) + Map("DATE" -> null), + Map("DATE" -> null) + ) assert(actualResult.filterNot(_.get("DATE") == null).zipWithIndex.forall { case (map, index) => map.get("DATE").toString @@ -194,7 +226,9 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { Map("ID" -> 6), Map("ID" -> 7), Map("ID" -> 8), - Map("ID" -> 9)) + Map("ID" -> 9), + Map("ID" -> null) + ) assert(actualResult.equals(expectedResult)) @@ -345,7 +379,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { test("test for null operator on date data type") { val actualResult: List[Map[String, Any]] = PrestoServer .executeQuery("SELECT ID FROM TESTDB.TESTTABLE WHERE DATE IS NULL") - val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9)) + val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 9),Map("ID" -> null)) assert(actualResult.equals(expectedResult)) } @@ -390,7 +424,7 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { val actualResult: List[Map[String, Any]] = PrestoServer .executeQuery( "SELECT ID from 
testdb.testtable WHERE SHORTFIELD IS NULL ORDER BY SHORTFIELD ") - val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7)) + val expectedResult: List[Map[String, Any]] = List(Map("ID" -> 7),Map("ID" -> null)) assert(actualResult.equals(expectedResult)) } @@ -450,4 +484,17 @@ class PrestoAllDataTypeTest extends FunSuiteLike with BeforeAndAfterAll { .executeQuery("SELECT id AS RESULT FROM TESTDB.TESTTABLE WHERE isCurrentEmployee is NOT null AND ID>8") assert(actualResult.head("RESULT").toString.toInt==9) } + test("test the is null operator when null is included in string data type dictionary_include"){ + // See CARBONDATA-2155 + val actualResult: List[Map[String, Any]] = PrestoServer.executeQuery("SELECT SERIALNAME FROM TESTDB.TESTTABLE WHERE SERIALNAME IS NULL") + assert(actualResult equals List(Map("SERIALNAME" -> null))) + } + test("test the min function when null is included in string data type with dictionary_include"){ + // See CARBONDATA-2152 + val actualResult = PrestoServer.executeQuery("SELECT MIN(SERIALNAME) FROM TESTDB.TESTTABLE") + val expectedResult = List(Map("_col0" -> "ASD14875")) + + assert(actualResult.equals(expectedResult)) + } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/687118a1/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala ---------------------------------------------------------------------- diff --git a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala index baf02fd..e12af63 100644 --- a/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala +++ b/integration/presto/src/test/scala/org/apache/carbondata/presto/util/CarbonDataStoreCreator.scala @@ -24,6 +24,7 @@ import java.util import java.util.{ArrayList, Date, List, UUID} import 
scala.collection.JavaConversions._ +import scala.collection.mutable import com.google.gson.Gson import org.apache.hadoop.conf.Configuration @@ -34,34 +35,26 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.hadoop.mapreduce.{RecordReader, TaskType} import org.apache.carbondata.common.logging.LogServiceFactory -import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier, -ReverseDictionary} +import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier, ReverseDictionary} import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType} import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory -import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl, -FileWriteOperation} -import org.apache.carbondata.core.metadata.converter.{SchemaConverter, -ThriftWrapperSchemaConverterImpl} +import org.apache.carbondata.core.fileoperations.{AtomicFileOperations, AtomicFileOperationsImpl, FileWriteOperation} +import org.apache.carbondata.core.metadata.converter.{SchemaConverter, ThriftWrapperSchemaConverterImpl} import org.apache.carbondata.core.metadata.datatype.DataTypes import org.apache.carbondata.core.metadata.encoder.Encoding -import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension, -CarbonMeasure, ColumnSchema} +import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension, CarbonMeasure, ColumnSchema} import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo, TableSchema} import org.apache.carbondata.core.metadata.schema.{SchemaEvolution, SchemaEvolutionEntry} -import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata, -CarbonTableIdentifier, ColumnIdentifier} +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, 
CarbonMetadata, CarbonTableIdentifier, ColumnIdentifier} import org.apache.carbondata.core.statusmanager.{LoadMetadataDetails, SegmentStatus} import org.apache.carbondata.core.util.path.CarbonTablePath import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} -import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, -CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator} -import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl, -ThriftWriter} +import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortIndexWriterImpl, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator} +import org.apache.carbondata.core.writer.{CarbonDictionaryWriter, CarbonDictionaryWriterImpl, ThriftWriter} import org.apache.carbondata.processing.loading.DataLoadExecutor import org.apache.carbondata.processing.loading.constants.DataLoadProcessorConstants -import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat, -CSVRecordReaderIterator, StringArrayWritable} +import org.apache.carbondata.processing.loading.csvinput.{BlockDetails, CSVInputFormat, CSVRecordReaderIterator, StringArrayWritable} import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException import org.apache.carbondata.processing.loading.model.{CarbonDataLoadSchema, CarbonLoadModel} import org.apache.carbondata.processing.util.TableOptionConstant @@ -266,8 +259,8 @@ object CarbonDataStoreCreator { val monthlyBonus: ColumnSchema = new ColumnSchema() monthlyBonus.setColumnName("monthlyBonus") monthlyBonus.setColumnar(true) - monthlyBonus.setDataType(DataTypes.createDecimalType(10, 4)) - monthlyBonus.setPrecision(10) + monthlyBonus.setDataType(DataTypes.createDecimalType(18, 4)) + monthlyBonus.setPrecision(18) monthlyBonus.setScale(4) monthlyBonus.setSchemaOrdinal(8) 
monthlyBonus.setEncodingList(invertedIndexEncoding) @@ -358,33 +351,33 @@ object CarbonDataStoreCreator { val reader: BufferedReader = new BufferedReader( new FileReader(factFilePath)) val header: String = reader.readLine() - val split: Array[String] = header.split(",") val allCols: util.List[CarbonColumn] = new util.ArrayList[CarbonColumn]() - val dims: util.List[CarbonDimension] = + val dimensions: util.List[CarbonDimension] = table.getDimensionByTableName(table.getTableName) - allCols.addAll(dims) + allCols.addAll(dimensions) val msrs: List[CarbonMeasure] = table.getMeasureByTableName(table.getTableName) allCols.addAll(msrs) - val set: Array[util.Set[String]] = Array.ofDim[util.Set[String]](dims.size) - val dimsIndex = dims.map(dim => dim.getColumnSchema.getSchemaOrdinal) - for (i <- set.indices) { - set(i) = new util.HashSet[String]() + val dimensionsIndex = dimensions.map(dim => dim.getColumnSchema.getSchemaOrdinal) + val dimensionSet: Array[util.List[String]] = Array.ofDim[util.List[String]](dimensions.size) + + for (i <- dimensionSet.indices) { + dimensionSet(i) = new util.ArrayList[String]() } var line: String = reader.readLine() while (line != null) { val data: Array[String] = line.split(",") - for (i <- set.indices) { - set(i).add(data(dimsIndex(i))) + for (index <- dimensionSet.indices) { + addDictionaryValuesToDimensionSet(dimensions, dimensionsIndex, dimensionSet, data, index) } line = reader.readLine() } val dictCache: Cache[DictionaryColumnUniqueIdentifier, ReverseDictionary] = CacheProvider .getInstance.createCache(CacheType.REVERSE_DICTIONARY) - for (i <- set.indices) { + for (index <- dimensionSet.indices) { val columnIdentifier: ColumnIdentifier = - new ColumnIdentifier(dims.get(i).getColumnId, null, null) + new ColumnIdentifier(dimensions.get(index).getColumnId, null, null) val dictionaryColumnUniqueIdentifier: DictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier( @@ -393,7 +386,7 @@ object CarbonDataStoreCreator { 
columnIdentifier.getDataType) val writer: CarbonDictionaryWriter = new CarbonDictionaryWriterImpl( dictionaryColumnUniqueIdentifier) - for (value <- set(i)) { + for (value <- dimensionSet(index).distinct) { writer.write(value) } writer.close() @@ -403,7 +396,7 @@ object CarbonDataStoreCreator { new DictionaryColumnUniqueIdentifier( absoluteTableIdentifier, columnIdentifier, - dims.get(i).getDataType) + dimensions.get(index).getDataType) ) .asInstanceOf[Dictionary] val preparator: CarbonDictionarySortInfoPreparator = @@ -412,7 +405,7 @@ object CarbonDataStoreCreator { val dictionarySortInfo: CarbonDictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, - dims.get(i).getDataType) + dimensions.get(index).getDataType) val carbonDictionaryWriter: CarbonDictionarySortIndexWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier) try { @@ -433,6 +426,31 @@ object CarbonDataStoreCreator { } + private def addDictionaryValuesToDimensionSet(dims: util.List[CarbonDimension], + dimensionIndex: mutable.Buffer[Int], + dimensionSet: Array[util.List[String]], + data: Array[String], + index: Int) = { + if (isDictionaryDefaultMember(dims, dimensionSet, index)) { + dimensionSet(index).add(CarbonCommonConstants.MEMBER_DEFAULT_VAL) + dimensionSet(index).add(data(dimensionIndex(index))) + } + else { + if (data.length == 1) { + dimensionSet(index).add("""\N""") + } else { + dimensionSet(index).add(data(dimensionIndex(index))) + } + } + } + + private def isDictionaryDefaultMember(dims: util.List[CarbonDimension], + dimensionSet: Array[util.List[String]], + index: Int) = { + dimensionSet(index).isEmpty && dims(index).hasEncoding(Encoding.DICTIONARY) && + !dims(index).hasEncoding(Encoding.DIRECT_DICTIONARY) + } + /** * Execute graph which will further load data *