akashrn5 commented on a change in pull request #4115: URL: https://github.com/apache/carbondata/pull/4115#discussion_r613021868
########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -275,6 +275,56 @@ class AlterTableColumnSchemaGenerator( allColumns ++= Seq(columnSchema) } newCols ++= Seq(columnSchema) + if(field.dataType== Some("Array") ) { + columnSchema.setNumberOfChild(field.children.size) + val childField = field.children.get(0) + val childSchema: ColumnSchema = TableNewProcessor.createColumnSchema( + childField, + new java.util.ArrayList[Encoding](), + isDimensionCol = true, + childField.precision, + childField.scale, + childField.schemaOrdinal + currentSchemaOrdinal, + alterTableModel.highCardinalityDims, + alterTableModel.databaseName.getOrElse(dbName), + isSortColumn(childField.name.getOrElse(childField.column)), + isVarcharColumn(childField.name.getOrElse(childField.column))) + + if (childSchema.getDataType == DataTypes.VARCHAR) { + // put the new long string columns in 'longStringCols' + // and add them after old long string columns + longStringCols ++= Seq(childSchema) + } else { + allColumns ++= Seq(childSchema) + } + newCols ++= Seq(childSchema) + } else if (field.dataType == Some("Struct")) { Review comment: same as above ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -275,6 +275,56 @@ class AlterTableColumnSchemaGenerator( allColumns ++= Seq(columnSchema) } newCols ++= Seq(columnSchema) + if(field.dataType== Some("Array") ) { Review comment: instead of hardcoding, please use existing APIs like `org.apache.carbondata.core.metadata.datatype.DataTypes#isArrayType` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableAddColumnCommand.scala ########## @@ -85,6 +85,8 @@ private[sql] case class CarbonAlterTableAddColumnCommand( timeStamp = System.currentTimeMillis val schemaEvolutionEntry = new 
org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry schemaEvolutionEntry.setTimeStamp(timeStamp) + // filter out complex children columns + newCols = newCols.filter(x => !x.isComplexColumn) schemaEvolutionEntry.setAdded(newCols.toList.asJava) Review comment: when you add the column schemas to the schema evolution entry as added columns, if you are adding only the parent column, make sure that the parent column schema contains all of its child columns. Please confirm whether this is already handled; if not, please handle it. ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -534,10 +581,10 @@ object TableNewProcessor { if (highCardinalityDims.contains(colName)) { encoders.remove(Encoding.DICTIONARY) } - if (dataType == DataTypes.DATE) { - encoders.add(Encoding.DICTIONARY) - encoders.add(Encoding.DIRECT_DICTIONARY) - } else if (dataType == DataTypes.TIMESTAMP && !highCardinalityDims.contains(colName)) { + // complex children does not require encoding + if (!colName.contains('.') && (dataType == DataTypes.DATE || Review comment: please use a static constant for the dot ('.') character instead of hardcoding it ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParserUtil.scala ########## @@ -683,12 +683,6 @@ object CarbonSparkSqlParserUtil { fields: List[Field], tblProp: Option[List[(String, String)]] ): CarbonAlterTableAddColumnCommand = { - fields.foreach { f => Review comment: I think that since you support only single-level complex columns, you should throw an exception if a user tries to add a multi-level one. Wherever you handle this, please add a TODO mentioning the JIRA ID you have already created for supporting multi-level columns. ########## File path: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala ########## @@ -136,6 +140,209 @@ class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll { sql(s"""DROP TABLE IF EXISTS ${ tableName }""") } + test("Test 
adding of array of all primitive datatypes") { + sql("DROP TABLE IF EXISTS alter_com") + sql("CREATE TABLE alter_com(intfield int) STORED AS carbondata") + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr1 array<short>, arr2 array<int>, arr3 " + + "array<long>, arr4 array<double>, arr5 array<decimal(8,2)>, arr6 array<string>, arr7 " + + "array<char(5)>, arr8 array<varchar(50)>, arr9 array<boolean>, arr10 array<date>, arr11 " + + "array<timestamp> )") + sql("desc table alter_com").show(false) + sql( + "insert into alter_com values(1,array(1,5),array(1,2),array(1,2,3),array(1.2d,2.3d),array" + + "(4.5,6.7),array('hello','world'),array('a','bcd'),array('abcd','efg'),array(true,false)," + + "array('2017-02-01','2018-09-11'),array('2017-02-01 00:01:00','2018-02-01 02:21:00') )") + checkAnswer(sql( + "select * from alter_com"), + Seq(Row(1, make(Array(1, 5)), + make(Array(1, 2)), + make(Array(1, 2, 3)), + make(Array(1.2, 2.3)), + make(Array(java.math.BigDecimal.valueOf(4.5).setScale(2), + java.math.BigDecimal.valueOf(6.7).setScale(2))), + make(Array("hello", "world")), + make(Array("a", "bcd")), + make(Array("abcd", "efg")), + make(Array(true, false)), + make(Array(Date.valueOf("2017-02-01"), Date.valueOf("2018-09-11"))), + make(Array(Timestamp.valueOf("2017-02-01 00:01:00"), + Timestamp.valueOf("2018-02-01 02:21:00"))) + ))) + sql("DROP TABLE IF EXISTS alter_com") + } + + test("Test adding of struct of all primitive datatypes") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, structField struct<a:short,b:int,c:long,d:double," + + "e:decimal(8,2),f:string,g:char(5),h:varchar(50),i:boolean,j:date,k:timestamp>) STORED AS " + + "carbondata") + sql( + "insert into alter_com values(1, named_struct('a',1,'b',2,'c',3,'d',1.23,'e',2.34,'f'," + + "'hello','g','abc','h','def','i',true,'j','2017-02-01','k','2018-02-01 02:00:00.0') ) ") + sql("select * from alter_com").show(false) + sql("DROP TABLE IF EXISTS alter_com") + } + + def 
insertIntoTableForArrayType(): Unit = { + sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))") + sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))") + sql( + "insert into alter_com values(6,array(1,2,3),array(1234,8,33333),array('Iron','Man'," + + "'Jarvis'))") + } + + def checkRestulForArrayType(): Unit = { + val totalRows = sql("select * from alter_com").collect() + val a = sql("select * from alter_com where array_contains(arr2,2)").collect + val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect + val c = sql("select * from alter_com where intField = 1").collect + assert(totalRows.size == 6) + assert(a.size == 1) + assert(b.size == 1) + // check default value for newly added array column that is index - 3 and 4 + assert(c(0)(2) == null && c(0)(3) == null) + } + + test("Test alter add for arrays enabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, arr1 array<int>) " + + "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='true')") + + sql("insert into alter_com values(1,array(1) )") + sql("insert into alter_com values(2,array(9,0) )") + sql("insert into alter_com values(3,array(11,12,13) )") + + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" + + "('LOCAL_DICTIONARY_INCLUDE'='arr3')") + val schema = sql("describe alter_com").collect() + assert(schema.size == 4) + + // For the previous segments the default value for newly added array column is null + sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))") + sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))") + sql( + "insert into alter_com values(6,array(1,2,3),array(1234,8,33333),array('Iron','Man'," + + "'Jarvis'))") + + val totalRows = sql("select * from alter_com").collect() + val a = sql("select * from alter_com where array_contains(arr2,2)").collect + 
val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect + val c = sql("select * from alter_com where intField = 1").collect + assert(totalRows.size == 6) + assert(a.size == 1) + assert(b.size == 1) + // check default value for newly added array column that is index - 3 and 4 + assert(c(0)(2) == null && c(0)(3) == null) + sql("DROP TABLE IF EXISTS alter_com") + + } + + test("Test alter add for arrays disabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, arr1 array<int>) " + + "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='false')") + + sql("insert into alter_com values(1,array(1) )") + sql("insert into alter_com values(2,array(9,0) )") + sql("insert into alter_com values(3,array(11,12,13) )") + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" + + "('LOCAL_DICTIONARY_EXCLUDE'='arr3')") + val schema = sql("describe alter_com").collect() + assert(schema.size == 4) + + // For the previous segments the default value for newly added array column is null + sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))") + sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))") + sql( + "insert into alter_com values(6,array(1,2,3),array(1234,80,3333),array('Iron','Man'," + + "'Jarvis'))") + + val totalRows = sql("select * from alter_com").collect() + val a = sql("select * from alter_com where array_contains(arr2,2)").collect + val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect + val c = sql("select * from alter_com where intField = 1").collect + assert(totalRows.size == 6) + assert(a.size == 1) + assert(b.size == 1) + // check default value for newly added array column that is index - 3 and 4 + assert(c(0)(2) == null && c(0)(3) == null) + sql("DROP TABLE IF EXISTS alter_com") + + } + + def insertIntoTableForStructType(): Unit = { + sql( + "insert into 
alter_struct values(2, named_struct('id1', 'id2','name1','name2'), " + + "named_struct('a','id2','b', 'abc2'), 'hello world', 5, named_struct('c','id3'," + + "'d', 'abc3','e', 22), array(1,2,3) )") + sql( + "insert into alter_struct values(3, named_struct('id1', 'id3','name1','name3'), " + + "named_struct('a','id2.1','b', 'abc2.1'), 'India', 5, named_struct('c','id3.1'," + + "'d', 'abc3.1','e', 25), array(4,5) )") + + } + + def checkResultForStructType(): Unit = { + val totalRows = sql("select * from alter_struct").collect() + val a = sql("select * from alter_struct where struct1.a = 'id2' ").collect() + val b = sql( + "select * from alter_struct where struct1.a = 'id2' or struct2.c = 'id3.1' or " + + "array_contains(arr,5) ").collect() + val c = sql("select * from alter_struct where roll = 1").collect() + + assert(totalRows.size == 3) + assert(a.size == 1) + assert(b.size == 2) + // check default value for newly added struct column + assert(c(0)(2) == null) + } + + test("Test alter add for structs enabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_struct") + sql( + "create table alter_struct(roll int, department struct<id1:string,name1:string>) STORED AS " + + "carbondata TBLPROPERTIES('local_dictionary_enable'='true')") + sql( + "insert into alter_struct values(1, named_struct('id1', 'id1','name1','name1'))") + sql( + "ALTER TABLE alter_struct ADD COLUMNS(struct1 struct<a:string,b:string>, temp string," + + " intField int, struct2 struct<c:string,d:string,e:int>, arr array<int>) TBLPROPERTIES " + + "('LOCAL_DICTIONARY_INCLUDE'='struct1, struct2')") + val schema = sql("describe alter_struct").collect() + assert(schema.size == 7) + // For the previous segments the default value for newly added struct column is null + insertIntoTableForStructType + checkResultForStructType + sql("DROP TABLE IF EXISTS alter_struct") + + } + + test("Test alter add for structs, disabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_struct") + sql( + "create table 
alter_struct(roll int, department struct<id1:string,name1:string>) STORED AS " + + "carbondata TBLPROPERTIES('local_dictionary_enable'='false')") + sql( + "insert into alter_struct values(1, named_struct('id1', 'id1','name1','name1'))") + sql( + "ALTER TABLE alter_struct ADD COLUMNS(struct1 struct<a:string,b:string>, temp string," + + " intField int, struct2 struct<c:string,d:string,e:int>, arr array<int>) TBLPROPERTIES " + + "('LOCAL_DICTIONARY_EXCLUDE'='struct1, struct2')") + val schema = sql("describe alter_struct").collect() + assert(schema.size == 7) + // For the previous segments the default value for newly added struct column is null + insertIntoTableForStructType + checkResultForStructType + sql("DROP TABLE IF EXISTS alter_struct") + + } + Review comment: also add a test case to validate adding of multi level complex column -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org