This is an automated email from the ASF dual-hosted git repository.

indhumuthumurugesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 07c98e8  [CARBONDATA-4189] alter table validation issues
07c98e8 is described below

commit 07c98e877b0287f259d07f7ad8f519f0f60c2b07
Author: Mahesh Raju Somalaraju <[email protected]>
AuthorDate: Tue May 18 16:02:38 2021 +0530

    [CARBONDATA-4189] alter table validation issues
    
    Why is this PR needed?
    1. The alter table duplicate-column check was missing for dimension/complex
       columns
    2. Long string table properties (LONG_STRING_COLUMNS) should not be
       supported for complex columns
    
    What changes were proposed in this PR?
    1. Changed the collection used while preparing the dimension columns from a
       LinkedHashSet to a Scala Seq so that duplicate columns are preserved and
       can be detected (see the sketch below)
    2. Added a check that throws an exception when LONG_STRING_COLUMNS is set
       for a complex column
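    
    A minimal sketch of why the collection change matters, using a simplified,
    hypothetical stand-in for CarbonParserUtil's Field case class: a
    LinkedHashSet silently collapses equal entries, so a later duplicate-column
    check never sees the repetition, while a Seq keeps both occurrences and
    lets the validation fire.
    
        import scala.collection.mutable.LinkedHashSet
        
        // Hypothetical, stripped-down stand-in for the real Field case class
        case class Field(column: String, dataType: Option[String])
        
        val arr1 = Field("arr1", Some("array<int>"))
        val dup  = Field("arr1", Some("array<int>")) // same column declared twice
        
        val asSet = LinkedHashSet(arr1, dup)         // size == 1: duplicate is lost
        var asSeq = Seq(arr1)
        asSeq :+= dup                                // size == 2: duplicate survives
        
        // With the Seq, downstream validation can spot the repeated name:
        val dupNames = asSeq.groupBy(_.column.toLowerCase).filter(_._2.size > 1).keys
        assert(dupNames.toSeq == Seq("arr1"))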
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #4138
---
 .../spark/sql/catalyst/CarbonParserUtil.scala      | 10 +++---
 .../org/apache/spark/util/AlterTableUtil.scala     | 15 ++++----
 .../restructure/AlterTableValidationTestCase.scala | 40 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala b/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
index 266cd95..f41f2bf 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
@@ -639,7 +639,7 @@ object CarbonParserUtil {
   protected def extractDimAndMsrFields(fields: Seq[Field], indexFields: Seq[Field],
       tableProperties: Map[String, String]):
   (Seq[Field], Seq[Field], Seq[String], Seq[String], Seq[String]) = {
-    var dimFields: LinkedHashSet[Field] = LinkedHashSet[Field]()
+    var dimFields: Seq[Field] = Seq[Field]()
     var msrFields: Seq[Field] = Seq[Field]()
     var noDictionaryDims: Seq[String] = Seq[String]()
     var varcharCols: Seq[String] = Seq[String]()
@@ -712,9 +712,9 @@ object CarbonParserUtil {
     allFields.foreach { field =>
       if (field.dataType.get.toUpperCase.equals("TIMESTAMP")) {
         noDictionaryDims :+= field.column
-        dimFields += field
+        dimFields :+= field
       } else if (isDetectAsDimensionDataType(field.dataType.get)) {
-        dimFields += field
+        dimFields :+= field
         // consider all String and binary cols as noDictionaryDims by default
         if ((DataTypes.STRING.getName.equalsIgnoreCase(field.dataType.get)) ||
             (DataTypes.BINARY.getName.equalsIgnoreCase(field.dataType.get))) {
@@ -728,7 +728,7 @@ object CarbonParserUtil {
                                                    field.column}}in sort_columns.")
      } else if (sortKeyDimsTmp.exists(x => x.equalsIgnoreCase(field.column))) {
         noDictionaryDims :+= field.column
-        dimFields += field
+        dimFields :+= field
       } else {
         msrFields :+= field
       }
@@ -740,7 +740,7 @@ object CarbonParserUtil {
     } else {
      tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, sortKeyDimsTmp.mkString(","))
     }
-    (dimFields.toSeq, msrFields, noDictionaryDims, sortKeyDimsTmp, varcharCols)
+    (dimFields, msrFields, noDictionaryDims, sortKeyDimsTmp, varcharCols)
   }
 
   def isDefaultMeasure(dataType: Option[String]): Boolean = {
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
index 2133b16..e3ec06e 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
@@ -912,14 +912,17 @@ object AlterTableUtil {
    val colSchemas = carbonTable.getTableInfo.getFactTable.getListOfColumns.asScala
     longStringCols.foreach { col =>
       if (!colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col))) {
-        val errorMsg = "LONG_STRING_COLUMNS column: " + col +
-                       " does not exist in table. Please check the DDL."
+        val errorMsg = s"LONG_STRING_COLUMNS column: $col does not exist in table. Please check " +
+                       s"the DDL."
         throw new MalformedCarbonCommandException(errorMsg)
      } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col) &&
-                                          !x.getDataType.toString
-                                            .equalsIgnoreCase("STRING"))) {
-        val errMsg = "LONG_STRING_COLUMNS column: " + col +
-                     " is not a string datatype column"
+                                        x.isComplexColumn)) {
+        val errMsg = s"Complex child column $col cannot be set as LONG_STRING_COLUMNS."
+        throw new MalformedCarbonCommandException(errMsg)
+      } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col) &&
+                                        !x.getDataType.toString
+                                          .equalsIgnoreCase("STRING"))) {
+        val errMsg = s"LONG_STRING_COLUMNS column: $col is not a string datatype column"
         throw new MalformedCarbonCommandException(errMsg)
       }
     }
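
A hedged usage sketch of the validation added above (illustrative, assuming a
CarbonData-enabled SparkSession named spark; the table name is made up, though
arr1.val mirrors the child-column naming used in the test below):

    spark.sql("create table t (a int, arr1 array<string>) stored as carbondata")
    // Rejected by the new isComplexColumn branch: arr1.val is a complex child column
    spark.sql("alter table t SET TBLPROPERTIES('LONG_STRING_COLUMNS'='arr1.val')")
    // => MalformedCarbonCommandException:
    //    Complex child column arr1.val cannot be set as LONG_STRING_COLUMNS.
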
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
index 1f60f9b..3fc4c62 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
@@ -812,6 +812,46 @@ test("test alter command for boolean data type with correct default measure valu
     sql("DROP TABLE t1")
   }
 
+  test("testing the duplicate add columns for complex data types") {
+    sql("drop table if exists alter_complex")
+    sql("create table alter_complex (a int, b string, arr1 array<int>) " +
+        "stored as carbondata")
+    val errMsg = "Alter table add operation failed: Duplicate column found with name"
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(arr1 array<int>)")
+    }.getMessage.contains(errMsg))
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(arr2 array<int>, arr2 array<int>)")
+    }.getMessage.contains(errMsg))
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(c int, c int)")
+    }.getMessage.contains(errMsg))
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(c int, c string)")
+    }.getMessage.contains(errMsg))
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(c string, c int)")
+    }.getMessage.contains(errMsg))
+    assert(intercept[ProcessMetaDataException] {
+      sql("alter table alter_complex add columns(c string, c array<string>)")
+    }.getMessage.contains(errMsg))
+    sql("drop table if exists alter_complex")
+  }
+
+  test("testing the long string properties for complex columns") {
+    sql("drop table if exists alter_complex")
+    sql("create table alter_complex (a int, arr1 array<string>) " +
+        "stored as carbondata")
+    assert(intercept[RuntimeException] {
+      sql("alter table alter_complex SET TBLPROPERTIES" +
+          "('LONG_STRING_COLUMNS'='arr1.val')")
+    }.getMessage
+      .contains(
+        "Alter table newProperties operation failed: Complex child column 
arr1.val cannot be set " +
+        "as LONG_STRING_COLUMNS"))
+    sql("drop table if exists alter_complex")
+  }
+
   def sortScopeInDescFormatted(tableName: String): String = {
     sql(s"DESCRIBE FORMATTED $tableName").filter(
       (x: Row) => x.getString(0).equalsIgnoreCase("sort scope")
