Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2436#discussion_r200898644
--- Diff:
integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
---
@@ -486,6 +504,60 @@ abstract class CarbonDDLSqlParser extends
AbstractCarbonSparkSQLParser {
}
}
+ /**
+ * This method validates the long string columns
+ *
+ * @param fields
+ * @param varcharCols
+ * @return
+ */
+ private def validateLongStringColumns(fields: Seq[Field],
+ varcharCols: Seq[String]): Unit = {
+ var longStringColumnsMap: Map[String, Field] = Map[String, Field]()
+ fields.foreach(field =>
+ longStringColumnsMap.put(field.column.toUpperCase, field)
+ )
+ var dataTypeErr: Set[String] = Set[String]()
+ var duplicateColumnErr: Map[String, Int] = Map[String, Int]()
+ var nullColumnErr: Set[String] = Set[String]()
+ var tmpStr: String = ""
+ varcharCols.foreach {
+ column =>
+ tmpStr = column.toUpperCase
+ duplicateColumnErr.get(tmpStr) match {
+ case None => duplicateColumnErr.put(tmpStr, 1)
+ case Some(count) => duplicateColumnErr.put(tmpStr, count + 1)
+ }
+ longStringColumnsMap.get(tmpStr) match {
+ case None => nullColumnErr += column
+ case Some(field) => if
(!DataTypes.STRING.getName.equalsIgnoreCase(field.dataType.get)) {
+ dataTypeErr += column
+ }
+ }
+ }
+ if (!nullColumnErr.isEmpty) {
+ val errMsg = "long_string_columns:" +
+ nullColumnErr.mkString(",") +
+ " does not exist in table. Please check create table
statement."
+ throw new MalformedCarbonCommandException(errMsg)
+ }
+
+ var duplicateColumns = duplicateColumnErr.filter(kv => kv._2 !=
1).keySet
--- End diff --
I feel this logic can be optimized. Can you describe what validation is
done in this function?
---