maropu commented on a change in pull request #29303:
URL: https://github.com/apache/spark/pull/29303#discussion_r463601005



##########
File path: sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
##########
@@ -126,12 +124,52 @@ private[hive] class SparkGetColumnsOperation(
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
   }
 
+  /**
+   * For numeric and datetime types, returns the default size of the corresponding catalyst type.
+   * For struct types whose elements are all fixed-size, returns the sum of the element sizes.
+   * For array, map, string, and binary types, the column size is variable, so null is returned
+   * to indicate it is unknown.
+   */
+  private def getColumnSize(typ: DataType): Option[Int] = typ match {

Review comment:
      Does Hive return almost the same values for the column size? (For reference, a sketch of the documented Spark-side behavior follows below the link.)
   
https://github.com/apache/hive/blob/3e5e99eae154ceb8f9aa4e4ec71e6b05310e98e4/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java#L187-L211
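
For comparison with the Hive code linked above, here is a minimal Scala sketch of the behavior the doc comment describes, relying only on catalyst's `DataType.defaultSize`; it is not necessarily the exact code in the PR:

```scala
import org.apache.spark.sql.types._

// Sketch only: mirrors the doc comment above, not necessarily the PR's code.
def columnSizeSketch(typ: DataType): Option[Int] = typ match {
  // Fixed-size primitives (boolean, numerics, date, timestamp): use the
  // catalyst default size.
  case dt @ (BooleanType | ByteType | ShortType | IntegerType | LongType |
      FloatType | DoubleType | _: DecimalType | DateType | TimestampType) =>
    Some(dt.defaultSize)
  // Structs: sum the element sizes when every element is fixed-size.
  case StructType(fields) =>
    val sizes = fields.map(f => columnSizeSketch(f.dataType))
    if (sizes.forall(_.isDefined)) Some(sizes.flatten.sum) else None
  // Variable-length types (array, map, string, binary): size unknown,
  // surfaced as null.
  case _: ArrayType | _: MapType | StringType | BinaryType => None
  case _ => None
}
```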

##########
File path: sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
##########
@@ -101,6 +104,79 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
       }
     }
   }
+
+  test("SparkGetColumnsOperation") {
+    val schemaName = "default"
+    val tableName = "spark_get_col_operation"
+    val decimalType = DecimalType(10, 2)
+    val ddl =
+      s"""
+         |CREATE TABLE $schemaName.$tableName

Review comment:
      I think we don't have a strict rule for the format, but how about following the existing format (see the sketch after the link)?
   
https://github.com/apache/spark/blob/8014b0b5d61237dc4851d4ae9927778302d692da/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala#L35-L41
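
For reference, the convention in the linked suite looks roughly like the sketch below; the exact indentation in ShowCreateTableSuite may differ slightly:

```scala
// Hedged sketch of the existing multi-line DDL style used elsewhere in the codebase.
val ddl =
  s"""CREATE TABLE $schemaName.$tableName (
     |  a boolean COMMENT '0',
     |  b int COMMENT '1'
     |) USING parquet
   """.stripMargin
```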

##########
File path: sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
##########
@@ -101,6 +104,79 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
       }
     }
   }
+
+  test("SparkGetColumnsOperation") {

Review comment:
      Please make this test title clearer, too; one possible rename is sketched below.
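
For example (purely illustrative; the exact wording is up to the author):

```scala
// Hypothetical, more descriptive test title.
test("SparkGetColumnsOperation: fetch column metadata for a table with various data types") {
  // ... existing test body ...
}
```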

##########
File path: sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
##########
@@ -101,6 +104,79 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
       }
     }
   }
+
+  test("SparkGetColumnsOperation") {
+    val schemaName = "default"
+    val tableName = "spark_get_col_operation"
+    val decimalType = DecimalType(10, 2)
+    val ddl =
+      s"""
+         |CREATE TABLE $schemaName.$tableName
+         |  (
+         |    a boolean comment '0',

Review comment:
      Could you test all the types where possible, for better test coverage? (A sketch follows below.)
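
For instance, the DDL could enumerate most of the built-in types; the column list below is a hypothetical sketch, not a required shape:

```scala
// Sketch of a wider-coverage DDL; column names and ordering are illustrative.
val ddl =
  s"""CREATE TABLE $schemaName.$tableName (
     |  c0 boolean, c1 tinyint, c2 smallint, c3 int, c4 bigint,
     |  c5 float, c6 double, c7 decimal(38, 20), c8 decimal(10, 2),
     |  c9 string, c10 binary, c11 date, c12 timestamp,
     |  c13 array<long>, c14 map<smallint, tinyint>,
     |  c15 struct<X: bigint, Y: double>
     |) USING parquet""".stripMargin
```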

##########
File path: sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
##########
@@ -101,6 +104,79 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer {
       }
     }
   }
+
+  test("SparkGetColumnsOperation") {
+    val schemaName = "default"
+    val tableName = "spark_get_col_operation"
+    val decimalType = DecimalType(10, 2)
+    val ddl =
+      s"""
+         |CREATE TABLE $schemaName.$tableName
+         |  (
+         |    a boolean comment '0',
+         |    b int comment '1',
+         |    c float comment '2',
+         |    d ${decimalType.sql} comment '3',
+         |    e array<long> comment '4',
+         |    f array<string> comment '5',
+         |    g map<smallint, tinyint> comment '6',
+         |    h date comment '7',
+         |    i timestamp comment '8',
+         |    j struct<X: bigint,Y: double> comment '9'
+         |  ) using parquet""".stripMargin
+
+    withCLIServiceClient { client =>
+      val sessionHandle = client.openSession(user, "")
+      val confOverlay = new java.util.HashMap[java.lang.String, java.lang.String]
+      val opHandle = client.executeStatement(sessionHandle, ddl, confOverlay)
+      var status = client.getOperationStatus(opHandle)
+      while (!status.getState.isTerminal) {
+        Thread.sleep(10)
+        status = client.getOperationStatus(opHandle)
+      }
+      val getCol = client.getColumns(sessionHandle, "", schemaName, tableName, null)
+      val rowSet = client.fetchResults(getCol)
+      val columns = rowSet.toTRowSet.getColumns
+      assert(columns.get(0).getStringVal.getValues.asScala.forall(_.isEmpty),
+        "catalog name mismatches")
+
+      assert(columns.get(1).getStringVal.getValues.asScala.forall(_ == schemaName),
+        "schema name mismatches")
+
+      assert(columns.get(2).getStringVal.getValues.asScala.forall(_ == tableName),
+        "table name mismatches")
+
+      // column name
+      columns.get(3).getStringVal.getValues.asScala.zipWithIndex.foreach {
+        case (v, i) => assert(v === ('a' + i).toChar.toString, "column name mismatches")
+      }
+
+      val javaTypes = columns.get(4).getI32Val.getValues
+      assert(javaTypes.get(3).intValue() === java.sql.Types.DECIMAL)
+      assert(javaTypes.get(6).intValue() === java.sql.Types.JAVA_OBJECT)
+
+      val typeNames = columns.get(5).getStringVal.getValues
+      assert(typeNames.get(3) === decimalType.sql)
+
+      val colSize = columns.get(6).getI32Val.getValues
+      assert(colSize.get(3).intValue() === decimalType.defaultSize)

Review comment:
      Could you check all the elements in `colSize`? (Actually, I think we need to check all the elements in the fetched `rowSet`.) A sketch of what that could look like is below.
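
A sketch of exhaustive checks, assuming the ten-column DDL quoted above; the expected values follow catalyst's `DataType.defaultSize`, and variable-size columns are expected to come back as NULL via the Thrift nulls bitmap, which this sketch does not decode:

```scala
import org.apache.spark.sql.types._

// Expected COLUMN_SIZE per column of the DDL above; None marks variable-size types.
val expectedSizes: Seq[Option[Int]] = Seq(
  Some(BooleanType.defaultSize),                       // a: boolean
  Some(IntegerType.defaultSize),                       // b: int
  Some(FloatType.defaultSize),                         // c: float
  Some(decimalType.defaultSize),                       // d: decimal(10, 2)
  None,                                                // e: array<long> (variable)
  None,                                                // f: array<string> (variable)
  None,                                                // g: map<smallint, tinyint> (variable)
  Some(DateType.defaultSize),                          // h: date
  Some(TimestampType.defaultSize),                     // i: timestamp
  Some(LongType.defaultSize + DoubleType.defaultSize)  // j: struct of fixed-size fields
)
expectedSizes.zipWithIndex.foreach {
  case (Some(expected), i) => assert(colSize.get(i).intValue() === expected)
  case (None, _) => // NULL-ness lives in the Thrift nulls bitmap, not checked here
}
```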



