yaooqinn commented on a change in pull request #26942: [SPARK-30301][SQL] Fix wrong results when datetimes as fields of complex types
URL: https://github.com/apache/spark/pull/26942#discussion_r397631010
 
 

 ##########
 File path: sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
 ##########
 @@ -56,78 +56,41 @@ object HiveResult {
       // We need the types so we can output struct field names
       val types = executedPlan.output.map(_.dataType)
       // Reformat to match hive tab delimited output.
-      result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t"))
+      result.map(_.zip(types).map(e => toHiveString(e)))
+        .map(_.mkString("\t"))
   }
 
-  private val primitiveTypes = Seq(
-    StringType,
-    IntegerType,
-    LongType,
-    DoubleType,
-    FloatType,
-    BooleanType,
-    ByteType,
-    ShortType,
-    DateType,
-    TimestampType,
-    BinaryType)
-
   private lazy val zoneId = 
DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
   private lazy val dateFormatter = DateFormatter(zoneId)
   private lazy val timestampFormatter = 
TimestampFormatter.getFractionFormatter(zoneId)
 
-  /** Hive outputs fields of structs slightly differently than top level 
attributes. */
-  private def toHiveStructString(a: (Any, DataType)): String = a match {
-    case (struct: Row, StructType(fields)) =>
-      struct.toSeq.zip(fields).map {
-        case (v, t) => s""""${t.name}":${toHiveStructString((v, 
t.dataType))}"""
-      }.mkString("{", ",", "}")
-    case (seq: Seq[_], ArrayType(typ, _)) =>
-      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-    case (map: Map[_, _], MapType(kType, vType, _)) =>
-      map.map {
-        case (key, value) =>
-          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, 
vType))
-      }.toSeq.sorted.mkString("{", ",", "}")
-    case (null, _) => "null"
-    case (s: String, StringType) => "\"" + s + "\""
-    case (decimal, DecimalType()) => decimal.toString
-    case (interval: CalendarInterval, CalendarIntervalType) =>
-      SQLConf.get.intervalOutputStyle match {
-        case SQL_STANDARD => toSqlStandardString(interval)
-        case ISO_8601 => toIso8601String(interval)
-        case MULTI_UNITS => toMultiUnitsString(interval)
-      }
-    case (other, tpe) if primitiveTypes contains tpe => other.toString
-  }
-
   /** Formats a datum (based on the given data type) and returns the string 
representation. */
-  def toHiveString(a: (Any, DataType)): String = a match {
-    case (struct: Row, StructType(fields)) =>
-      struct.toSeq.zip(fields).map {
-        case (v, t) => s""""${t.name}":${toHiveStructString((v, 
t.dataType))}"""
-      }.mkString("{", ",", "}")
-    case (seq: Seq[_], ArrayType(typ, _)) =>
-      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-    case (map: Map[_, _], MapType(kType, vType, _)) =>
-      map.map {
-        case (key, value) =>
-          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, 
vType))
-      }.toSeq.sorted.mkString("{", ",", "}")
-    case (null, _) => "NULL"
+  def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a 
match {
+    case (null, _) => if (nested) "null" else "NULL"
+    case (b, BooleanType) => b.toString
     case (d: Date, DateType) => 
dateFormatter.format(DateTimeUtils.fromJavaDate(d))
     case (t: Timestamp, TimestampType) =>
-      DateTimeUtils.timestampToString(timestampFormatter, 
DateTimeUtils.fromJavaTimestamp(t))
+      timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t))
     case (bin: Array[Byte], BinaryType) => new String(bin, 
StandardCharsets.UTF_8)
 
 Review comment:
   ```sql
   0: jdbc:hive2://localhost:10000> desc bt_arr;
   INFO  : Compiling command(queryId=hive_20200325062323_10f71e38-fe8d-4094-aa87-5361df066edb): desc bt_arr
   INFO  : Semantic Analysis Completed
   INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:col_name, type:string, comment:from deserializer), FieldSchema(name:data_type, type:string, comment:from deserializer), FieldSchema(name:comment, type:string, comment:from deserializer)], properties:null)
   INFO  : Completed compiling command(queryId=hive_20200325062323_10f71e38-fe8d-4094-aa87-5361df066edb); Time taken: 0.056 seconds
   INFO  : Concurrency mode is disabled, not creating a lock manager
   INFO  : Executing command(queryId=hive_20200325062323_10f71e38-fe8d-4094-aa87-5361df066edb): desc bt_arr
   INFO  : Starting task [Stage-0:DDL] in serial mode
   INFO  : Completed executing command(queryId=hive_20200325062323_10f71e38-fe8d-4094-aa87-5361df066edb); Time taken: 0.02 seconds
   INFO  : OK
   +-----------+----------------+----------+--+
   | col_name  |   data_type    | comment  |
   +-----------+----------------+----------+--+
   | _c0       | array<binary>  |          |
   +-----------+----------------+----------+--+
   1 row selected (0.158 seconds)
   0: jdbc:hive2://localhost:10000> select * from bt_arr;
   INFO  : Compiling command(queryId=hive_20200325062323_edfa2e37-a8da-481b-97e9-f4ed9a37b9a4): select * from bt_arr
   INFO  : Semantic Analysis Completed
   INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:bt_arr._c0, type:array<binary>, comment:null)], properties:null)
   INFO  : Completed compiling command(queryId=hive_20200325062323_edfa2e37-a8da-481b-97e9-f4ed9a37b9a4); Time taken: 0.053 seconds
   INFO  : Concurrency mode is disabled, not creating a lock manager
   INFO  : Executing command(queryId=hive_20200325062323_edfa2e37-a8da-481b-97e9-f4ed9a37b9a4): select * from bt_arr
   INFO  : Completed executing command(queryId=hive_20200325062323_edfa2e37-a8da-481b-97e9-f4ed9a37b9a4); Time taken: 0.001 seconds
   INFO  : OK
   +-------------+--+
   | bt_arr._c0  |
   +-------------+--+
   | [spark]     |
   | [hello]     |
   | [3]         |
   | [.]         |
   | [0]         |
   +-------------+--+
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to