maropu commented on a change in pull request #33363:
URL: https://github.com/apache/spark/pull/33363#discussion_r670913494



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
##########
@@ -240,7 +240,11 @@ trait BaseScriptTransformationExec extends UnaryExecNode {
     (f: String => Any, converter: Any => Any) =>
       (data: String) => converter {
         try {
-          f(data)
+          if (data == ioschema.outputRowFormatMap("TOK_TABLEROWFORMATNULL")) {
+            null
+          } else {
+            f(data)
+          }

Review comment:
       nit: how about checking for the NULL token first and wrapping only 
`f(data)` in the `try`, like this?
   ```
           if (data == ioschema.outputRowFormatMap("TOK_TABLEROWFORMATNULL")) {
             null
           } else {
             try {
               f(data)
             } catch {
               case NonFatal(_) => null
             }
           }
   ```

##########
File path: sql/core/src/test/resources/sql-tests/inputs/transform.sql
##########
@@ -121,6 +121,38 @@ USING 'cat' AS (d)
   NULL DEFINED AS 'NULL'
 FROM t;
 
+SELECT TRANSFORM(a, b, c, null)
+  ROW FORMAT DELIMITED
+  FIELDS TERMINATED BY '@'
+  LINES TERMINATED BY '\n'
+USING 'cat' AS (a, b, c, d)
+  ROW FORMAT DELIMITED
+  FIELDS TERMINATED BY '@'
+  LINES TERMINATED BY '\n'
+  NULL DEFINED AS 'NULL'

Review comment:
       What if `NULL DEFINED` includes a meta character, e.g., `\n`? Does it 
still work correctly?

##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
##########
@@ -274,11 +278,21 @@ abstract class BaseScriptTransformationWriterThread 
extends Thread with Logging
         ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATLINES")
       } else {
         val sb = new StringBuilder
-        sb.append(row.get(0, inputSchema(0)))
+        val first = row.get(0, inputSchema(0))
+        if (first == null) {
+          sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
+        } else {
+          sb.append(first)
+        }
         var i = 1
         while (i < len) {
           sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATFIELD"))
-          sb.append(row.get(i, inputSchema(i)))
+          val value = row.get(i, inputSchema(i))
+          if (value == null) {
+            sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
+          } else {
+            sb.append(value)
+          }

Review comment:
       nit: how about factoring the duplicated null handling into a small 
helper, like this?
   ```
           val sb = new StringBuilder
           def appendToBuffer(s: AnyRef): Unit = {
             if (s == null) {
               sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
             } else {
               sb.append(s)
             }
           }
           val first = row.get(0, inputSchema(0))
           appendToBuffer(first)
           var i = 1
           while (i < len) {
             sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATFIELD"))
             val value = row.get(i, inputSchema(i))
             appendToBuffer(value)
             i += 1
           }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to