maropu commented on a change in pull request #33363:
URL: https://github.com/apache/spark/pull/33363#discussion_r670913494
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
##########
@@ -240,7 +240,11 @@ trait BaseScriptTransformationExec extends UnaryExecNode {
(f: String => Any, converter: Any => Any) =>
(data: String) => converter {
try {
- f(data)
+ if (data == ioschema.outputRowFormatMap("TOK_TABLEROWFORMATNULL")) {
+ null
+ } else {
+ f(data)
+ }
Review comment:
nit:
```
if (data == ioschema.outputRowFormatMap("TOK_TABLEROWFORMATNULL")) {
null
} else {
try {
f(data)
} catch {
case NonFatal(_) => null
}
}
```
?
##########
File path: sql/core/src/test/resources/sql-tests/inputs/transform.sql
##########
@@ -121,6 +121,38 @@ USING 'cat' AS (d)
NULL DEFINED AS 'NULL'
FROM t;
+SELECT TRANSFORM(a, b, c, null)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '@'
+ LINES TERMINATED BY '\n'
+USING 'cat' AS (a, b, c, d)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '@'
+ LINES TERMINATED BY '\n'
+ NULL DEFINED AS 'NULL'
Review comment:
What if `NULL DEFINED` includes a meta character, e.g., `\n`? Does it
still work correctly?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
##########
@@ -274,11 +278,21 @@ abstract class BaseScriptTransformationWriterThread
extends Thread with Logging
ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATLINES")
} else {
val sb = new StringBuilder
- sb.append(row.get(0, inputSchema(0)))
+ val first = row.get(0, inputSchema(0))
+ if (first == null) {
+ sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
+ } else {
+ sb.append(first)
+ }
var i = 1
while (i < len) {
sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATFIELD"))
- sb.append(row.get(i, inputSchema(i)))
+ val value = row.get(i, inputSchema(i))
+ if (value == null) {
+ sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
+ } else {
+ sb.append(value)
+ }
Review comment:
nit:
```
val sb = new StringBuilder
def appendToBuffer(s: AnyRef): Unit = {
if (s == null) {
sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATNULL"))
} else {
sb.append(s)
}
}
val first = row.get(0, inputSchema(0))
appendToBuffer(first)
var i = 1
while (i < len) {
sb.append(ioSchema.inputRowFormatMap("TOK_TABLEROWFORMATFIELD"))
val value = row.get(i, inputSchema(i))
appendToBuffer(value)
i += 1
}
```
?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]