wForget commented on code in PR #47303:
URL: https://github.com/apache/spark/pull/47303#discussion_r1675201152
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2685,18 +2685,32 @@ case class Chr(child: Expression)
case class Base64(child: Expression)
extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant {
+ lazy val chunkBase64: Boolean = SQLConf.get.chunkBase64StringEnabled
+ lazy val encoder: JBase64.Encoder = if (chunkBase64) {
+ JBase64.getMimeEncoder
+ } else {
+ JBase64.getMimeEncoder(-1, Array())
+ }
+
override def dataType: DataType = SQLConf.get.defaultStringType
override def inputTypes: Seq[DataType] = Seq(BinaryType)
protected override def nullSafeEval(bytes: Any): Any = {
- UTF8String.fromBytes(JBase64.getMimeEncoder.encode(bytes.asInstanceOf[Array[Byte]]))
+ UTF8String.fromBytes(encoder.encode(bytes.asInstanceOf[Array[Byte]]))
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (child) => {
- s"""${ev.value} = UTF8String.fromBytes(
- ${classOf[JBase64].getName}.getMimeEncoder().encode($child));
- """})
+ if (chunkBase64) {
+ s"""${ev.value} = UTF8String.fromBytes(
+ ${classOf[JBase64].getName}.getMimeEncoder().encode($child));
Review Comment:
> Why don't we use the encoder directly?
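`java.util.Base64$Encoder` is not serializable, so passing the encoder to the generated code as a codegen reference object fails when the `references` array is serialized: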
```
-- !query
select base64(c7), base64(c8), base64(v), ascii(s) from char_tbl4
-- !query schema
struct<>
-- !query output
java.io.NotSerializableException
java.util.Base64$Encoder
Serialization stack:
- object not serializable (class: java.util.Base64$Encoder, value:
java.util.Base64$Encoder@423ed07f)
- element of array (index: 2)
- array (class [Ljava.lang.Object;, size 5)
- field (class:
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory, name:
org$apache$spark$sql$execution$WholeStageCodegenEvaluatorFactory$$references,
type: class [Ljava.lang.Object;)
- object (class
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory,
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory@2fd9633e)
- element of array (index: 0)
- array (class [Ljava.lang.Object;, size 1)
- field (class: java.lang.invoke.SerializedLambda, name: capturedArgs,
type: class [Ljava.lang.Object;)
- object (class java.lang.invoke.SerializedLambda,
SerializedLambda[capturingClass=class
org.apache.spark.sql.execution.WholeStageCodegenExec,
functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;,
implementation=invokeStatic
org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/execution/WholeStageCodegenEvaluatorFactory;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;,
instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;,
numCaptured=1])
- writeReplace data (class: java.lang.invoke.SerializedLambda)
- object (class
org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2458/0x000002cf3e949c30,
org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2458/0x000002cf3e949c30@603a0fa7)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]