panbingkun commented on code in PR #48466:
URL: https://github.com/apache/spark/pull/48466#discussion_r1802202487


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala:
##########
@@ -680,53 +678,36 @@ case class JsonToStructs(
         messageParameters = Map("schema" -> toSQLType(nullableSchema)))
   }
 
-  // This converts parsed rows to the desired output by the given schema.
-  @transient
-  lazy val converter = nullableSchema match {
-    case _: StructType =>
-      (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next() else null
-    case _: ArrayType =>
-      (rows: Iterator[InternalRow]) => if (rows.hasNext) 
rows.next().getArray(0) else null
-    case _: MapType =>
-      (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next().getMap(0) 
else null
-  }
-
-  val nameOfCorruptRecord = 
SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD)
-  @transient lazy val parser = {
-    val parsedOptions = new JSONOptions(options, timeZoneId.get, 
nameOfCorruptRecord)
-    val mode = parsedOptions.parseMode
-    if (mode != PermissiveMode && mode != FailFastMode) {
-      throw QueryCompilationErrors.parseModeUnsupportedError("from_json", mode)
-    }
-    val (parserSchema, actualSchema) = nullableSchema match {
-      case s: StructType =>
-        ExprUtils.verifyColumnNameOfCorruptRecord(s, 
parsedOptions.columnNameOfCorruptRecord)
-        (s, StructType(s.filterNot(_.name == 
parsedOptions.columnNameOfCorruptRecord)))
-      case other =>
-        (StructType(Array(StructField("value", other))), other)
-    }
-
-    val rawParser = new JacksonParser(actualSchema, parsedOptions, 
allowArrayAsStructs = false)
-    val createParser = CreateJacksonParser.utf8String _
-
-    new FailureSafeParser[UTF8String](
-      input => rawParser.parse(input, createParser, identity[UTF8String]),
-      mode,
-      parserSchema,
-      parsedOptions.columnNameOfCorruptRecord)
-  }
-
   override def dataType: DataType = nullableSchema
 
   override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
     copy(timeZoneId = Option(timeZoneId))
 
-  override def nullSafeEval(json: Any): Any = nullableSchema match {
-    case _: VariantType =>
-      VariantExpressionEvalUtils.parseJson(json.asInstanceOf[UTF8String],
-        allowDuplicateKeys = variantAllowDuplicateKeys)
-    case _ =>
-      converter(parser.parse(json.asInstanceOf[UTF8String]))
+  @transient
+  private val nameOfCorruptRecord = 
SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD)
+
+  @transient
+  private lazy val evaluator = new JsonToStructsEvaluator(
+    options, nullableSchema, nameOfCorruptRecord, timeZoneId, 
variantAllowDuplicateKeys)
+
+  override def nullSafeEval(json: Any): Any = 
evaluator.evaluate(json.asInstanceOf[UTF8String])
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val refEvaluator = ctx.addReferenceObj("evaluator", evaluator)
+    val eval = child.genCode(ctx)
+    val resultType = CodeGenerator.boxedType(dataType)
+    val resultTerm = ctx.freshName("result")
+    ev.copy(code =
+      code"""
+         |${eval.code}
+         |$resultType $resultTerm = ($resultType) $refEvaluator.evaluate(
+         |  ${eval.isNull} ? null : ${eval.value});

Review Comment:
   The code generated for an example looks roughly like this:
   - Before
   ```java
   /* 031 */       boolean localtablescan_isNull_0 = 
localtablescan_row_0.isNullAt(0);
   /* 032 */       UTF8String localtablescan_value_0 = localtablescan_isNull_0 ?
   /* 033 */       null : (localtablescan_row_0.getUTF8String(0));
   /* 034 */       InternalRow project_result_0 = (InternalRow) 
((org.apache.spark.sql.catalyst.expressions.json.JsonToStructsEvaluator) 
references[1] /* evaluator */).evaluate(
   /* 035 */         localtablescan_isNull_0 ? null : localtablescan_value_0);
   /* 036 */       boolean project_isNull_0 = project_result_0 == null;
   /* 037 */       InternalRow project_value_0 = null;
   /* 038 */       if (!project_isNull_0) {
   /* 039 */         project_value_0 = project_result_0;
   /* 040 */       }
   /* 041 */       project_mutableStateArray_0[0].reset();
   /* 042 */
   /* 043 */       project_mutableStateArray_0[0].zeroOutNullBytes();
   ```
   
   - After
   ```java
   /* 031 */       boolean localtablescan_isNull_0 = 
localtablescan_row_0.isNullAt(0);
   /* 032 */       UTF8String localtablescan_value_0 = localtablescan_isNull_0 ?
   /* 033 */       null : (localtablescan_row_0.getUTF8String(0));
   /* 034 */       InternalRow project_result_0 = (InternalRow) 
((org.apache.spark.sql.catalyst.expressions.json.JsonToStructsEvaluator) 
references[1] /* evaluator */).evaluate(localtablescan_value_0);
   /* 035 */       boolean project_isNull_0 = project_result_0 == null;
   /* 036 */       InternalRow project_value_0 = null;
   /* 037 */       if (!project_isNull_0) {
   /* 038 */         project_value_0 = project_result_0;
   /* 039 */       }
   /* 040 */       project_mutableStateArray_0[0].reset();
   /* 041 */
   /* 042 */       project_mutableStateArray_0[0].zeroOutNullBytes();
   ```
   
   - The extra null check (`isNull ? null : value`) in the generated call is obviously unnecessary:
   <img width="679" alt="image" 
src="https://github.com/user-attachments/assets/384b9995-8aac-4553-89b0-22e2236e5de4">
   
   - So it has been removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to