yaooqinn commented on PR #42481:
URL: https://github.com/apache/spark/pull/42481#issuecomment-1687474449
```scala
scala> val df =spark.sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')")
df: org.apache.spark.sql.DataFrame = [regexp_replace(, [a\d]{0, 2}, x, 1):
string]
scala> val qe = df.queryExecution
org.apache.spark.SparkRuntimeException: [INVALID_PARAMETER_VALUE.PATTERN]
The value of parameter(s) `regexp` in `regexp_replace` is invalid: '[a\\d]{0,
2}'.
at
org.apache.spark.sql.errors.QueryExecutionErrors$.invalidPatternError(QueryExecutionErrors.scala:2754)
at
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:646)
at
org.apache.spark.sql.catalyst.expressions.QuaternaryExpression.eval(Expression.scala:920)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:80)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.$anonfun$constantFolding$4(expressions.scala:90)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1249)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1248)
at
org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:532)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:90)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.$anonfun$applyOrElse$1(expressions.scala:94)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:207)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:207)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:218)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:223)
at
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.immutable.List.foreach(List.scala:431)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.immutable.List.map(List.scala:305)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:223)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:228)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:355)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:228)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:94)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:93)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:478)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:93)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:46)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
at
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:91)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
at scala.collection.immutable.List.foreach(List.scala:431)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:143)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:139)
at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:135)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$4(QueryExecution.scala:285)
at
org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:671)
at
org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:285)
at
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:302)
at
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:295)
at scala.runtime.ScalaRunTime$.inner$1(ScalaRunTime.scala:272)
at scala.runtime.ScalaRunTime$.stringOf(ScalaRunTime.scala:277)
at scala.runtime.ScalaRunTime$.replStringOf(ScalaRunTime.scala:285)
at .lzycompute(<console>:9)
at .$print(<console>:6)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
at
scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at
scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:865)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:733)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:435)
at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:456)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:239)
at org.apache.spark.repl.Main$.doMain(Main.scala:78)
at org.apache.spark.repl.Main$.main(Main.scala:58)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1020)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1111)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.util.regex.PatternSyntaxException: Unclosed counted closure
near index 8
[a\d]{0, 2}
^
at java.util.regex.Pattern.error(Pattern.java:1969)
at java.util.regex.Pattern.closure(Pattern.java:3155)
at java.util.regex.Pattern.sequence(Pattern.java:2148)
at java.util.regex.Pattern.expr(Pattern.java:2010)
at java.util.regex.Pattern.compile(Pattern.java:1702)
at java.util.regex.Pattern.<init>(Pattern.java:1352)
at java.util.regex.Pattern.compile(Pattern.java:1028)
at
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:643)
... 99 more
scala> val qe = df.queryExecution.e
ensuring eq equals executedPlan explainString
scala> val qe =
df.queryExecution.explainString(org.apache.spark.sql.execution.FormattedMode)
org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase
optimization failed with an internal error. You hit a bug in Spark or the Spark
plugins you use. Please, report this bug to the corresponding communities or
vendors, and provide the full stack trace.
at org.apache.spark.SparkException$.internalError(SparkException.scala:88)
at
org.apache.spark.sql.execution.QueryExecution$.toInternalError(QueryExecution.scala:516)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:528)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:139)
at
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:135)
at
org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:153)
at
org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:171)
at
org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
at
org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
at
org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
at
org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
... 47 elided
Caused by: java.lang.NullPointerException
at
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:657)
at
org.apache.spark.sql.catalyst.expressions.QuaternaryExpression.eval(Expression.scala:920)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:80)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.$anonfun$constantFolding$4(expressions.scala:90)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1249)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1248)
at
org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:532)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:90)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.$anonfun$applyOrElse$1(expressions.scala:94)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:207)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:207)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:218)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:223)
at
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.immutable.List.foreach(List.scala:431)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.immutable.List.map(List.scala:305)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:223)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:228)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:355)
at
org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:228)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:94)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:93)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:478)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:93)
at
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:46)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
at
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:91)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
at scala.collection.immutable.List.foreach(List.scala:431)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:143)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
... 58 more
```
After some investigations with spark 3.4.1, this PR just hit a potential bug
that we already have
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]