yaooqinn commented on PR #42481:
URL: https://github.com/apache/spark/pull/42481#issuecomment-1687474449

   ```scala
   scala> val df =spark.sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')")
   df: org.apache.spark.sql.DataFrame = [regexp_replace(, [a\d]{0, 2}, x, 1): 
string]
   
   scala> val qe = df.queryExecution
   org.apache.spark.SparkRuntimeException: [INVALID_PARAMETER_VALUE.PATTERN] 
The value of parameter(s) `regexp` in `regexp_replace` is invalid: '[a\\d]{0, 
2}'.
     at 
org.apache.spark.sql.errors.QueryExecutionErrors$.invalidPatternError(QueryExecutionErrors.scala:2754)
     at 
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:646)
     at 
org.apache.spark.sql.catalyst.expressions.QuaternaryExpression.eval(Expression.scala:920)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:80)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.$anonfun$constantFolding$4(expressions.scala:90)
     at 
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1249)
     at 
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1248)
     at 
org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:532)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:90)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.$anonfun$applyOrElse$1(expressions.scala:94)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:207)
     at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:207)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:218)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:223)
     at 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
     at scala.collection.immutable.List.foreach(List.scala:431)
     at scala.collection.TraversableLike.map(TraversableLike.scala:286)
     at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
     at scala.collection.immutable.List.map(List.scala:305)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:223)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:228)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:355)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:228)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:94)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:93)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
     at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:478)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:93)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:46)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
     at 
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
     at 
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
     at scala.collection.immutable.List.foldLeft(List.scala:91)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
     at scala.collection.immutable.List.foreach(List.scala:431)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:143)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
     at 
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
     at 
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:139)
     at 
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:135)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$4(QueryExecution.scala:285)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:671)
     at 
org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:285)
     at 
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:302)
     at 
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:295)
     at scala.runtime.ScalaRunTime$.inner$1(ScalaRunTime.scala:272)
     at scala.runtime.ScalaRunTime$.stringOf(ScalaRunTime.scala:277)
     at scala.runtime.ScalaRunTime$.replStringOf(ScalaRunTime.scala:285)
     at .lzycompute(<console>:9)
     at .$print(<console>:6)
     at $print(<console>)
     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
     at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
     at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
     at java.lang.reflect.Method.invoke(Method.java:498)
     at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
     at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
     at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
     at 
scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
     at 
scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
     at 
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
     at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
     at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
     at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
     at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:865)
     at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:733)
     at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:435)
     at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:456)
     at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:239)
     at org.apache.spark.repl.Main$.doMain(Main.scala:78)
     at org.apache.spark.repl.Main$.main(Main.scala:58)
     at org.apache.spark.repl.Main.main(Main.scala)
     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
     at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
     at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
     at java.lang.reflect.Method.invoke(Method.java:498)
     at 
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
     at 
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1020)
     at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192)
     at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215)
     at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
     at 
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1111)
     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1120)
     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
   Caused by: java.util.regex.PatternSyntaxException: Unclosed counted closure 
near index 8
   [a\d]{0, 2}
           ^
     at java.util.regex.Pattern.error(Pattern.java:1969)
     at java.util.regex.Pattern.closure(Pattern.java:3155)
     at java.util.regex.Pattern.sequence(Pattern.java:2148)
     at java.util.regex.Pattern.expr(Pattern.java:2010)
     at java.util.regex.Pattern.compile(Pattern.java:1702)
     at java.util.regex.Pattern.<init>(Pattern.java:1352)
     at java.util.regex.Pattern.compile(Pattern.java:1028)
     at 
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:643)
     ... 99 more
   
   scala> val qe = df.queryExecution.e
   ensuring   eq   equals   executedPlan   explainString
   
   scala> val qe = 
df.queryExecution.explainString(org.apache.spark.sql.execution.FormattedMode)
   org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase 
optimization failed with an internal error. You hit a bug in Spark or the Spark 
plugins you use. Please, report this bug to the corresponding communities or 
vendors, and provide the full stack trace.
     at org.apache.spark.SparkException$.internalError(SparkException.scala:88)
     at 
org.apache.spark.sql.execution.QueryExecution$.toInternalError(QueryExecution.scala:516)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:528)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
     at 
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
     at 
org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:139)
     at 
org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:135)
     at 
org.apache.spark.sql.execution.QueryExecution.assertOptimized(QueryExecution.scala:153)
     at 
org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:171)
     at 
org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
     at 
org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
     at 
org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
     at 
org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
     ... 47 elided
   Caused by: java.lang.NullPointerException
     at 
org.apache.spark.sql.catalyst.expressions.RegExpReplace.nullSafeEval(regexpExpressions.scala:657)
     at 
org.apache.spark.sql.catalyst.expressions.QuaternaryExpression.eval(Expression.scala:920)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:80)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.$anonfun$constantFolding$4(expressions.scala:90)
     at 
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1249)
     at 
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1248)
     at 
org.apache.spark.sql.catalyst.expressions.UnaryExpression.mapChildren(Expression.scala:532)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.org$apache$spark$sql$catalyst$optimizer$ConstantFolding$$constantFolding(expressions.scala:90)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.$anonfun$applyOrElse$1(expressions.scala:94)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:207)
     at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:207)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:218)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:223)
     at 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
     at scala.collection.immutable.List.foreach(List.scala:431)
     at scala.collection.TraversableLike.map(TraversableLike.scala:286)
     at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
     at scala.collection.immutable.List.map(List.scala:305)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:223)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$4(QueryPlan.scala:228)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:355)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:228)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:94)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:93)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
     at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformWithPruning(TreeNode.scala:478)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:93)
     at 
org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:46)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
     at 
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
     at 
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
     at scala.collection.immutable.List.foldLeft(List.scala:91)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
     at scala.collection.immutable.List.foreach(List.scala:431)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
     at 
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:143)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
     ... 58 more
   ```
   
   After some investigation with Spark 3.4.1, it appears this PR merely exposes a pre-existing bug
that we already have


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to