Tanel Kiis created SPARK-32157: ---------------------------------- Summary: Integer overflow when constructing large query plan string Key: SPARK-32157 URL: https://issues.apache.org/jira/browse/SPARK-32157 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 3.0.0 Reporter: Tanel Kiis
When the length of the string representation of the query plan in org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat goes above Integer.MAX_VALUE, the query can fail with one of these two exceptions. When "spark.sql.maxPlanStringLength" was set to 0: {noformat} java.lang.NegativeArraySizeException at java.lang.AbstractStringBuilder.<init>(AbstractStringBuilder.java:68) at java.lang.StringBuilder.<init>(StringBuilder.java:101) at org.apache.spark.sql.catalyst.util.StringUtils$StringConcat.toString(StringUtils.scala:136) at org.apache.spark.sql.catalyst.util.StringUtils$PlanStringConcat.toString(StringUtils.scala:163) at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:208) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:95) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:944) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:396) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:380) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:269) at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:829) {noformat} When "spark.sql.maxPlanStringLength" was at the default value: {noformat} java.lang.StringIndexOutOfBoundsException: String index out of range: -47 at java.lang.String.substring(String.java:1967) at org.apache.spark.sql.catalyst.util.StringUtils$StringConcat.append(StringUtils.scala:123) at org.apache.spark.sql.execution.QueryExecution.$anonfun$toString$1(QueryExecution.scala:207) at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$toString$1$adapted(QueryExecution.scala:207) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$1(TreeNode.scala:663) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$1$adapted(TreeNode.scala:662) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:662) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at 
org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at 
org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at 
org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at 
org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at 
org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.InputAdapter.generateTreeString(WholeStageCodegenExec.scala:550) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.execution.WholeStageCodegenExec.generateTreeString(WholeStageCodegenExec.scala:795) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:687) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at 
scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at 
org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3(TreeNode.scala:693) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$generateTreeString$3$adapted(TreeNode.scala:691) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:691) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:697) at org.apache.spark.sql.catalyst.trees.TreeNode.treeString(TreeNode.scala:591) at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:381) at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$writePlans(QueryExecution.scala:197) at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:207) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:95) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:944) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:396) at 
org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:380) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:269) at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:829) {noformat} Either way, it seems that the "length" field in the "StringConcat" class overflows. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org