[
https://issues.apache.org/jira/browse/SPARK-56919?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
loong updated SPARK-56919:
--------------------------
Summary: When AQE is enabled, a "Path does not exist" exception may occur
after an insert overwrite operation fails. (was: Path does not exist when
insert overwrite failed.)
> When AQE is enabled, a "Path does not exist" exception may occur after an
> insert overwrite operation fails.
> -----------------------------------------------------------------------------------------------------------
>
> Key: SPARK-56919
> URL: https://issues.apache.org/jira/browse/SPARK-56919
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.3.2
> Reporter: loong
> Priority: Major
> Attachments: screenshot-1.png
>
>
> The exception stack trace is as follows:
> {panel}
> org.apache.spark.sql.AnalysisException: Path does not exist:
> hdfs://hdfsHACluster/user/hive/warehouse/xxx.db/xxxx
> at
> org.apache.spark.sql.errors.QueryCompilationErrors$.dataPathNotExistError(QueryCompilationErrors.scala:1011)
> ~[spark-catalyst_2.12-3.3.2.jar:3.3.2]
> at
> org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4(DataSource.scala:785)
> ~[spark-sql_2.12.jar:3.3.2]
> at
> org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4$adapted(DataSource.scala:782)
> ~[spark-sql_2.12-3.3.2.jar:3.3.2]
> at
> org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:372)
> ~[spark-core_2.12-3.3.2.jar:3.3.2]
> at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
> ~[scala-library-2.12.15.jar:?]
> at scala.util.Success.$anonfun$map$1(Try.scala:255)
> ~[scala-library-2.12.15.jar:?]
> at scala.util.Success.map(Try.scala:213) ~[scala-library-2.12.15.jar:?]
> at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
> ~[scala-library-2.12.15.jar:?]
> at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
> ~[scala-library-2.12.15.jar:?]
> at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
> ~[scala-library-2.12.15.jar:?]
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
> ~[scala-library-2.12.15.jar:?]
> at
> java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
> ~[?:1.8.0_342]
> at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
> ~[?:1.8.0_342]
> at
> java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
> ~[?:1.8.0_342]
> at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
> ~[?:1.8.0_342]
> at
> java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
> ~[?:1.8.0_342]
> {panel}
> How to reproduce?
> {code:sql}
> create table test_insertinto(id int,name string,day int, month int,year int)
> stored as parquet;
> {code}
> {code:scala}
> // Schema
> val schema = StructType(
> List(
> StructField("id", IntegerType, true),
> StructField("name", StringType, true),
> StructField("day", IntegerType, true),
> StructField("month", IntegerType, true),
> StructField("year", IntegerType, true)
> )
> )
> val df: DataFrame = spark.createDataFrame(rowRDD, schema)
> val test: Boolean = args(0).toBoolean
> df.filter(r => if (r.get(0) == 4) r.get(5) != null else true) // produces an exception
> .repartition(col("day"), col("month"), col("year")) // multiple stages
> .sortWithinPartitions("year", "month", "day", "id")
> .write
> .mode(SaveMode.Overwrite)
> .insertInto("test_insertinto")
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]