loong created SPARK-56919:
-----------------------------
Summary: Path does not exist when insert overwrite.
Key: SPARK-56919
URL: https://issues.apache.org/jira/browse/SPARK-56919
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 3.3.2
Reporter: loong
The exception stack trace is as follows:
{panel}
org.apache.spark.sql.AnalysisException: Path does not exist:
hdfs://hdfsHACluster/user/hive/warehouse/rljl.db/fc_face_identified_dossier
at
org.apache.spark.sql.errors.QueryCompilationErrors$.dataPathNotExistError(QueryCompilationErrors.scala:1011)
~[spark-catalyst_2.12-3.3.2.jar:3.3.2]
at
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4(DataSource.scala:785)
~[spark-sql_2.12-3.3.2-HDP.jar:3.3.2]
at
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4$adapted(DataSource.scala:782)
~[spark-sql_2.12-3.3.2.jar:3.3.2]
at
org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:372)
~[spark-core_2.12-3.3.2.jar:3.3.2]
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
~[scala-library-2.12.15.jar:?]
at scala.util.Success.$anonfun$map$1(Try.scala:255)
~[scala-library-2.12.15.jar:?]
at scala.util.Success.map(Try.scala:213) ~[scala-library-2.12.15.jar:?]
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
~[scala-library-2.12.15.jar:?]
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
~[scala-library-2.12.15.jar:?]
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
~[scala-library-2.12.15.jar:?]
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
~[scala-library-2.12.15.jar:?]
at
java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
~[?:1.8.0_342]
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
~[?:1.8.0_342]
at
java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
~[?:1.8.0_342]
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
~[?:1.8.0_342]
at
java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
~[?:1.8.0_342]
{panel}
How to reproduce?
{code:sql}
create table test_insertinto(id int,name string,day int, month int,year int)
stored as parquet;
{code}
{code:scala}
// Define the schema
val schema = StructType(
List(
StructField("id", IntegerType, true),
StructField("name", StringType, true),
StructField("day", IntegerType, true),
StructField("month", IntegerType, true),
StructField("year", IntegerType, true)
)
)
val df: DataFrame = spark.createDataFrame(rowRDD, schema)
val test: Boolean = args(0).toBoolean
df.filter(r => if (r.get(0) == 4) r.get(5) != null else true) // produces an exception
.repartition(col("day"), col("month"), col("year")) // multiple stages
.sortWithinPartitions("year", "month", "day", "id")
.write
.mode(SaveMode.Overwrite)
.insertInto("test_insertinto")
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]