[ 
https://issues.apache.org/jira/browse/SPARK-56919?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

loong updated SPARK-56919:
--------------------------
    Description: 
The exception stack trace is as follows:

{panel}
org.apache.spark.sql.AnalysisException: Path does not exist: 
hdfs://hdfsHACluster/user/hive/warehouse/rljl.db/fc_face_identified_dossier
        at 
org.apache.spark.sql.errors.QueryCompilationErrors$.dataPathNotExistError(QueryCompilationErrors.scala:1011)
 ~[spark-catalyst_2.12-3.3.2.jar:3.3.2]
        at 
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4(DataSource.scala:785)
 ~[spark-sql_2.12.jar:3.3.2]
        at 
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4$adapted(DataSource.scala:782)
 ~[spark-sql_2.12-3.3.2.jar:3.3.2]
        at 
org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:372) 
~[spark-core_2.12-3.3.2.jar:3.3.2]
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659) 
~[scala-library-2.12.15.jar:?]
        at scala.util.Success.$anonfun$map$1(Try.scala:255) 
~[scala-library-2.12.15.jar:?]
        at scala.util.Success.map(Try.scala:213) ~[scala-library-2.12.15.jar:?]
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64) 
~[scala-library-2.12.15.jar:?]
        at 
java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
 ~[?:1.8.0_342]
        at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) 
~[?:1.8.0_342]
        at 
java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) 
~[?:1.8.0_342]
        at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) 
~[?:1.8.0_342]
        at 
java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) 
~[?:1.8.0_342]
{panel}
How to reproduce?

{code:sql}
create table test_insertinto(id int,name string,day int, month int,year int) 
stored as parquet;
{code}
{code:scala}
    // Schema
    val schema = StructType(
      List(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("day", IntegerType, true),
        StructField("month", IntegerType, true),
        StructField("year", IntegerType, true)
      )
    )
    val df: DataFrame = spark.createDataFrame(rowRDD, schema)

    val test: Boolean = args(0).toBoolean
    df.filter(r => if (r.get(0) == 4) r.get(5) != null else true) // produces an exception
      .repartition(col("day"), col("month"), col("year")) // multiple stages
      .sortWithinPartitions("year", "month", "day", "id")
      .write
      .mode(SaveMode.Overwrite)
      .insertInto("test_insertinto")
{code}

  was:
Exception stack trace as follows:

{panel}
org.apache.spark.sql.AnalysisException: Path does not exist: 
hdfs://hdfsHACluster/user/hive/warehouse/rljl.db/fc_face_identified_dossier
        at 
org.apache.spark.sql.errors.QueryCompilationErrors$.dataPathNotExistError(QueryCompilationErrors.scala:1011)
 ~[spark-catalyst_2.12-3.3.2.jar:3.3.2]
        at 
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4(DataSource.scala:785)
 ~[spark-sql_2.12-3.3.2-HDP.jar:3.3.2]
        at 
org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4$adapted(DataSource.scala:782)
 ~[spark-sql_2.12-3.3.2.jar:3.3.2]
        at 
org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:372) 
~[spark-core_2.12-3.3.2.jar:3.3.2]
        at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659) 
~[scala-library-2.12.15.jar:?]
        at scala.util.Success.$anonfun$map$1(Try.scala:255) 
~[scala-library-2.12.15.jar:?]
        at scala.util.Success.map(Try.scala:213) ~[scala-library-2.12.15.jar:?]
        at scala.concurrent.Future.$anonfun$map$1(Future.scala:292) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33) 
~[scala-library-2.12.15.jar:?]
        at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64) 
~[scala-library-2.12.15.jar:?]
        at 
java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
 ~[?:1.8.0_342]
        at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) 
~[?:1.8.0_342]
        at 
java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) 
~[?:1.8.0_342]
        at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) 
~[?:1.8.0_342]
        at 
java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) 
~[?:1.8.0_342]
{panel}
How to reproduce?

{code:sql}
create table test_insertinto(id int,name string,day int, month int,year int) 
stored as parquet;
{code}
{code:scala}
    // Schema
    val schema = StructType(
      List(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("day", IntegerType, true),
        StructField("month", IntegerType, true),
        StructField("year", IntegerType, true)
      )
    )
    val df: DataFrame = spark.createDataFrame(rowRDD, schema)

    val test: Boolean = args(0).toBoolean
    df.filter(r => if (r.get(0) == 4) r.get(5) != null else true) // produces an exception
      .repartition(col("day"), col("month"), col("year")) // multiple stages
      .sortWithinPartitions("year", "month", "day", "id")
      .write
      .mode(SaveMode.Overwrite)
      .insertInto("test_insertinto")
{code}


> Path does not exist when insert overwrite failed.
> -------------------------------------------------
>
>                 Key: SPARK-56919
>                 URL: https://issues.apache.org/jira/browse/SPARK-56919
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.3.2
>            Reporter: loong
>            Priority: Major
>
> Exception stack trace as follows:
> {panel}
> org.apache.spark.sql.AnalysisException: Path does not exist: 
> hdfs://hdfsHACluster/user/hive/warehouse/rljl.db/fc_face_identified_dossier
>       at 
> org.apache.spark.sql.errors.QueryCompilationErrors$.dataPathNotExistError(QueryCompilationErrors.scala:1011)
>  ~[spark-catalyst_2.12-3.3.2.jar:3.3.2]
>       at 
> org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4(DataSource.scala:785)
>  ~[spark-sql_2.12.jar:3.3.2]
>       at 
> org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$4$adapted(DataSource.scala:782)
>  ~[spark-sql_2.12-3.3.2.jar:3.3.2]
>       at 
> org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:372) 
> ~[spark-core_2.12-3.3.2.jar:3.3.2]
>       at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659) 
> ~[scala-library-2.12.15.jar:?]
>       at scala.util.Success.$anonfun$map$1(Try.scala:255) 
> ~[scala-library-2.12.15.jar:?]
>       at scala.util.Success.map(Try.scala:213) ~[scala-library-2.12.15.jar:?]
>       at scala.concurrent.Future.$anonfun$map$1(Future.scala:292) 
> ~[scala-library-2.12.15.jar:?]
>       at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33) 
> ~[scala-library-2.12.15.jar:?]
>       at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33) 
> ~[scala-library-2.12.15.jar:?]
>       at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64) 
> ~[scala-library-2.12.15.jar:?]
>       at 
> java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
>  ~[?:1.8.0_342]
>       at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) 
> ~[?:1.8.0_342]
>       at 
> java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) 
> ~[?:1.8.0_342]
>       at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) 
> ~[?:1.8.0_342]
>       at 
> java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) 
> ~[?:1.8.0_342]
> {panel}
> How to reproduce?
> {code:sql}
> create table test_insertinto(id int,name string,day int, month int,year int) 
> stored as parquet;
> {code}
> {code:scala}
>     // Schema
>     val schema = StructType(
>       List(
>         StructField("id", IntegerType, true),
>         StructField("name", StringType, true),
>         StructField("day", IntegerType, true),
>         StructField("month", IntegerType, true),
>         StructField("year", IntegerType, true)
>       )
>     )
>     val df: DataFrame = spark.createDataFrame(rowRDD, schema)
>     val test: Boolean = args(0).toBoolean
>     df.filter(r => if (r.get(0) == 4) r.get(5) != null else true) // produces an exception
>       .repartition(col("day"), col("month"), col("year")) // multiple stages
>       .sortWithinPartitions("year", "month", "day", "id")
>       .write
>       .mode(SaveMode.Overwrite)
>       .insertInto("test_insertinto")
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to