[ https://issues.apache.org/jira/browse/SPARK-52612?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yang Jie updated SPARK-52612: ----------------------------- Description: After the merge of SPARK-48763, when we execute the command `./build/sbt clean Test/package`, we will find that the `spark-avro-*.jar` file is being collected into the `assembly/target/scala-2.13/jars` directory (which did not happen before). This will lead to potential errors like the following when we run benchmarks using GitHub Actions: {code:java} 25/06/28 07:03:45 ERROR SparkContext: Failed to add file:///home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar to Spark environment java.lang.IllegalArgumentException: requirement failed: File spark-avro_2.13-4.1.0-SNAPSHOT.jar was already registered with a different path (old path = /home/runner/work/spark/spark/connector/avro/target/scala-2.13/spark-avro_2.13-4.1.0-SNAPSHOT.jar, new path = /home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar at scala.Predef$.require(Predef.scala:337) at org.apache.spark.rpc.netty.NettyStreamManager.addJar(NettyStreamManager.scala:85) at org.apache.spark.SparkContext.addLocalJarFile$1(SparkContext.scala:2184) at org.apache.spark.SparkContext.addJar(SparkContext.scala:2233) at org.apache.spark.SparkContext.$anonfun$new$15(SparkContext.scala:538) at org.apache.spark.SparkContext.$anonfun$new$15$adapted(SparkContext.scala:538) at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:619) at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:617) at scala.collection.AbstractIterable.foreach(Iterable.scala:935) at org.apache.spark.SparkContext.<init>(SparkContext.scala:538) at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:3062) at org.apache.spark.sql.classic.SparkSession$Builder.$anonfun$build$2(SparkSession.scala:839) at scala.Option.getOrElse(Option.scala:201) at org.apache.spark.sql.classic.SparkSession$Builder.build(SparkSession.scala:830) at 
org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:859) at org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:732) at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:923) at org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession(SqlBasedBenchmark.scala:45) at org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession$(SqlBasedBenchmark.scala:38) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.getSparkSession(StateStoreBasicOperationsBenchmark.scala:48) at org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.$init$(SqlBasedBenchmark.scala:35) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.<clinit>(StateStoreBasicOperationsBenchmark.scala:48) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark.main(StateStoreBasicOperationsBenchmark.scala) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) at java.base/java.lang.reflect.Method.invoke(Method.java:580) at org.apache.spark.benchmark.Benchmarks$.$anonfun$main$7(Benchmarks.scala:128) at scala.collection.ArrayOps$.foreach$extension(ArrayOps.scala:1324) at org.apache.spark.benchmark.Benchmarks$.main(Benchmarks.scala:91) at org.apache.spark.benchmark.Benchmarks.main(Benchmarks.scala) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) at java.base/java.lang.reflect.Method.invoke(Method.java:580) at 
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1027) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:204) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1132) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1141) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 25/06/28 07:03:45 WARN SparkContext: The JAR file:/home/runner/work/spark/spark/core/target/scala-2.13/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar at spark://localhost:39213/jars/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar has been added already. Overwriting of added jar is not supported in the current version. {code} was: After the merge of SPARK-48763, when we execute the command `./build/sbt clean Test/package`, we will find that the `spark-avro-*.jar` file is being collected into the `assembly/target/scala-2.13/jars` directory (which did not happen before). 
This will lead to potential errors like the following when we run benchmarks using GitHub Actions: ``` 25/06/28 07:03:45 ERROR SparkContext: Failed to add file:///home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar to Spark environment java.lang.IllegalArgumentException: requirement failed: File spark-avro_2.13-4.1.0-SNAPSHOT.jar was already registered with a different path (old path = /home/runner/work/spark/spark/connector/avro/target/scala-2.13/spark-avro_2.13-4.1.0-SNAPSHOT.jar, new path = /home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar at scala.Predef$.require(Predef.scala:337) at org.apache.spark.rpc.netty.NettyStreamManager.addJar(NettyStreamManager.scala:85) at org.apache.spark.SparkContext.addLocalJarFile$1(SparkContext.scala:2184) at org.apache.spark.SparkContext.addJar(SparkContext.scala:2233) at org.apache.spark.SparkContext.$anonfun$new$15(SparkContext.scala:538) at org.apache.spark.SparkContext.$anonfun$new$15$adapted(SparkContext.scala:538) at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:619) at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:617) at scala.collection.AbstractIterable.foreach(Iterable.scala:935) at org.apache.spark.SparkContext.<init>(SparkContext.scala:538) at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:3062) at org.apache.spark.sql.classic.SparkSession$Builder.$anonfun$build$2(SparkSession.scala:839) at scala.Option.getOrElse(Option.scala:201) at org.apache.spark.sql.classic.SparkSession$Builder.build(SparkSession.scala:830) at org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:859) at org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:732) at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:923) at org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession(SqlBasedBenchmark.scala:45) at 
org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession$(SqlBasedBenchmark.scala:38) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.getSparkSession(StateStoreBasicOperationsBenchmark.scala:48) at org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.$init$(SqlBasedBenchmark.scala:35) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.<clinit>(StateStoreBasicOperationsBenchmark.scala:48) at org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark.main(StateStoreBasicOperationsBenchmark.scala) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) at java.base/java.lang.reflect.Method.invoke(Method.java:580) at org.apache.spark.benchmark.Benchmarks$.$anonfun$main$7(Benchmarks.scala:128) at scala.collection.ArrayOps$.foreach$extension(ArrayOps.scala:1324) at org.apache.spark.benchmark.Benchmarks$.main(Benchmarks.scala:91) at org.apache.spark.benchmark.Benchmarks.main(Benchmarks.scala) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) at java.base/java.lang.reflect.Method.invoke(Method.java:580) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1027) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:204) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96) at 
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1132) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1141) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 25/06/28 07:03:45 WARN SparkContext: The JAR file:/home/runner/work/spark/spark/core/target/scala-2.13/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar at spark://localhost:39213/jars/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar has been added already. Overwriting of added jar is not supported in the current version. ``` > When executing `./build/sbt clean Test/package`, `spark-avro-*.jar` should > not be collected into the `assembly/target/scala-2.13/jars` directory > ------------------------------------------------------------------------------------------------------------------------------------------------ > > Key: SPARK-52612 > URL: https://issues.apache.org/jira/browse/SPARK-52612 > Project: Spark > Issue Type: Bug > Components: Build > Affects Versions: 4.1.0, 4.0.0 > Reporter: Yang Jie > Priority: Major > > After the merge of SPARK-48763, when we execute the command `./build/sbt > clean Test/package`, we will find that the `spark-avro-*.jar` file is being > collected into the `assembly/target/scala-2.13/jars` directory (which did not > happen before). 
This will lead to potential errors like the following when we > run benchmarks using GitHub Actions: > {code:java} > 25/06/28 07:03:45 ERROR SparkContext: Failed to add > file:///home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar > to Spark environment > java.lang.IllegalArgumentException: requirement failed: File > spark-avro_2.13-4.1.0-SNAPSHOT.jar was already registered with a different > path (old path = > /home/runner/work/spark/spark/connector/avro/target/scala-2.13/spark-avro_2.13-4.1.0-SNAPSHOT.jar, > new path = > /home/runner/work/spark/spark/assembly/target/scala-2.13/jars/spark-avro_2.13-4.1.0-SNAPSHOT.jar > at scala.Predef$.require(Predef.scala:337) > at > org.apache.spark.rpc.netty.NettyStreamManager.addJar(NettyStreamManager.scala:85) > at > org.apache.spark.SparkContext.addLocalJarFile$1(SparkContext.scala:2184) > at org.apache.spark.SparkContext.addJar(SparkContext.scala:2233) > at org.apache.spark.SparkContext.$anonfun$new$15(SparkContext.scala:538) > at > org.apache.spark.SparkContext.$anonfun$new$15$adapted(SparkContext.scala:538) > at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:619) > at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:617) > at scala.collection.AbstractIterable.foreach(Iterable.scala:935) > at org.apache.spark.SparkContext.<init>(SparkContext.scala:538) > at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:3062) > at > org.apache.spark.sql.classic.SparkSession$Builder.$anonfun$build$2(SparkSession.scala:839) > at scala.Option.getOrElse(Option.scala:201) > at > org.apache.spark.sql.classic.SparkSession$Builder.build(SparkSession.scala:830) > at > org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:859) > at > org.apache.spark.sql.classic.SparkSession$Builder.getOrCreate(SparkSession.scala:732) > at > org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:923) > at > 
org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession(SqlBasedBenchmark.scala:45) > at > org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.getSparkSession$(SqlBasedBenchmark.scala:38) > at > org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.getSparkSession(StateStoreBasicOperationsBenchmark.scala:48) > at > org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark.$init$(SqlBasedBenchmark.scala:35) > at > org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark$.<clinit>(StateStoreBasicOperationsBenchmark.scala:48) > at > org.apache.spark.sql.execution.benchmark.StateStoreBasicOperationsBenchmark.main(StateStoreBasicOperationsBenchmark.scala) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native > Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) > at java.base/java.lang.reflect.Method.invoke(Method.java:580) > at > org.apache.spark.benchmark.Benchmarks$.$anonfun$main$7(Benchmarks.scala:128) > at scala.collection.ArrayOps$.foreach$extension(ArrayOps.scala:1324) > at org.apache.spark.benchmark.Benchmarks$.main(Benchmarks.scala:91) > at org.apache.spark.benchmark.Benchmarks.main(Benchmarks.scala) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native > Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52) > at java.base/java.lang.reflect.Method.invoke(Method.java:580) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1027) > at 
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:204) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:227) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:96) > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1132) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1141) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > 25/06/28 07:03:45 WARN SparkContext: The JAR > file:/home/runner/work/spark/spark/core/target/scala-2.13/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar > at spark://localhost:39213/jars/spark-core_2.13-4.1.0-SNAPSHOT-tests.jar has > been added already. Overwriting of added jar is not supported in the current > version. {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org