This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new bc215aba54b Revert "[SPARK-43646][CONNECT][TESTS] Make both SBT and
Maven use `spark-proto` uber jar to test the `connect` module"
bc215aba54b is described below
commit bc215aba54b3f7b46d9ae9b121f2d0af6010a08f
Author: yangjie01 <[email protected]>
AuthorDate: Thu Aug 31 19:42:41 2023 +0800
Revert "[SPARK-43646][CONNECT][TESTS] Make both SBT and Maven use
`spark-proto` uber jar to test the `connect` module"
### What changes were proposed in this pull request?
This reverts commit df63adf734370f5c2d71a348f9d36658718b302c.
### Why are the changes needed?
As
[reported](https://github.com/apache/spark/pull/42236#issuecomment-1700493815)
by MaxGekk, the solution for https://github.com/apache/spark/pull/42236 is not
perfect, and it breaks the usability of importing Spark as a Maven project into
IntelliJ IDEA. On the other hand, if `mvn clean test` is executed, test failures
will also occur, such as:
```
[ERROR] [Error]
/tmp/spark-3.5.0/connector/connect/server/target/generated-test-sources/protobuf/java/org/apache/spark/sql/protobuf/protos/TestProto.java:9:46:
error: package org.sparkproject.spark_protobuf.protobuf does not exist
```
Therefore, this pr will revert the change of SPARK-43646, and
`from_protobuf messageClassName` and `from_protobuf messageClassName options`
in `PlanGenerationTestSuite` will be ignored in a follow-up.
At present, it is difficult to make the Maven testing of the
`spark-protobuf` functions in the `connect` module work reliably.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #42746 from LuciferYang/Revert-SPARK-43646.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
(cherry picked from commit 723a0aa30f9a901140d0f97d580d39db56b0729f)
Signed-off-by: yangjie01 <[email protected]>
---
.../apache/spark/sql/PlanGenerationTestSuite.scala | 5 +-
.../from_protobuf_messageClassName.explain | 2 +-
.../from_protobuf_messageClassName_options.explain | 2 +-
.../queries/from_protobuf_messageClassName.json | 2 +-
.../from_protobuf_messageClassName.proto.bin | Bin 131 -> 125 bytes
.../from_protobuf_messageClassName_options.json | 2 +-
...rom_protobuf_messageClassName_options.proto.bin | Bin 182 -> 174 bytes
connector/connect/server/pom.xml | 88 ---------------------
.../connect/server/src/test/protobuf/test.proto | 27 -------
project/SparkBuild.scala | 55 +------------
10 files changed, 8 insertions(+), 175 deletions(-)
diff --git
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 801c8cb0263..4916ff1f597 100644
---
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -3232,15 +3232,14 @@ class PlanGenerationTestSuite
"connect/common/src/test/resources/protobuf-tests/common.desc"
test("from_protobuf messageClassName") {
- binary.select(
- pbFn.from_protobuf(fn.col("bytes"),
"org.apache.spark.sql.protobuf.protos.TestProtoObj"))
+ binary.select(pbFn.from_protobuf(fn.col("bytes"),
classOf[StorageLevel].getName))
}
test("from_protobuf messageClassName options") {
binary.select(
pbFn.from_protobuf(
fn.col("bytes"),
- "org.apache.spark.sql.protobuf.protos.TestProtoObj",
+ classOf[StorageLevel].getName,
Map("recursive.fields.max.depth" -> "2").asJava))
}
diff --git
a/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName.explain
b/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName.explain
index 6f48cb090cd..e7a1867fe90 100644
---
a/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName.explain
+++
b/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName.explain
@@ -1,2 +1,2 @@
-Project [from_protobuf(bytes#0,
org.apache.spark.sql.protobuf.protos.TestProtoObj, None) AS
from_protobuf(bytes)#0]
+Project [from_protobuf(bytes#0, org.apache.spark.connect.proto.StorageLevel,
None) AS from_protobuf(bytes)#0]
+- LocalRelation <empty>, [id#0L, bytes#0]
diff --git
a/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName_options.explain
b/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName_options.explain
index ba87e4774f1..c02d829fcac 100644
---
a/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName_options.explain
+++
b/connector/connect/common/src/test/resources/query-tests/explain-results/from_protobuf_messageClassName_options.explain
@@ -1,2 +1,2 @@
-Project [from_protobuf(bytes#0,
org.apache.spark.sql.protobuf.protos.TestProtoObj, None,
(recursive.fields.max.depth,2)) AS from_protobuf(bytes)#0]
+Project [from_protobuf(bytes#0, org.apache.spark.connect.proto.StorageLevel,
None, (recursive.fields.max.depth,2)) AS from_protobuf(bytes)#0]
+- LocalRelation <empty>, [id#0L, bytes#0]
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.json
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.json
index 6c5891e7016..dc23ac2a117 100644
---
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.json
+++
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.json
@@ -20,7 +20,7 @@
}
}, {
"literal": {
- "string": "org.apache.spark.sql.protobuf.protos.TestProtoObj"
+ "string": "org.apache.spark.connect.proto.StorageLevel"
}
}]
}
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.proto.bin
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.proto.bin
index 9d7aeaf3089..cc46234b747 100644
Binary files
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.proto.bin
and
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName.proto.bin
differ
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.json
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.json
index 691e144eabd..36f69646ef8 100644
---
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.json
+++
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.json
@@ -20,7 +20,7 @@
}
}, {
"literal": {
- "string": "org.apache.spark.sql.protobuf.protos.TestProtoObj"
+ "string": "org.apache.spark.connect.proto.StorageLevel"
}
}, {
"unresolvedFunction": {
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.proto.bin
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.proto.bin
index 89000e473fa..72a1c6b8207 100644
Binary files
a/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.proto.bin
and
b/connector/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_options.proto.bin
differ
diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml
index 3e8b83a3428..403255c5437 100644
--- a/connector/connect/server/pom.xml
+++ b/connector/connect/server/pom.xml
@@ -248,13 +248,6 @@
</dependency>
</dependencies>
<build>
- <extensions>
- <extension>
- <groupId>kr.motd.maven</groupId>
- <artifactId>os-maven-plugin</artifactId>
- <version>1.6.2</version>
- </extension>
- </extensions>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
@@ -410,87 +403,6 @@
</transformers>
</configuration>
</plugin>
- <!--
- SPARK-43646: In order for `ProtoToParsedPlanTestSuite` to successfully
test `from_protobuf_messageClassName`
- and `from_protobuf_messageClassName_options`, `maven-antrun-plugin` is
used to replace all
- `com.google.protobuf.` with
`org.sparkproject.spark_protobuf.protobuf.` in the Java files
- generated by `protobuf-maven-plugin`.
- -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <execution>
- <phase>process-test-sources</phase>
- <configuration>
- <target>
- <replace dir="${project.basedir}/target/generated-test-sources"
- includes="**/*.java"
- token="com.google.protobuf."
- value="org.sparkproject.spark_protobuf.protobuf."/>
- </target>
- </configuration>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
</plugins>
</build>
- <profiles>
- <profile>
- <id>default-protoc</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <build>
- <plugins>
- <!-- Add protobuf-maven-plugin and provide ScalaPB as a code
generation plugin -->
- <plugin>
- <groupId>org.xolstice.maven.plugins</groupId>
- <artifactId>protobuf-maven-plugin</artifactId>
- <version>0.6.1</version>
- <configuration>
-
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
- <protoTestSourceRoot>src/test/protobuf</protoTestSourceRoot>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>test-compile</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- <profile>
- <id>user-defined-protoc</id>
- <properties>
-
<spark.protoc.executable.path>${env.SPARK_PROTOC_EXEC_PATH}</spark.protoc.executable.path>
- </properties>
- <build>
- <plugins>
- <plugin>
- <groupId>org.xolstice.maven.plugins</groupId>
- <artifactId>protobuf-maven-plugin</artifactId>
- <version>0.6.1</version>
- <configuration>
-
<protocExecutable>${spark.protoc.executable.path}</protocExecutable>
- <protoTestSourceRoot>src/test/protobuf</protoTestSourceRoot>
- </configuration>
- <executions>
- <execution>
- <goals>
- <goal>test-compile</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
</project>
diff --git a/connector/connect/server/src/test/protobuf/test.proto
b/connector/connect/server/src/test/protobuf/test.proto
deleted file mode 100644
index 844f89ba81f..00000000000
--- a/connector/connect/server/src/test/protobuf/test.proto
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-syntax = "proto3";
-package org.apache.spark.sql.protobuf.protos;
-
-option java_multiple_files = true;
-option java_outer_classname = "TestProto";
-
-message TestProtoObj {
- int64 v1 = 1;
- int32 v2 = 2;
-}
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 51bcbb09109..563d5357754 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -17,7 +17,7 @@
import java.io._
import java.nio.charset.StandardCharsets.UTF_8
-import java.nio.file.{Files, StandardOpenOption}
+import java.nio.file.Files
import java.util.Locale
import scala.io.Source
@@ -763,13 +763,7 @@ object SparkConnectCommon {
object SparkConnect {
import BuildCommons.protoVersion
- val rewriteJavaFile = TaskKey[Unit]("rewriteJavaFile",
- "Rewrite the generated Java PB files.")
- val genPBAndRewriteJavaFile = TaskKey[Unit]("genPBAndRewriteJavaFile",
- "Generate Java PB files and overwrite their contents.")
-
lazy val settings = Seq(
- PB.protocVersion := BuildCommons.protoVersion,
// For some reason the resolution from the imported Maven build does not
work for some
// of these dependendencies that we need to shade later on.
libraryDependencies ++= {
@@ -800,42 +794,6 @@ object SparkConnect {
)
},
- // SPARK-43646: The following 3 statements are used to make the connect
module use the
- // Spark-proto assembly jar when compiling and testing using SBT, which
can be keep same
- // behavior of Maven testing.
- (Test / unmanagedJars) += (LocalProject("protobuf") / assembly).value,
- (Test / fullClasspath) :=
- (Test / fullClasspath).value.filterNot { f =>
f.toString.contains("spark-protobuf") },
- (Test / fullClasspath) += (LocalProject("protobuf") / assembly).value,
-
- (Test / PB.protoSources) += (Test / sourceDirectory).value / "resources" /
"protobuf",
-
- (Test / PB.targets) := Seq(
- PB.gens.java -> target.value / "generated-test-sources",
- ),
-
- // SPARK-43646: Create a custom task to replace all `com.google.protobuf.`
with
- // `org.sparkproject.spark_protobuf.protobuf.` in the generated Java PB
files.
- // This is to generate Java files that can be used to test spark-protobuf
functions
- // in `ProtoToParsedPlanTestSuite`.
- rewriteJavaFile := {
- val protobufDir = target.value /
"generated-test-sources"/"org"/"apache"/"spark"/"sql"/"protobuf"/"protos"
- protobufDir.listFiles().foreach { f =>
- if (f.getName.endsWith(".java")) {
- val contents = Files.readAllLines(f.toPath, UTF_8)
- val replaced = contents.asScala.map { line =>
- line.replaceAll("com.google.protobuf.",
"org.sparkproject.spark_protobuf.protobuf.")
- }
- Files.write(f.toPath, replaced.asJava,
StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)
- }
- }
- },
- // SPARK-43646: `genPBAndRewriteJavaFile` is used to specify the execution
order of `PB.generate`
- // and `rewriteJavaFile`, and makes `Test / compile` dependent on
`genPBAndRewriteJavaFile`
- // being executed first.
- genPBAndRewriteJavaFile := Def.sequential(Test / PB.generate,
rewriteJavaFile).value,
- (Test / compile) := (Test /
compile).dependsOn(genPBAndRewriteJavaFile).value,
-
(assembly / test) := { },
(assembly / logLevel) := Level.Info,
@@ -881,16 +839,7 @@ object SparkConnect {
case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") =>
MergeStrategy.discard
case _ => MergeStrategy.first
}
- ) ++ {
- val sparkProtocExecPath = sys.props.get("spark.protoc.executable.path")
- if (sparkProtocExecPath.isDefined) {
- Seq(
- PB.protocExecutable := file(sparkProtocExecPath.get)
- )
- } else {
- Seq.empty
- }
- }
+ )
}
object SparkConnectClient {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]