This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 57c7db2c4c1d [SPARK-47760][SPARK-47763][CONNECT][TESTS] Re-enable Avro and Protobuf function doctests
57c7db2c4c1d is described below

commit 57c7db2c4c1dbeeba062fe28ab58245e0a3098eb
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Wed Apr 17 08:47:01 2024 +0900

    [SPARK-47760][SPARK-47763][CONNECT][TESTS] Re-enable Avro and Protobuf function doctests

    ### What changes were proposed in this pull request?

    This PR proposes to re-enable the Avro and Protobuf function doctests by
    providing the required jars to the Spark Connect server (a minimal sketch
    of the round-trip these doctests exercise follows the diff below).

    ### Why are the changes needed?

    For test coverage of the Avro and Protobuf functions.

    ### Does this PR introduce _any_ user-facing change?

    No, test-only.

    ### How was this patch tested?

    Tested in my fork: https://github.com/HyukjinKwon/spark/actions/runs/8704014674/job/23871383802

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #46055 from HyukjinKwon/SPARK-47763-SPARK-47760.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .github/workflows/build_python_connect.yml       | 11 ++++++-----
 python/pyspark/sql/connect/avro/functions.py     |  7 -------
 python/pyspark/sql/connect/protobuf/functions.py |  7 -------
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 965e839b6b2b..863980b0c2e5 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -29,7 +29,6 @@ jobs:
     name: "Build modules: pyspark-connect"
     runs-on: ubuntu-latest
     timeout-minutes: 300
-    if: github.repository == 'apache/spark'
     steps:
       - name: Checkout Spark repository
         uses: actions/checkout@v4
@@ -63,7 +62,7 @@ jobs:
           architecture: x64
       - name: Build Spark
         run: |
-          ./build/sbt -Phive test:package
+          ./build/sbt -Phive Test/package
       - name: Install pure Python package (pyspark-connect)
         env:
           SPARK_TESTING: 1
@@ -82,7 +81,9 @@ jobs:
           cp conf/log4j2.properties.template conf/log4j2.properties
           sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties
           # Start a Spark Connect server
-          PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" --jars `find connector/connect/server/target -name spark-connect*SNAPSHOT.jar`
+          PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
+            --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
+            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
           # Make sure running Python workers that contains pyspark.core once. They will be reused.
python -c "from pyspark.sql import SparkSession; _ = SparkSession.builder.remote('sc://localhost').getOrCreate().range(100).repartition(100).mapInPandas(lambda x: x, 'id INT').collect()" # Remove Py4J and PySpark zipped library to make sure there is no JVM connection @@ -98,9 +99,9 @@ jobs: with: name: test-results-spark-connect-python-only path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files + - name: Upload Spark Connect server log file if: failure() uses: actions/upload-artifact@v4 with: name: unit-tests-log-spark-connect-python-only - path: "**/target/unit-tests.log" + path: logs/*.out diff --git a/python/pyspark/sql/connect/avro/functions.py b/python/pyspark/sql/connect/avro/functions.py index 43088333b108..f153b17acf58 100644 --- a/python/pyspark/sql/connect/avro/functions.py +++ b/python/pyspark/sql/connect/avro/functions.py @@ -80,15 +80,8 @@ def _test() -> None: import doctest from pyspark.sql import SparkSession as PySparkSession import pyspark.sql.connect.avro.functions - from pyspark.util import is_remote_only globs = pyspark.sql.connect.avro.functions.__dict__.copy() - - # TODO(SPARK-47760): Reeanble Avro function doctests - if is_remote_only(): - del pyspark.sql.connect.avro.functions.from_avro - del pyspark.sql.connect.avro.functions.to_avro - globs["spark"] = ( PySparkSession.builder.appName("sql.connect.avro.functions tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]")) diff --git a/python/pyspark/sql/connect/protobuf/functions.py b/python/pyspark/sql/connect/protobuf/functions.py index fcf1ed1ee02e..058925942fa0 100644 --- a/python/pyspark/sql/connect/protobuf/functions.py +++ b/python/pyspark/sql/connect/protobuf/functions.py @@ -120,7 +120,6 @@ def _read_descriptor_set_file(filePath: str) -> bytes: def _test() -> None: import os import sys - from pyspark.util import is_remote_only from pyspark.testing.utils import search_jar protobuf_jar = search_jar("connector/protobuf", "spark-protobuf-assembly-", "spark-protobuf") @@ -142,12 +141,6 @@ def _test() -> None: import pyspark.sql.connect.protobuf.functions globs = pyspark.sql.connect.protobuf.functions.__dict__.copy() - - # TODO(SPARK-47763): Reeanble Protobuf function doctests - if is_remote_only(): - del pyspark.sql.connect.protobuf.functions.from_protobuf - del pyspark.sql.connect.protobuf.functions.to_protobuf - globs["spark"] = ( PySparkSession.builder.appName("sql.protobuf.functions tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[2]")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org