This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 57c7db2c4c1d [SPARK-47760][SPARK-47763][CONNECT][TESTS] Reenable Avro and Protobuf function doctests
57c7db2c4c1d is described below

commit 57c7db2c4c1dbeeba062fe28ab58245e0a3098eb
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Wed Apr 17 08:47:01 2024 +0900

    [SPARK-47760][SPARK-47763][CONNECT][TESTS] Reenable Avro and Protobuf function doctests
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to reenable the Avro and Protobuf function doctests by providing the required jars to the Spark Connect server.
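
    As context, the round trip these Avro doctests exercise looks roughly like the sketch below (not from this patch; it assumes a Connect server at sc://localhost with spark-avro on its classpath, and the column and schema are illustrative):

        from pyspark.sql import SparkSession
        from pyspark.sql.avro.functions import from_avro, to_avro

        spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
        df = spark.range(3).selectExpr("CAST(id AS STRING) AS value")
        # Serialize the string column to Avro binary, then decode it back.
        avro_df = df.select(to_avro(df.value).alias("avro"))
        avro_df.select(from_avro("avro", '{"type": "string"}').alias("value")).show()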
    
    ### Why are the changes needed?
    
    For test coverage of the Avro and Protobuf functions.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, test-only.
    
    ### How was this patch tested?
    
    Tested in my fork: https://github.com/HyukjinKwon/spark/actions/runs/8704014674/job/23871383802
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #46055 from HyukjinKwon/SPARK-47763-SPARK-47760.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .github/workflows/build_python_connect.yml       | 11 ++++++-----
 python/pyspark/sql/connect/avro/functions.py     |  7 -------
 python/pyspark/sql/connect/protobuf/functions.py |  7 -------
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 965e839b6b2b..863980b0c2e5 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -29,7 +29,6 @@ jobs:
     name: "Build modules: pyspark-connect"
     runs-on: ubuntu-latest
     timeout-minutes: 300
-    if: github.repository == 'apache/spark'
     steps:
       - name: Checkout Spark repository
         uses: actions/checkout@v4
@@ -63,7 +62,7 @@ jobs:
           architecture: x64
       - name: Build Spark
         run: |
-          ./build/sbt -Phive test:package
+          ./build/sbt -Phive Test/package
       - name: Install pure Python package (pyspark-connect)
         env:
           SPARK_TESTING: 1
@@ -82,7 +81,9 @@ jobs:
           cp conf/log4j2.properties.template conf/log4j2.properties
           sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties
           # Start a Spark Connect server
-          PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" --jars `find connector/connect/server/target -name spark-connect*SNAPSHOT.jar`
+          PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
+            --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
+            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
           # Make sure running Python workers that contains pyspark.core once. They will be reused.
           python -c "from pyspark.sql import SparkSession; _ = SparkSession.builder.remote('sc://localhost').getOrCreate().range(100).repartition(100).mapInPandas(lambda x: x, 'id INT').collect()"
           # Remove Py4J and PySpark zipped library to make sure there is no JVM connection
@@ -98,9 +99,9 @@ jobs:
         with:
           name: test-results-spark-connect-python-only
           path: "**/target/test-reports/*.xml"
-      - name: Upload unit tests log files
+      - name: Upload Spark Connect server log file
         if: failure()
         uses: actions/upload-artifact@v4
         with:
           name: unit-tests-log-spark-connect-python-only
-          path: "**/target/unit-tests.log"
+          path: logs/*.out
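
For readability, the workflow's one-line Connect smoke test above expands to roughly the following sketch (sc://localhost matches the server the workflow starts):

    from pyspark.sql import SparkSession

    # Run a trivial mapInPandas job once so Python workers are started
    # against the local Connect server; later test runs reuse them.
    spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
    df = spark.range(100).repartition(100)
    _ = df.mapInPandas(lambda batches: batches, "id INT").collect()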
diff --git a/python/pyspark/sql/connect/avro/functions.py b/python/pyspark/sql/connect/avro/functions.py
index 43088333b108..f153b17acf58 100644
--- a/python/pyspark/sql/connect/avro/functions.py
+++ b/python/pyspark/sql/connect/avro/functions.py
@@ -80,15 +80,8 @@ def _test() -> None:
     import doctest
     from pyspark.sql import SparkSession as PySparkSession
     import pyspark.sql.connect.avro.functions
-    from pyspark.util import is_remote_only
 
     globs = pyspark.sql.connect.avro.functions.__dict__.copy()
-
-    # TODO(SPARK-47760): Reeanble Avro function doctests
-    if is_remote_only():
-        del pyspark.sql.connect.avro.functions.from_avro
-        del pyspark.sql.connect.avro.functions.to_avro
-
     globs["spark"] = (
         PySparkSession.builder.appName("sql.connect.avro.functions tests")
         .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
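
The tail of _test() is elided from the hunk above; PySpark's doctest runners generally finish with a doctest.testmod call over the copied globals, roughly like this sketch (the option flags are the usual PySpark defaults, assumed here):

    import doctest
    import sys
    from pyspark.sql import SparkSession
    import pyspark.sql.connect.avro.functions as mod

    globs = mod.__dict__.copy()
    # "local[4]" mirrors the fallback endpoint used in the hunk above.
    globs["spark"] = SparkSession.builder.remote("local[4]").getOrCreate()
    failure_count, _ = doctest.testmod(
        mod,
        globs=globs,
        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
    )
    globs["spark"].stop()
    if failure_count:
        sys.exit(-1)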
diff --git a/python/pyspark/sql/connect/protobuf/functions.py b/python/pyspark/sql/connect/protobuf/functions.py
index fcf1ed1ee02e..058925942fa0 100644
--- a/python/pyspark/sql/connect/protobuf/functions.py
+++ b/python/pyspark/sql/connect/protobuf/functions.py
@@ -120,7 +120,6 @@ def _read_descriptor_set_file(filePath: str) -> bytes:
 def _test() -> None:
     import os
     import sys
-    from pyspark.util import is_remote_only
     from pyspark.testing.utils import search_jar
 
     protobuf_jar = search_jar("connector/protobuf", "spark-protobuf-assembly-", "spark-protobuf")
@@ -142,12 +141,6 @@ def _test() -> None:
     import pyspark.sql.connect.protobuf.functions
 
     globs = pyspark.sql.connect.protobuf.functions.__dict__.copy()
-
-    # TODO(SPARK-47763): Reeanble Protobuf function doctests
-    if is_remote_only():
-        del pyspark.sql.connect.protobuf.functions.from_protobuf
-        del pyspark.sql.connect.protobuf.functions.to_protobuf
-
     globs["spark"] = (
         PySparkSession.builder.appName("sql.protobuf.functions tests")
         .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[2]"))
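
For reference, the Protobuf doctests being re-enabled exercise a round trip like this sketch (the descriptor file path and message name are hypothetical; the descriptor set would come from protoc --descriptor_set_out):

    from pyspark.sql import SparkSession
    from pyspark.sql.protobuf.functions import from_protobuf, to_protobuf

    spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
    df = spark.range(3).selectExpr("struct(id) AS value")
    desc_path = "/tmp/example.desc"  # hypothetical compiled descriptor set
    # Encode the struct column to protobuf bytes, then decode it back.
    binary_df = df.select(to_protobuf(df.value, "Example", desc_path).alias("b"))
    binary_df.select(from_protobuf("b", "Example", desc_path).alias("v")).show()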


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
