This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 60da3179fa61 [SPARK-51701][PYTHON][TESTS] Move test objects to a separate file
60da3179fa61 is described below

commit 60da3179fa61b3928d2dd15d97fd757e680019e1
Author: Takuya Ueshin <ues...@databricks.com>
AuthorDate: Thu Apr 3 11:20:36 2025 +0800

    [SPARK-51701][PYTHON][TESTS] Move test objects to a separate file
    
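    What changes were proposed in this pull request?
    
    Moves test objects to a separate file.
    
    Why are the changes needed?
    
    Some classes for tests should be placed in a clean separate file to avoid unnecessary dependencies.
    
    Does this PR introduce any user-facing change?
    
    No, test only.
    
    How was this patch tested?
    
    The existing tests should pass.
    
    Was this patch authored or co-authored using generative AI tooling?
    
    No.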
    
    Closes #50503 from ueshin/issues/SPARK-51701/test_objects.
    
    Authored-by: Takuya Ueshin <ues...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
    (cherry picked from commit 295d37fad3b67ac0c73629d5eaebb3baefaeea7e)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/arrow/test_arrow.py       |   3 +-
 .../sql/tests/connect/test_connect_creation.py     |   2 +-
 .../sql/tests/connect/test_connect_readwriter.py   |   2 +-
 python/pyspark/sql/tests/test_serde.py             |   6 +-
 python/pyspark/sql/tests/test_types.py             |   4 +-
 python/pyspark/sql/tests/test_udf.py               |   3 +-
 python/pyspark/testing/objects.py                  | 121 +++++++++++++++++++++
 python/pyspark/testing/sqlutils.py                 | 105 +-----------------
 .../apache/spark/sql/test/ExamplePointUDT.scala    |   2 +-
 9 files changed, 131 insertions(+), 117 deletions(-)

diff --git a/python/pyspark/sql/tests/arrow/test_arrow.py b/python/pyspark/sql/tests/arrow/test_arrow.py
index 065f97fcf7c7..5a770a947889 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow.py
@@ -45,14 +45,13 @@ from pyspark.sql.types import (
     NullType,
     DayTimeIntervalType,
 )
+from pyspark.testing.objects import ExamplePoint, ExamplePointUDT
 from pyspark.testing.sqlutils import (
     ReusedSQLTestCase,
     have_pandas,
     have_pyarrow,
     pandas_requirement_message,
     pyarrow_requirement_message,
-    ExamplePoint,
-    ExamplePointUDT,
 )
 from pyspark.errors import ArithmeticException, PySparkTypeError, UnsupportedOperationException
 from pyspark.loose_version import LooseVersion
diff --git a/python/pyspark/sql/tests/connect/test_connect_creation.py b/python/pyspark/sql/tests/connect/test_connect_creation.py
index 5352913f6609..3d67c33a5834 100644
--- a/python/pyspark/sql/tests/connect/test_connect_creation.py
+++ b/python/pyspark/sql/tests/connect/test_connect_creation.py
@@ -32,7 +32,7 @@ from pyspark.sql.types import (
     ArrayType,
     Row,
 )
-from pyspark.testing.sqlutils import MyObject, PythonOnlyUDT
+from pyspark.testing.objects import MyObject, PythonOnlyUDT
 
 from pyspark.testing.connectutils import should_test_connect
 from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
diff --git a/python/pyspark/sql/tests/connect/test_connect_readwriter.py b/python/pyspark/sql/tests/connect/test_connect_readwriter.py
index 06266b86de3f..dc82d93f9581 100644
--- a/python/pyspark/sql/tests/connect/test_connect_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_connect_readwriter.py
@@ -30,7 +30,7 @@ from pyspark.sql.types import (
     MapType,
     Row,
 )
-from pyspark.testing.sqlutils import (
+from pyspark.testing.objects import (
     PythonOnlyUDT,
     ExamplePoint,
     PythonOnlyPoint,
diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
index 01cf3c51d7de..eab1ad043ef3 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -23,7 +23,8 @@ import time
 from pyspark.sql import Row
 from pyspark.sql.functions import lit
 from pyspark.sql.types import StructType, StructField, DecimalType, BinaryType
-from pyspark.testing.sqlutils import ReusedSQLTestCase, UTCOffsetTimezone
+from pyspark.testing.objects import UTCOffsetTimezone
+from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
 class SerdeTestsMixin:
@@ -82,9 +83,6 @@ class SerdeTestsMixin:
         day = datetime.date.today()
         now = datetime.datetime.now()
         ts = time.mktime(now.timetuple())
-        # class in __main__ is not serializable
-        from pyspark.testing.sqlutils import UTCOffsetTimezone
-
         utc = UTCOffsetTimezone()
         utcnow = datetime.datetime.utcfromtimestamp(ts)  # without microseconds
         # add microseconds to utcnow (keeping year,month,day,hour,minute,second)
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index d920aec6d8d2..15247b97664d 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -71,14 +71,14 @@ from pyspark.sql.types import (
     _make_type_verifier,
     _merge_type,
 )
-from pyspark.testing.sqlutils import (
-    ReusedSQLTestCase,
+from pyspark.testing.objects import (
     ExamplePointUDT,
     PythonOnlyUDT,
     ExamplePoint,
     PythonOnlyPoint,
     MyObject,
 )
+from pyspark.testing.sqlutils import ReusedSQLTestCase
 from pyspark.testing.utils import PySparkErrorTestUtils
 
 
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index 067cc0b06a8f..01da0a018367 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -44,9 +44,8 @@ from pyspark.sql.types import (
     VariantVal,
 )
 from pyspark.errors import AnalysisException, PythonException, PySparkTypeError
+from pyspark.testing.objects import ExamplePoint, ExamplePointUDT
 from pyspark.testing.sqlutils import (
-    ExamplePoint,
-    ExamplePointUDT,
     ReusedSQLTestCase,
     test_compiled,
     test_not_compiled_message,
diff --git a/python/pyspark/testing/objects.py b/python/pyspark/testing/objects.py
new file mode 100644
index 000000000000..5b97664afbdd
--- /dev/null
+++ b/python/pyspark/testing/objects.py
@@ -0,0 +1,121 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import datetime
+
+from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType
+
+
+class UTCOffsetTimezone(datetime.tzinfo):
+    """
+    Specifies timezone in UTC offset
+    """
+
+    def __init__(self, offset=0):
+        self.ZERO = datetime.timedelta(hours=offset)
+
+    def utcoffset(self, dt):
+        return self.ZERO
+
+    def dst(self, dt):
+        return self.ZERO
+
+
+class ExamplePointUDT(UserDefinedType):
+    """
+    User-defined type (UDT) for ExamplePoint.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return ArrayType(DoubleType(), False)
+
+    @classmethod
+    def module(cls):
+        return "pyspark.sql.tests"
+
+    @classmethod
+    def scalaUDT(cls):
+        return "org.apache.spark.sql.test.ExamplePointUDT"
+
+    def serialize(self, obj):
+        return [obj.x, obj.y]
+
+    def deserialize(self, datum):
+        return ExamplePoint(datum[0], datum[1])
+
+
+class ExamplePoint:
+    """
+    An example class to demonstrate UDT in Scala, Java, and Python.
+    """
+
+    __UDT__ = ExamplePointUDT()
+
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def __repr__(self):
+        return "ExamplePoint(%s,%s)" % (self.x, self.y)
+
+    def __str__(self):
+        return "(%s,%s)" % (self.x, self.y)
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and other.x == self.x and other.y == self.y
+
+
+class PythonOnlyUDT(UserDefinedType):
+    """
+    User-defined type (UDT) for ExamplePoint.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return ArrayType(DoubleType(), False)
+
+    @classmethod
+    def module(cls):
+        return "__main__"
+
+    def serialize(self, obj):
+        return [obj.x, obj.y]
+
+    def deserialize(self, datum):
+        return PythonOnlyPoint(datum[0], datum[1])
+
+    @staticmethod
+    def foo():
+        pass
+
+    @property
+    def props(self):
+        return {}
+
+
+class PythonOnlyPoint(ExamplePoint):
+    """
+    An example class to demonstrate UDT in only Python
+    """
+
+    __UDT__ = PythonOnlyUDT()  # type: ignore
+
+
+class MyObject:
+    def __init__(self, key, value):
+        self.key = key
+        self.value = value
diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py
index 4151dfd90459..98d04e7d5b1a 100644
--- a/python/pyspark/testing/sqlutils.py
+++ b/python/pyspark/testing/sqlutils.py
@@ -16,7 +16,6 @@
 #
 
 import glob
-import datetime
 import math
 import os
 import shutil
@@ -24,7 +23,7 @@ import tempfile
 from contextlib import contextmanager
 
 from pyspark.sql import SparkSession
-from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row
+from pyspark.sql.types import Row
 from pyspark.testing.utils import (
     ReusedPySparkTestCase,
     PySparkErrorTestUtils,
@@ -75,108 +74,6 @@ except Exception as e:
 test_compiled = test_not_compiled_message is None
 
 
-class UTCOffsetTimezone(datetime.tzinfo):
-    """
-    Specifies timezone in UTC offset
-    """
-
-    def __init__(self, offset=0):
-        self.ZERO = datetime.timedelta(hours=offset)
-
-    def utcoffset(self, dt):
-        return self.ZERO
-
-    def dst(self, dt):
-        return self.ZERO
-
-
-class ExamplePointUDT(UserDefinedType):
-    """
-    User-defined type (UDT) for ExamplePoint.
-    """
-
-    @classmethod
-    def sqlType(cls):
-        return ArrayType(DoubleType(), False)
-
-    @classmethod
-    def module(cls):
-        return "pyspark.sql.tests"
-
-    @classmethod
-    def scalaUDT(cls):
-        return "org.apache.spark.sql.test.ExamplePointUDT"
-
-    def serialize(self, obj):
-        return [obj.x, obj.y]
-
-    def deserialize(self, datum):
-        return ExamplePoint(datum[0], datum[1])
-
-
-class ExamplePoint:
-    """
-    An example class to demonstrate UDT in Scala, Java, and Python.
-    """
-
-    __UDT__ = ExamplePointUDT()
-
-    def __init__(self, x, y):
-        self.x = x
-        self.y = y
-
-    def __repr__(self):
-        return "ExamplePoint(%s,%s)" % (self.x, self.y)
-
-    def __str__(self):
-        return "(%s,%s)" % (self.x, self.y)
-
-    def __eq__(self, other):
-        return isinstance(other, self.__class__) and other.x == self.x and other.y == self.y
-
-
-class PythonOnlyUDT(UserDefinedType):
-    """
-    User-defined type (UDT) for ExamplePoint.
-    """
-
-    @classmethod
-    def sqlType(cls):
-        return ArrayType(DoubleType(), False)
-
-    @classmethod
-    def module(cls):
-        return "__main__"
-
-    def serialize(self, obj):
-        return [obj.x, obj.y]
-
-    def deserialize(self, datum):
-        return PythonOnlyPoint(datum[0], datum[1])
-
-    @staticmethod
-    def foo():
-        pass
-
-    @property
-    def props(self):
-        return {}
-
-
-class PythonOnlyPoint(ExamplePoint):
-    """
-    An example class to demonstrate UDT in only Python
-    """
-
-    __UDT__ = PythonOnlyUDT()  # type: ignore
-
-
-class MyObject:
-    def __init__(self, key, value):
-        self.key = key
-        self.value = value
-
-
 class SQLTestUtils:
     """
     This util assumes the instance of this to have 'spark' attribute, having a spark session.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
index 7beac16599de..e5e3b17e08d2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
@@ -45,7 +45,7 @@ private[sql] class ExamplePointUDT extends UserDefinedType[ExamplePoint] {
 
   override def sqlType: DataType = ArrayType(DoubleType, false)
 
-  override def pyUDT: String = "pyspark.testing.sqlutils.ExamplePointUDT"
+  override def pyUDT: String = "pyspark.testing.objects.ExamplePointUDT"
 
   override def serialize(p: ExamplePoint): GenericArrayData = {
     val output = new Array[Any](2)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to