This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new 60da3179fa61 [SPARK-51701][PYTHON][TESTS] Move test objects to a separate file 60da3179fa61 is described below commit 60da3179fa61b3928d2dd15d97fd757e680019e1 Author: Takuya Ueshin <ues...@databricks.com> AuthorDate: Thu Apr 3 11:20:36 2025 +0800 [SPARK-51701][PYTHON][TESTS] Move test objects to a separate file What changes were proposed in this pull request? Moves test objects to a separate file. Why are the changes needed? Some classes for tests should be placed in a clean separate file to avoid unnecessary dependencies. Does this PR introduce any user-facing change? No, test only. How was this patch tested? The existing tests should pass. Was this patch authored or co-authored using generative AI tooling? No. Closes #50503 from ueshin/issues/SPARK-51701/test_objects. Authored-by: Takuya Ueshin <ues...@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> (cherry picked from commit 295d37fad3b67ac0c73629d5eaebb3baefaeea7e) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/tests/arrow/test_arrow.py | 3 +- .../sql/tests/connect/test_connect_creation.py | 2 +- .../sql/tests/connect/test_connect_readwriter.py | 2 +- python/pyspark/sql/tests/test_serde.py | 6 +- python/pyspark/sql/tests/test_types.py | 4 +- python/pyspark/sql/tests/test_udf.py | 3 +- python/pyspark/testing/objects.py | 121 +++++++++++++++++++++ python/pyspark/testing/sqlutils.py | 105 +----------------- .../apache/spark/sql/test/ExamplePointUDT.scala | 2 +- 9 files changed, 131 insertions(+), 117 deletions(-) diff --git a/python/pyspark/sql/tests/arrow/test_arrow.py b/python/pyspark/sql/tests/arrow/test_arrow.py index 065f97fcf7c7..5a770a947889 100644 --- a/python/pyspark/sql/tests/arrow/test_arrow.py +++ b/python/pyspark/sql/tests/arrow/test_arrow.py @@ -45,14 +45,13 @@ from pyspark.sql.types import ( NullType, DayTimeIntervalType, ) +from pyspark.testing.objects import ExamplePoint, ExamplePointUDT from pyspark.testing.sqlutils import ( ReusedSQLTestCase, have_pandas, have_pyarrow, pandas_requirement_message, pyarrow_requirement_message, - ExamplePoint, - ExamplePointUDT, ) from pyspark.errors import 
ArithmeticException, PySparkTypeError, UnsupportedOperationException from pyspark.loose_version import LooseVersion diff --git a/python/pyspark/sql/tests/connect/test_connect_creation.py b/python/pyspark/sql/tests/connect/test_connect_creation.py index 5352913f6609..3d67c33a5834 100644 --- a/python/pyspark/sql/tests/connect/test_connect_creation.py +++ b/python/pyspark/sql/tests/connect/test_connect_creation.py @@ -32,7 +32,7 @@ from pyspark.sql.types import ( ArrayType, Row, ) -from pyspark.testing.sqlutils import MyObject, PythonOnlyUDT +from pyspark.testing.objects import MyObject, PythonOnlyUDT from pyspark.testing.connectutils import should_test_connect from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase diff --git a/python/pyspark/sql/tests/connect/test_connect_readwriter.py b/python/pyspark/sql/tests/connect/test_connect_readwriter.py index 06266b86de3f..dc82d93f9581 100644 --- a/python/pyspark/sql/tests/connect/test_connect_readwriter.py +++ b/python/pyspark/sql/tests/connect/test_connect_readwriter.py @@ -30,7 +30,7 @@ from pyspark.sql.types import ( MapType, Row, ) -from pyspark.testing.sqlutils import ( +from pyspark.testing.objects import ( PythonOnlyUDT, ExamplePoint, PythonOnlyPoint, diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py index 01cf3c51d7de..eab1ad043ef3 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -23,7 +23,8 @@ import time from pyspark.sql import Row from pyspark.sql.functions import lit from pyspark.sql.types import StructType, StructField, DecimalType, BinaryType -from pyspark.testing.sqlutils import ReusedSQLTestCase, UTCOffsetTimezone +from pyspark.testing.objects import UTCOffsetTimezone +from pyspark.testing.sqlutils import ReusedSQLTestCase class SerdeTestsMixin: @@ -82,9 +83,6 @@ class SerdeTestsMixin: day = datetime.date.today() now = datetime.datetime.now() ts = time.mktime(now.timetuple()) - # class in 
__main__ is not serializable - from pyspark.testing.sqlutils import UTCOffsetTimezone - utc = UTCOffsetTimezone() utcnow = datetime.datetime.utcfromtimestamp(ts) # without microseconds # add microseconds to utcnow (keeping year,month,day,hour,minute,second) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index d920aec6d8d2..15247b97664d 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -71,14 +71,14 @@ from pyspark.sql.types import ( _make_type_verifier, _merge_type, ) -from pyspark.testing.sqlutils import ( - ReusedSQLTestCase, +from pyspark.testing.objects import ( ExamplePointUDT, PythonOnlyUDT, ExamplePoint, PythonOnlyPoint, MyObject, ) +from pyspark.testing.sqlutils import ReusedSQLTestCase from pyspark.testing.utils import PySparkErrorTestUtils diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 067cc0b06a8f..01da0a018367 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -44,9 +44,8 @@ from pyspark.sql.types import ( VariantVal, ) from pyspark.errors import AnalysisException, PythonException, PySparkTypeError +from pyspark.testing.objects import ExamplePoint, ExamplePointUDT from pyspark.testing.sqlutils import ( - ExamplePoint, - ExamplePointUDT, ReusedSQLTestCase, test_compiled, test_not_compiled_message, diff --git a/python/pyspark/testing/objects.py b/python/pyspark/testing/objects.py new file mode 100644 index 000000000000..5b97664afbdd --- /dev/null +++ b/python/pyspark/testing/objects.py @@ -0,0 +1,121 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import datetime + +from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType + + +class UTCOffsetTimezone(datetime.tzinfo): + """ + Specifies timezone in UTC offset + """ + + def __init__(self, offset=0): + self.ZERO = datetime.timedelta(hours=offset) + + def utcoffset(self, dt): + return self.ZERO + + def dst(self, dt): + return self.ZERO + + +class ExamplePointUDT(UserDefinedType): + """ + User-defined type (UDT) for ExamplePoint. + """ + + @classmethod + def sqlType(cls): + return ArrayType(DoubleType(), False) + + @classmethod + def module(cls): + return "pyspark.sql.tests" + + @classmethod + def scalaUDT(cls): + return "org.apache.spark.sql.test.ExamplePointUDT" + + def serialize(self, obj): + return [obj.x, obj.y] + + def deserialize(self, datum): + return ExamplePoint(datum[0], datum[1]) + + +class ExamplePoint: + """ + An example class to demonstrate UDT in Scala, Java, and Python. + """ + + __UDT__ = ExamplePointUDT() + + def __init__(self, x, y): + self.x = x + self.y = y + + def __repr__(self): + return "ExamplePoint(%s,%s)" % (self.x, self.y) + + def __str__(self): + return "(%s,%s)" % (self.x, self.y) + + def __eq__(self, other): + return isinstance(other, self.__class__) and other.x == self.x and other.y == self.y + + +class PythonOnlyUDT(UserDefinedType): + """ + User-defined type (UDT) for ExamplePoint. 
+ """ + + @classmethod + def sqlType(cls): + return ArrayType(DoubleType(), False) + + @classmethod + def module(cls): + return "__main__" + + def serialize(self, obj): + return [obj.x, obj.y] + + def deserialize(self, datum): + return PythonOnlyPoint(datum[0], datum[1]) + + @staticmethod + def foo(): + pass + + @property + def props(self): + return {} + + +class PythonOnlyPoint(ExamplePoint): + """ + An example class to demonstrate UDT in only Python + """ + + __UDT__ = PythonOnlyUDT() # type: ignore + + +class MyObject: + def __init__(self, key, value): + self.key = key + self.value = value diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index 4151dfd90459..98d04e7d5b1a 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -16,7 +16,6 @@ # import glob -import datetime import math import os import shutil @@ -24,7 +23,7 @@ import tempfile from contextlib import contextmanager from pyspark.sql import SparkSession -from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row +from pyspark.sql.types import Row from pyspark.testing.utils import ( ReusedPySparkTestCase, PySparkErrorTestUtils, @@ -75,108 +74,6 @@ except Exception as e: test_compiled = test_not_compiled_message is None -class UTCOffsetTimezone(datetime.tzinfo): - """ - Specifies timezone in UTC offset - """ - - def __init__(self, offset=0): - self.ZERO = datetime.timedelta(hours=offset) - - def utcoffset(self, dt): - return self.ZERO - - def dst(self, dt): - return self.ZERO - - -class ExamplePointUDT(UserDefinedType): - """ - User-defined type (UDT) for ExamplePoint. 
- """ - - @classmethod - def sqlType(cls): - return ArrayType(DoubleType(), False) - - @classmethod - def module(cls): - return "pyspark.sql.tests" - - @classmethod - def scalaUDT(cls): - return "org.apache.spark.sql.test.ExamplePointUDT" - - def serialize(self, obj): - return [obj.x, obj.y] - - def deserialize(self, datum): - return ExamplePoint(datum[0], datum[1]) - - -class ExamplePoint: - """ - An example class to demonstrate UDT in Scala, Java, and Python. - """ - - __UDT__ = ExamplePointUDT() - - def __init__(self, x, y): - self.x = x - self.y = y - - def __repr__(self): - return "ExamplePoint(%s,%s)" % (self.x, self.y) - - def __str__(self): - return "(%s,%s)" % (self.x, self.y) - - def __eq__(self, other): - return isinstance(other, self.__class__) and other.x == self.x and other.y == self.y - - -class PythonOnlyUDT(UserDefinedType): - """ - User-defined type (UDT) for ExamplePoint. - """ - - @classmethod - def sqlType(cls): - return ArrayType(DoubleType(), False) - - @classmethod - def module(cls): - return "__main__" - - def serialize(self, obj): - return [obj.x, obj.y] - - def deserialize(self, datum): - return PythonOnlyPoint(datum[0], datum[1]) - - @staticmethod - def foo(): - pass - - @property - def props(self): - return {} - - -class PythonOnlyPoint(ExamplePoint): - """ - An example class to demonstrate UDT in only Python - """ - - __UDT__ = PythonOnlyUDT() # type: ignore - - -class MyObject: - def __init__(self, key, value): - self.key = key - self.value = value - - class SQLTestUtils: """ This util assumes the instance of this to have 'spark' attribute, having a spark session. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala index 7beac16599de..e5e3b17e08d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala @@ -45,7 +45,7 @@ private[sql] class ExamplePointUDT extends UserDefinedType[ExamplePoint] { override def sqlType: DataType = ArrayType(DoubleType, false) - override def pyUDT: String = "pyspark.testing.sqlutils.ExamplePointUDT" + override def pyUDT: String = "pyspark.testing.objects.ExamplePointUDT" override def serialize(p: ExamplePoint): GenericArrayData = { val output = new Array[Any](2) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org