This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b1b29d9eeb76 [SPARK-46781][PYTHON][TESTS] Test custom data source and
input partition (pyspark.sql.datasource)
b1b29d9eeb76 is described below
commit b1b29d9eeb76951a0129529f2075046cde91937a
Author: Xinrong Meng <[email protected]>
AuthorDate: Tue Jan 23 09:14:31 2024 +0900
[SPARK-46781][PYTHON][TESTS] Test custom data source and input partition
(pyspark.sql.datasource)
### What changes were proposed in this pull request?
Test custom data source and input partition (pyspark.sql.datasource)
### Why are the changes needed?
Subtasks of
[SPARK-46041](https://issues.apache.org/jira/browse/SPARK-46041) to improve
test coverage
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Test change only.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44808 from xinrong-meng/test_datasource.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/tests/test_datasources.py | 50 ++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/python/pyspark/sql/tests/test_datasources.py
b/python/pyspark/sql/tests/test_datasources.py
index 8c16904544b2..ece4839d88a8 100644
--- a/python/pyspark/sql/tests/test_datasources.py
+++ b/python/pyspark/sql/tests/test_datasources.py
@@ -21,7 +21,9 @@ import uuid
import os
from pyspark.sql import Row
+from pyspark.sql.datasource import InputPartition, DataSource
from pyspark.sql.types import IntegerType, StructField, StructType, LongType,
StringType
+from pyspark.errors import PySparkNotImplementedError
from pyspark.testing.sqlutils import ReusedSQLTestCase
@@ -283,6 +285,54 @@ class DataSourcesTestsMixin:
url=f"{url};drop=true", dbtable=dbtable
).load().collect()
+ def test_custom_data_source(self):
+ class MyCustomDataSource(DataSource):
+ pass
+
+ custom_data_source = MyCustomDataSource(options={"path":
"/path/to/custom/data"})
+
+ with self.assertRaises(PySparkNotImplementedError) as pe:
+ custom_data_source.schema()
+
+ self.check_error(
+ exception=pe.exception,
+ error_class="NOT_IMPLEMENTED",
+ message_parameters={"feature": "schema"},
+ )
+
+ with self.assertRaises(PySparkNotImplementedError) as pe:
+ custom_data_source.reader(schema=None)
+
+ self.check_error(
+ exception=pe.exception,
+ error_class="NOT_IMPLEMENTED",
+ message_parameters={"feature": "reader"},
+ )
+
+ with self.assertRaises(PySparkNotImplementedError) as pe:
+ custom_data_source.writer(schema=None, overwrite=False)
+
+ self.check_error(
+ exception=pe.exception,
+ error_class="NOT_IMPLEMENTED",
+ message_parameters={"feature": "writer"},
+ )
+
+ def test_input_partition(self):
+ partition = InputPartition(1)
+ expected_repr = "InputPartition(value=1)"
+ actual_repr = repr(partition)
+ self.assertEqual(expected_repr, actual_repr)
+
+ class RangeInputPartition(InputPartition):
+ def __init__(self, start, end):
+ super().__init__((start, end))
+
+ partition = RangeInputPartition(1, 3)
+ expected_repr = "RangeInputPartition(value=(1, 3))"
+ actual_repr = repr(partition)
+ self.assertEqual(expected_repr, actual_repr)
+
# Concrete test class: inherits all test methods from DataSourcesTestsMixin and
# runs them against the shared SparkSession provided by ReusedSQLTestCase.
class DataSourcesTests(DataSourcesTestsMixin, ReusedSQLTestCase):
    pass
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]