HyukjinKwon commented on code in PR #46826:
URL: https://github.com/apache/spark/pull/46826#discussion_r1623682264
##########
python/pyspark/sql/tests/test_python_datasource.py:
##########
@@ -373,6 +373,57 @@ def test_case_insensitive_dict(self):
self.assertEqual(d2["BaR"], 3)
self.assertEqual(d2["baz"], 3)
+ def test_arrow_batch_data_source(self):
+ import pyarrow as pa
+
+ class ArrowBatchDataSource(DataSource):
+ """
+ A data source testing Arrow Batch Serialization
+ """
+
+ @classmethod
+ def name(cls):
+ return "arrowbatch"
+
+ def schema(self):
+ return "key int, value string"
+
+ def reader(self, schema: StructType):
+ return ArrowBatchDataSourceReader(schema, self.options)
+
+ class ArrowBatchDataSourceReader(DataSourceReader):
Review Comment:
We should probably also update the type hint of `DataSourceReader.read`, since this test's reader appears to return Arrow batches rather than plain rows.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]