zero323 commented on a change in pull request #34466:
URL: https://github.com/apache/spark/pull/34466#discussion_r753829058
##########
File path: python/pyspark/context.py
##########
@@ -609,16 +658,20 @@ def f(split, iterator):
batchSize = max(1, min(len(c) // numSlices, self._batchSize or 1024))
serializer = BatchedSerializer(self._unbatched_serializer, batchSize)
- def reader_func(temp_filename):
- return self._jvm.PythonRDD.readRDDFromFile(self._jsc,
temp_filename, numSlices)
+ def reader_func(temp_filename: str) -> JavaObject:
+ return cast(JVMView, self._jvm).PythonRDD.readRDDFromFile(
+ self._jsc, temp_filename, numSlices
+ )
- def createRDDServer():
- return self._jvm.PythonParallelizeServer(self._jsc.sc(), numSlices)
+ def createRDDServer() -> JavaObject:
+ return cast(JVMView,
self._jvm).PythonParallelizeServer(self._jsc.sc(), numSlices)
jrdd = self._serialize_to_jvm(c, serializer, reader_func,
createRDDServer)
return RDD(jrdd, self, serializer)
- def _serialize_to_jvm(self, data, serializer, reader_func,
createRDDServer):
+ def _serialize_to_jvm(
+ self, data: Any, serializer: Serializer, reader_func: Callable,
createRDDServer: Callable
Review comment:
We might try to make it more specific. For example we know that
`createRDDServer` is nullary, and should be consistent with
https://github.com/apache/spark/blob/ef4f2546c58ef5fe67be7047f9aa2a793519fd54/python/pyspark/context.py#L615-L616
and the annotation for `reader_func` should be consistent with the actual `reader_func` implementation
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]