Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/22009#discussion_r211660836
--- Diff: sql/core/src/main/java/org/apache/spark/sql/sources/v2/BatchReadSupportProvider.java ---
@@ -18,48 +18,44 @@
package org.apache.spark.sql.sources.v2;
import org.apache.spark.annotation.InterfaceStability;
-import org.apache.spark.sql.sources.DataSourceRegister;
-import org.apache.spark.sql.sources.v2.reader.DataSourceReader;
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils;
+import org.apache.spark.sql.sources.v2.reader.BatchReadSupport;
import org.apache.spark.sql.types.StructType;
/**
* A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to
- * provide data reading ability and scan the data from the data source.
+ * provide data reading ability for batch processing.
+ *
+ * This interface is used to create {@link BatchReadSupport} instances when end users run
+ * {@code SparkSession.read.format(...).option(...).load()}.
*/
@InterfaceStability.Evolving
-public interface ReadSupport extends DataSourceV2 {
+public interface BatchReadSupportProvider extends DataSourceV2 {
/**
- * Creates a {@link DataSourceReader} to scan the data from this data source.
+ * Creates a {@link BatchReadSupport} instance to load the data from this data source with a user
--- End diff --
Although Spark is OK with using the same `ReadSupport` instance across queries, this interface (`BatchReadSupportProvider`) is not going to support it. It's instantiated by reflection every time `DataFrameReader.load` is called, so the implementation cannot reuse the same `ReadSupport` instance.
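
To illustrate the consequence for implementors, here is a minimal sketch; the source class `MyBatchSource`, the helper `buildReadSupport`, and the exact `createBatchReadSupport(DataSourceOptions)` signature are assumptions based on the API proposed in this PR, not confirmed code:

```java
import org.apache.spark.sql.sources.v2.BatchReadSupportProvider;
import org.apache.spark.sql.sources.v2.DataSourceOptions;
import org.apache.spark.sql.sources.v2.reader.BatchReadSupport;

// Hypothetical implementation, for illustration only.
public class MyBatchSource implements BatchReadSupportProvider {

  // This cache lives only as long as this provider object. Because
  // DataFrameReader.load constructs a fresh MyBatchSource by reflection for
  // every query, the field starts out null again on each load() call and the
  // cached ReadSupport is never shared across queries.
  private BatchReadSupport cached;

  @Override
  public BatchReadSupport createBatchReadSupport(DataSourceOptions options) {
    if (cached == null) {
      cached = buildReadSupport(options);
    }
    return cached;
  }

  private BatchReadSupport buildReadSupport(DataSourceOptions options) {
    // ... construct a BatchReadSupport for this source (omitted in this sketch) ...
    throw new UnsupportedOperationException("sketch only");
  }
}
```

In other words, the `cached` field above only makes the `BatchReadSupport` reusable within a single provider instance, i.e. within a single `load()` call, not across queries.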
---