This is an automated email from the ASF dual-hosted git repository.
lixiao pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 45d834c [SPARK-30779][SS] Fix some API issues found when reviewing Structured Streaming API docs
45d834c is described below
commit 45d834cb8cc2c30f902d0dec1cdf561b993521d0
Author: Shixiong Zhu <[email protected]>
AuthorDate: Mon Feb 10 14:26:14 2020 -0800
[SPARK-30779][SS] Fix some API issues found when reviewing Structured Streaming API docs
### What changes were proposed in this pull request?
- Fix the scope of `Logging.initializeForcefully` so that it doesn't appear in subclasses' public methods. Right now, `sc.initializeForcefully(false, false)` can be called from user code (see the sketch after this list).
- Don't show classes under `org.apache.spark.internal` package in API docs.
- Add missing `since` annotation.
- Fix the scope of `ArrowUtils` to remove it from the API docs.
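For the first item, the change relies on Scala's package-qualified access modifiers. A minimal sketch (a simplified stand-in, not the actual Spark sources) of how `private[spark]` keeps the method out of subclasses' public API:

```scala
package org.apache.spark.internal

trait Logging {
  // Callable only from code inside org.apache.spark and its sub-packages;
  // it no longer surfaces as a public member of classes that mix in Logging.
  private[spark] def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = {
    // The real trait delegates to initializeLogging(isInterpreter, silent) here.
  }
}
```

With this scope, a call such as `sc.initializeForcefully(false, false)` no longer compiles outside the `org.apache.spark` package, and the method should no longer show up in the generated API docs.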
### Why are the changes needed?
Avoid leaking APIs unintentionally in Spark 3.0.0.
### Does this PR introduce any user-facing change?
No. All these changes are to avoid leaking APIs unintentionally in Spark
3.0.0.
### How was this patch tested?
Manually generated the API docs and verified the above issues have been
fixed.
Closes #27528 from zsxwing/audit-ss-apis.
Authored-by: Shixiong Zhu <[email protected]>
Signed-off-by: Xiao Li <[email protected]>
---
core/src/main/scala/org/apache/spark/internal/Logging.scala | 2 +-
project/SparkBuild.scala | 1 +
.../spark/sql/connector/read/streaming/ContinuousPartitionReader.java | 2 ++
.../sql/connector/read/streaming/ContinuousPartitionReaderFactory.java | 2 ++
.../org/apache/spark/sql/connector/read/streaming/ContinuousStream.java | 2 ++
.../org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java | 2 ++
.../main/java/org/apache/spark/sql/connector/read/streaming/Offset.java | 2 ++
.../org/apache/spark/sql/connector/read/streaming/PartitionOffset.java | 2 ++
.../java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java | 1 +
.../org/apache/spark/sql/connector/read/streaming/SparkDataStream.java | 2 ++
.../spark/sql/connector/write/streaming/StreamingDataWriterFactory.java | 2 ++
.../org/apache/spark/sql/connector/write/streaming/StreamingWrite.java | 2 ++
sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 2 +-
13 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala
index 2e4846b..0c1d963 100644
--- a/core/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -117,7 +117,7 @@ trait Logging {
}
// For testing
- def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = {
+ private[spark] def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = {
initializeLogging(isInterpreter, silent)
}
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 707c31d..9d0af3a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -819,6 +819,7 @@ object Unidoc {
.map(_.filterNot(_.getName.contains("$")))
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/deploy")))
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/examples")))
+ .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/internal")))
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/memory")))
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/network")))
.map(_.filterNot(f =>
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
index 8bd5273..c2ad9ec 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
@@ -22,6 +22,8 @@ import org.apache.spark.sql.connector.read.PartitionReader;
/**
* A variation on {@link PartitionReader} for use with continuous streaming processing.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface ContinuousPartitionReader<T> extends PartitionReader<T> {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
index 962864d..385c6f6 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
@@ -27,6 +27,8 @@ import org.apache.spark.sql.vectorized.ColumnarBatch;
/**
* A variation on {@link PartitionReaderFactory} that returns {@link ContinuousPartitionReader}
* instead of {@link PartitionReader}. It's used for continuous streaming processing.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface ContinuousPartitionReaderFactory extends PartitionReaderFactory {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
index ee01a25..a84578f 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
@@ -23,6 +23,8 @@ import org.apache.spark.sql.connector.read.Scan;
/**
* A {@link SparkDataStream} for streaming queries with continuous mode.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface ContinuousStream extends SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
index ceab0f7..40ecbf0 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
@@ -25,6 +25,8 @@ import org.apache.spark.sql.connector.read.Scan;
/**
* A {@link SparkDataStream} for streaming queries with micro-batch mode.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface MicroBatchStream extends SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
index 400de2a..efb8ebb 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
@@ -25,6 +25,8 @@ import org.apache.spark.annotation.Evolving;
* During execution, offsets provided by the data source implementation will be logged and used as
* restart checkpoints. Each source should provide an offset implementation which the source can use
* to reconstruct a position in the stream up to which data has been seen/processed.
+ *
+ * @since 3.0.0
*/
@Evolving
public abstract class Offset {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
index 35ad3bb..faee230 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
@@ -26,6 +26,8 @@ import org.apache.spark.annotation.Evolving;
* provide a method to merge these into a global Offset.
*
* These offsets must be serializable.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface PartitionOffset extends Serializable {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
index 121ed1a..36f6e05 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
@@ -27,6 +27,7 @@ import org.apache.spark.annotation.Evolving;
* @see SupportsAdmissionControl#latestOffset(Offset, ReadLimit)
* @see ReadAllAvailable
* @see ReadMaxRows
+ * @since 3.0.0
*/
@Evolving
public interface ReadLimit {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
index 1ba0c25..95703e2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
@@ -25,6 +25,8 @@ import org.apache.spark.annotation.Evolving;
*
* Data sources should implement concrete data stream interfaces:
* {@link MicroBatchStream} and {@link ContinuousStream}.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
index 9946867..0923d07 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
@@ -33,6 +33,8 @@ import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
* Note that, the writer factory will be serialized and sent to executors, then the data writer
* will be created on executors and do the actual writing. So this interface must be
* serializable and {@link DataWriter} doesn't need to be.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface StreamingDataWriterFactory extends Serializable {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
index 4f930e1..e3dec3b 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
@@ -40,6 +40,8 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage;
* do it manually in their Spark applications if they want to retry.
*
* Please refer to the documentation of commit/abort methods for detailed specifications.
+ *
+ * @since 3.0.0
*/
@Evolving
public interface StreamingWrite {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index 2da0d1a..003ce85 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -27,7 +27,7 @@ import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
-object ArrowUtils {
+private[sql] object ArrowUtils {
val rootAllocator = new RootAllocator(Long.MaxValue)