This is an automated email from the ASF dual-hosted git repository.

lixiao pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 45d834c  [SPARK-30779][SS] Fix some API issues found when reviewing Structured Streaming API docs
45d834c is described below

commit 45d834cb8cc2c30f902d0dec1cdf561b993521d0
Author: Shixiong Zhu <zsxw...@gmail.com>
AuthorDate: Mon Feb 10 14:26:14 2020 -0800

    [SPARK-30779][SS] Fix some API issues found when reviewing Structured Streaming API docs

    ### What changes were proposed in this pull request?

    - Fix the scope of `Logging.initializeForcefully` so that it doesn't appear in subclasses' public methods. Right now, `sc.initializeForcefully(false, false)` is allowed to be called.
    - Don't show classes under the `org.apache.spark.internal` package in API docs.
    - Add missing `since` annotation.
    - Fix the scope of `ArrowUtils` to remove it from the API docs.

    ### Why are the changes needed?

    Avoid leaking APIs unintentionally in Spark 3.0.0.

    ### Does this PR introduce any user-facing change?

    No. All these changes are to avoid leaking APIs unintentionally in Spark 3.0.0.

    ### How was this patch tested?

    Manually generated the API docs and verified the above issues have been fixed.

    Closes #27528 from zsxwing/audit-ss-apis.

    Authored-by: Shixiong Zhu <zsxw...@gmail.com>
    Signed-off-by: Xiao Li <gatorsm...@gmail.com>
---
 core/src/main/scala/org/apache/spark/internal/Logging.scala | 2 +-
 project/SparkBuild.scala | 1 +
 .../spark/sql/connector/read/streaming/ContinuousPartitionReader.java | 2 ++
 .../sql/connector/read/streaming/ContinuousPartitionReaderFactory.java | 2 ++
 .../org/apache/spark/sql/connector/read/streaming/ContinuousStream.java | 2 ++
 .../org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java | 2 ++
 .../main/java/org/apache/spark/sql/connector/read/streaming/Offset.java | 2 ++
 .../org/apache/spark/sql/connector/read/streaming/PartitionOffset.java | 2 ++
 .../java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java | 1 +
 .../org/apache/spark/sql/connector/read/streaming/SparkDataStream.java | 2 ++
 .../spark/sql/connector/write/streaming/StreamingDataWriterFactory.java | 2 ++
 .../org/apache/spark/sql/connector/write/streaming/StreamingWrite.java | 2 ++
 sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 2 +-
 13 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala
index 2e4846b..0c1d963 100644
--- a/core/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -117,7 +117,7 @@ trait Logging {
   }

   // For testing
-  def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = {
+  private[spark] def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = {
     initializeLogging(isInterpreter, silent)
   }

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 707c31d..9d0af3a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -819,6 +819,7 @@ object Unidoc {
       .map(_.filterNot(_.getName.contains("$")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/deploy")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/examples")))
+      .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/internal")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/memory")))
       .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/network")))
       .map(_.filterNot(f =>
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
index 8bd5273..c2ad9ec 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReader.java
@@ -22,6 +22,8 @@ import org.apache.spark.sql.connector.read.PartitionReader;

 /**
  * A variation on {@link PartitionReader} for use with continuous streaming processing.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface ContinuousPartitionReader<T> extends PartitionReader<T> {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
index 962864d..385c6f6 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousPartitionReaderFactory.java
@@ -27,6 +27,8 @@ import org.apache.spark.sql.vectorized.ColumnarBatch;
 /**
  * A variation on {@link PartitionReaderFactory} that returns {@link ContinuousPartitionReader}
  * instead of {@link PartitionReader}. It's used for continuous streaming processing.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface ContinuousPartitionReaderFactory extends PartitionReaderFactory {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
index ee01a25..a84578f 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ContinuousStream.java
@@ -23,6 +23,8 @@ import org.apache.spark.sql.connector.read.Scan;

 /**
  * A {@link SparkDataStream} for streaming queries with continuous mode.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface ContinuousStream extends SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
index ceab0f7..40ecbf0 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/MicroBatchStream.java
@@ -25,6 +25,8 @@ import org.apache.spark.sql.connector.read.Scan;

 /**
  * A {@link SparkDataStream} for streaming queries with micro-batch mode.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface MicroBatchStream extends SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
index 400de2a..efb8ebb 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/Offset.java
@@ -25,6 +25,8 @@ import org.apache.spark.annotation.Evolving;
  * During execution, offsets provided by the data source implementation will be logged and used as
  * restart checkpoints. Each source should provide an offset implementation which the source can use
  * to reconstruct a position in the stream up to which data has been seen/processed.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public abstract class Offset {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
index 35ad3bb..faee230 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/PartitionOffset.java
@@ -26,6 +26,8 @@ import org.apache.spark.annotation.Evolving;
  * provide a method to merge these into a global Offset.
  *
  * These offsets must be serializable.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface PartitionOffset extends Serializable {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
index 121ed1a..36f6e05 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/ReadLimit.java
@@ -27,6 +27,7 @@ import org.apache.spark.annotation.Evolving;
  * @see SupportsAdmissionControl#latestOffset(Offset, ReadLimit)
  * @see ReadAllAvailable
  * @see ReadMaxRows
+ * @since 3.0.0
  */
 @Evolving
 public interface ReadLimit {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
index 1ba0c25..95703e2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SparkDataStream.java
@@ -25,6 +25,8 @@ import org.apache.spark.annotation.Evolving;
  *
  * Data sources should implement concrete data stream interfaces:
  * {@link MicroBatchStream} and {@link ContinuousStream}.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface SparkDataStream {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
index 9946867..0923d07 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingDataWriterFactory.java
@@ -33,6 +33,8 @@ import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
  * Note that, the writer factory will be serialized and sent to executors, then the data writer
  * will be created on executors and do the actual writing. So this interface must be
  * serializable and {@link DataWriter} doesn't need to be.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface StreamingDataWriterFactory extends Serializable {
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
index 4f930e1..e3dec3b 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/streaming/StreamingWrite.java
@@ -40,6 +40,8 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage;
  * do it manually in their Spark applications if they want to retry.
  *
  * Please refer to the documentation of commit/abort methods for detailed specifications.
+ *
+ * @since 3.0.0
  */
 @Evolving
 public interface StreamingWrite {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index 2da0d1a..003ce85 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -27,7 +27,7 @@ import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._

-object ArrowUtils {
+private[sql] object ArrowUtils {

   val rootAllocator = new RootAllocator(Long.MaxValue)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
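
For context on the Logging.scala and ArrowUtils.scala hunks: `private[spark]` and `private[sql]` are Scala qualified access modifiers, so the members stay callable from Spark-internal code while dropping out of the public API surface and the generated docs. Below is a minimal, self-contained sketch of that pattern using hypothetical `example`/`outside` package and class names (not Spark source); it assumes nothing beyond a plain Scala compiler.

// Sketch of the package-qualified access pattern (hypothetical names).
package example.internal {
  trait Logging {
    // Visible anywhere under the `example` package, inaccessible outside it,
    // and therefore absent from the public API docs.
    private[example] def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit =
      println(s"forcing log (re)initialization: interpreter=$isInterpreter, silent=$silent")
  }
}

package example {
  // A stand-in for SparkContext: it lives under `example`, so the call compiles.
  class SparkContextLike extends internal.Logging {
    def start(): Unit = initializeForcefully(isInterpreter = false, silent = false)
  }
}

package outside {
  object Demo {
    def main(args: Array[String]): Unit = {
      val sc = new example.SparkContextLike
      sc.start()                               // fine: goes through the public method
      // sc.initializeForcefully(false, false) // would not compile: private[example]
    }
  }
}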