Repository: spark
Updated Branches:
refs/heads/branch-2.4 1a335444e -> 0f58b989d
[STREAMING][DOC] Fix typo & formatting for JavaDoc
## What changes were proposed in this pull request?
- Fixed typo for function outputMode
- OutputMode.Complete(), changed `these is some updates` to `there are
some updates`
- Replaced hyphenated lists with HTML unordered list tags in comments to fix the
Javadoc documentation.
Current render from most recent [Spark API
Docs](https://spark.apache.org/docs/2.3.1/api/java/org/apache/spark/sql/streaming/DataStreamWriter.html):
#### outputMode(OutputMode) - List formatted as prose.

#### outputMode(String) - List formatted as prose.

#### partitionBy(String*) - List formatted as prose.

## How was this patch tested?
This PR contains a documentation-only patch; ergo, no functional testing is required.
Closes #22593 from niofire/fix-typo-datastreamwriter.
Authored-by: Mathieu St-Louis <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
(cherry picked from commit 4e141a416082cb978396ffbd6bf529b168652b9d)
Signed-off-by: Sean Owen <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f58b989
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f58b989
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f58b989
Branch: refs/heads/branch-2.4
Commit: 0f58b989d4ebc8218880f8a4a32dc8189e7fbb43
Parents: 1a33544
Author: Mathieu St-Louis <[email protected]>
Authored: Fri Oct 12 14:09:10 2018 -0500
Committer: Sean Owen <[email protected]>
Committed: Fri Oct 12 14:09:24 2018 -0500
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/Column.scala | 14 +++----
.../org/apache/spark/sql/DataFrameWriter.scala | 27 +++++++------
.../org/apache/spark/sql/ForeachWriter.scala | 7 +++-
.../spark/sql/SparkSessionExtensions.scala | 15 ++++---
.../spark/sql/streaming/DataStreamWriter.scala | 41 ++++++++++++--------
5 files changed, 61 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0f58b989/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index ae27690..a046127 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -199,13 +199,13 @@ class Column(val expr: Expression) extends Logging {
/**
* Extracts a value or values from a complex type.
* The following types of extraction are supported:
- *
- * - Given an Array, an integer ordinal can be used to retrieve a single
value.
- * - Given a Map, a key of the correct type can be used to retrieve an
individual value.
- * - Given a Struct, a string fieldName can be used to extract that field.
- * - Given an Array of Structs, a string fieldName can be used to extract
filed
- * of every struct in that array, and return an Array of fields
- *
+ * <ul>
+ * <li>Given an Array, an integer ordinal can be used to retrieve a single
value.</li>
+ * <li>Given a Map, a key of the correct type can be used to retrieve an
individual value.</li>
+ * <li>Given a Struct, a string fieldName can be used to extract that
field.</li>
+ * <li>Given an Array of Structs, a string fieldName can be used to extract
filed
+ * of every struct in that array, and return an Array of fields.</li>
+ * </ul>
* @group expr_ops
* @since 1.4.0
*/
http://git-wip-us.apache.org/repos/asf/spark/blob/0f58b989/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 80ade7c..c1e2f49 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -47,10 +47,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T])
{
/**
* Specifies the behavior when data or table already exists. Options include:
- * - `SaveMode.Overwrite`: overwrite the existing data.
- * - `SaveMode.Append`: append the data.
- * - `SaveMode.Ignore`: ignore the operation (i.e. no-op).
- * - `SaveMode.ErrorIfExists`: default option, throw an exception at
runtime.
+ * <ul>
+ * <li>`SaveMode.Overwrite`: overwrite the existing data.</li>
+ * <li>`SaveMode.Append`: append the data.</li>
+ * <li>`SaveMode.Ignore`: ignore the operation (i.e. no-op).</li>
+ * <li>`SaveMode.ErrorIfExists`: default option, throw an exception at
runtime.</li>
+ * </ul>
*
* @since 1.4.0
*/
@@ -61,10 +63,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T])
{
/**
* Specifies the behavior when data or table already exists. Options include:
- * - `overwrite`: overwrite the existing data.
- * - `append`: append the data.
- * - `ignore`: ignore the operation (i.e. no-op).
- * - `error` or `errorifexists`: default option, throw an exception at
runtime.
+ * <ul>
+ * <li>`overwrite`: overwrite the existing data.</li>
+ * <li>`append`: append the data.</li>
+ * <li>`ignore`: ignore the operation (i.e. no-op).</li>
+ * <li>`error` or `errorifexists`: default option, throw an exception at
runtime.</li>
+ * </ul>
*
* @since 1.4.0
*/
@@ -163,9 +167,10 @@ final class DataFrameWriter[T] private[sql](ds:
Dataset[T]) {
* Partitions the output by the given columns on the file system. If
specified, the output is
* laid out on the file system similar to Hive's partitioning scheme. As an
example, when we
* partition a dataset by year and then month, the directory layout would
look like:
- *
- * - year=2016/month=01/
- * - year=2016/month=02/
+ * <ul>
+ * <li>year=2016/month=01/</li>
+ * <li>year=2016/month=02/</li>
+ * </ul>
*
* Partitioning is one of the most widely used techniques to optimize
physical data layout.
* It provides a coarse-grained index for skipping unnecessary data reads
when queries have
http://git-wip-us.apache.org/repos/asf/spark/blob/0f58b989/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
index b21c50a..52b8c83 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala
@@ -130,8 +130,11 @@ abstract class ForeachWriter[T] extends Serializable {
* Called when stopping to process one partition of new data in the executor
side. This is
* guaranteed to be called either `open` returns `true` or `false`. However,
* `close` won't be called in the following cases:
- * - JVM crashes without throwing a `Throwable`
- * - `open` throws a `Throwable`.
+ *
+ * <ul>
+ * <li>JVM crashes without throwing a `Throwable`</li>
+ * <li>`open` throws a `Throwable`.</li>
+ * </ul>
*
* @param errorOrNull the error thrown during processing data or null if
there was no error.
*/
http://git-wip-us.apache.org/repos/asf/spark/blob/0f58b989/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
index f99c108..6b02ac2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
@@ -30,12 +30,15 @@ import org.apache.spark.sql.catalyst.rules.Rule
* regarding binary compatibility and source compatibility of methods here.
*
* This current provides the following extension points:
- * - Analyzer Rules.
- * - Check Analysis Rules
- * - Optimizer Rules.
- * - Planning Strategies.
- * - Customized Parser.
- * - (External) Catalog listeners.
+ *
+ * <ul>
+ * <li>Analyzer Rules.</li>
+ * <li>Check Analysis Rules.</li>
+ * <li>Optimizer Rules.</li>
+ * <li>Planning Strategies.</li>
+ * <li>Customized Parser.</li>
+ * <li>(External) Catalog listeners.</li>
+ * </ul>
*
* The extensions can be used by calling withExtension on the
[[SparkSession.Builder]], for
* example:
http://git-wip-us.apache.org/repos/asf/spark/blob/0f58b989/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index 4eb2918..ec7eb1d 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -46,14 +46,16 @@ final class DataStreamWriter[T] private[sql](ds:
Dataset[T]) {
/**
* Specifies how data of a streaming DataFrame/Dataset is written to a
streaming sink.
- * - `OutputMode.Append()`: only the new rows in the streaming
DataFrame/Dataset will be
- * written to the sink
- * - `OutputMode.Complete()`: all the rows in the streaming
DataFrame/Dataset will be written
- * to the sink every time these is some updates
- * - `OutputMode.Update()`: only the rows that were updated in the
streaming DataFrame/Dataset
- * will be written to the sink every time there
are some updates. If
- * the query doesn't contain aggregations, it
will be equivalent to
- * `OutputMode.Append()` mode.
+ * <ul>
+ * <li> `OutputMode.Append()`: only the new rows in the streaming
DataFrame/Dataset will be
+ * written to the sink.</li>
+ * <li> `OutputMode.Complete()`: all the rows in the streaming
DataFrame/Dataset will be written
+ * to the sink every time there are some updates.</li>
+ * <li> `OutputMode.Update()`: only the rows that were updated in the
streaming
+ * DataFrame/Dataset will be written to the sink every time there are some
updates.
+ * If the query doesn't contain aggregations, it will be equivalent to
+ * `OutputMode.Append()` mode.</li>
+ * </ul>
*
* @since 2.0.0
*/
@@ -64,13 +66,16 @@ final class DataStreamWriter[T] private[sql](ds:
Dataset[T]) {
/**
* Specifies how data of a streaming DataFrame/Dataset is written to a
streaming sink.
- * - `append`: only the new rows in the streaming DataFrame/Dataset will
be written to
- * the sink
- * - `complete`: all the rows in the streaming DataFrame/Dataset will be
written to the sink
- * every time these is some updates
- * - `update`: only the rows that were updated in the streaming
DataFrame/Dataset will
- * be written to the sink every time there are some updates.
If the query doesn't
- * contain aggregations, it will be equivalent to `append`
mode.
+ * <ul>
+ * <li> `append`: only the new rows in the streaming DataFrame/Dataset will
be written to
+ * the sink.</li>
+ * <li> `complete`: all the rows in the streaming DataFrame/Dataset will be
written to the sink
+ * every time there are some updates.</li>
+ * <li> `update`: only the rows that were updated in the streaming
DataFrame/Dataset will
+ * be written to the sink every time there are some updates. If the query
doesn't
+ * contain aggregations, it will be equivalent to `append` mode.</li>
+ * </ul>
+ *
* @since 2.0.0
*/
def outputMode(outputMode: String): DataStreamWriter[T] = {
@@ -131,8 +136,10 @@ final class DataStreamWriter[T] private[sql](ds:
Dataset[T]) {
* laid out on the file system similar to Hive's partitioning scheme. As an
example, when we
* partition a dataset by year and then month, the directory layout would
look like:
*
- * - year=2016/month=01/
- * - year=2016/month=02/
+ * <ul>
+ * <li> year=2016/month=01/</li>
+ * <li> year=2016/month=02/</li>
+ * </ul>
*
* Partitioning is one of the most widely used techniques to optimize
physical data layout.
* It provides a coarse-grained index for skipping unnecessary data reads
when queries have
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]