Repository: spark
Updated Branches:
  refs/heads/master b454d4402 -> 0197262a3


[DOCS] Docs-only improvements

…adoc

## What changes were proposed in this pull request?

Use the recommended values for row boundaries in Window's scaladoc, i.e.
`Window.unboundedPreceding`, `Window.unboundedFollowing`, and
`Window.currentRow` (introduced in 2.1.0).
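
For illustration only (not part of the patch), a minimal standalone sketch of the recommended style; the local `SparkSession` setup and the `id`/`category` columns are assumptions borrowed from the scaladoc example:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

val spark = SparkSession.builder.master("local").appName("WindowBoundariesExample").getOrCreate()
import spark.implicits._

val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")).toDF("id", "category")

// Previously documented style: rowsBetween(Long.MinValue, 0)
// Recommended style: use the named boundary constants instead of raw Long values.
val byCategoryOrderedById = Window.partitionBy("category").orderBy("id")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

df.withColumn("sum", sum("id").over(byCategoryOrderedById)).show()
```

The named constants read the same way as the SQL frame clause they produce (`ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`), which is the point of the scaladoc change.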

## How was this patch tested?

Local build

Author: Jacek Laskowski <[email protected]>

Closes #17417 from jaceklaskowski/window-expression-scaladoc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0197262a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0197262a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0197262a

Branch: refs/heads/master
Commit: 0197262a358fd174a188f8246ae777e53157610e
Parents: b454d44
Author: Jacek Laskowski <[email protected]>
Authored: Thu Mar 30 16:07:27 2017 +0100
Committer: Sean Owen <[email protected]>
Committed: Thu Mar 30 16:07:27 2017 +0100

----------------------------------------------------------------------
 .../org/apache/spark/memory/MemoryConsumer.java |  2 --
 .../sort/BypassMergeSortShuffleWriter.java      |  5 ++---
 .../apache/spark/ExecutorAllocationClient.scala |  5 ++---
 .../scala/org/apache/spark/scheduler/Task.scala |  2 +-
 .../apache/spark/serializer/Serializer.scala    |  2 +-
 .../spark/shuffle/BlockStoreShuffleReader.scala |  3 +--
 .../shuffle/IndexShuffleBlockResolver.scala     |  4 ++--
 .../spark/shuffle/sort/SortShuffleManager.scala |  4 ++--
 .../org/apache/spark/util/AccumulatorV2.scala   |  2 +-
 .../spark/examples/ml/DataFrameExample.scala    |  2 +-
 .../org/apache/spark/ml/stat/Correlation.scala  |  2 +-
 .../sql/catalyst/analysis/ResolveHints.scala    |  2 +-
 .../catalyst/encoders/ExpressionEncoder.scala   |  6 ++---
 .../sql/catalyst/expressions/Expression.scala   |  2 +-
 .../expressions/windowExpressions.scala         |  2 +-
 .../spark/sql/catalyst/optimizer/objects.scala  |  2 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  6 ++---
 .../spark/sql/catalyst/plans/QueryPlan.scala    |  5 +++--
 .../catalyst/plans/logical/LogicalPlan.scala    |  2 +-
 .../catalyst/parser/ExpressionParserSuite.scala |  3 ++-
 .../scala/org/apache/spark/sql/Column.scala     | 18 +++++++--------
 .../org/apache/spark/sql/DatasetHolder.scala    |  3 ++-
 .../org/apache/spark/sql/SparkSession.scala     |  2 +-
 .../spark/sql/execution/command/databases.scala |  2 +-
 .../spark/sql/execution/streaming/Source.scala  |  2 +-
 .../apache/spark/sql/expressions/Window.scala   | 23 ++++++++++----------
 .../spark/sql/expressions/WindowSpec.scala      | 20 ++++++++---------
 .../scala/org/apache/spark/sql/functions.scala  |  2 +-
 .../sql/hive/HiveSessionStateBuilder.scala      |  2 +-
 .../streaming/scheduler/InputInfoTracker.scala  |  2 +-
 30 files changed, 68 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java 
b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
index fc1f3a8..48cf4b9 100644
--- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
+++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
@@ -60,8 +60,6 @@ public abstract class MemoryConsumer {
 
   /**
    * Force spill during building.
-   *
-   * For testing.
    */
   public void spill() throws IOException {
     spill(Long.MAX_VALUE, this);

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
 
b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
index 4a15559..323a5d3 100644
--- 
a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
+++ 
b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
@@ -52,8 +52,7 @@ import org.apache.spark.util.Utils;
  * This class implements sort-based shuffle's hash-style shuffle fallback 
path. This write path
  * writes incoming records to separate files, one file per reduce partition, 
then concatenates these
  * per-partition files to form a single output file, regions of which are 
served to reducers.
- * Records are not buffered in memory. This is essentially identical to
- * {@link org.apache.spark.shuffle.hash.HashShuffleWriter}, except that it 
writes output in a format
+ * Records are not buffered in memory. It writes output in a format
  * that can be served / consumed via {@link 
org.apache.spark.shuffle.IndexShuffleBlockResolver}.
  * <p>
  * This write path is inefficient for shuffles with large numbers of reduce 
partitions because it
@@ -61,7 +60,7 @@ import org.apache.spark.util.Utils;
  * {@link SortShuffleManager} only selects this write path when
  * <ul>
  *    <li>no Ordering is specified,</li>
- *    <li>no Aggregator is specific, and</li>
+ *    <li>no Aggregator is specified, and</li>
  *    <li>the number of partitions is less than
  *      <code>spark.shuffle.sort.bypassMergeThreshold</code>.</li>
  * </ul>

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala 
b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
index e4b9f81..9112d93 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
@@ -71,13 +71,12 @@ private[spark] trait ExecutorAllocationClient {
 
   /**
    * Request that the cluster manager kill every executor on the specified 
host.
-   * Results in a call to killExecutors for each executor on the host, with 
the replace
-   * and force arguments set to true.
+   *
    * @return whether the request is acknowledged by the cluster manager.
    */
   def killExecutorsOnHost(host: String): Boolean
 
-    /**
+  /**
    * Request that the cluster manager kill the specified executor.
    * @return whether the request is acknowledged by the cluster manager.
    */

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/scheduler/Task.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala 
b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
index 46ef23f..7fd2918 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -149,7 +149,7 @@ private[spark] abstract class Task[T](
 
   def preferredLocations: Seq[TaskLocation] = Nil
 
-  // Map output tracker epoch. Will be set by TaskScheduler.
+  // Map output tracker epoch. Will be set by TaskSetManager.
   var epoch: Long = -1
 
   // Task context, to be initialized in run().

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala 
b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
index 008b038..01bbda0 100644
--- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
@@ -77,7 +77,7 @@ abstract class Serializer {
    * position = 0
    * serOut.write(obj1)
    * serOut.flush()
-   * position = # of bytes writen to stream so far
+   * position = # of bytes written to stream so far
    * obj1Bytes = output[0:position-1]
    * serOut.write(obj2)
    * serOut.flush()

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala 
b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
index 8b2e26c..ba3e0e3 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -95,8 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
     // Sort the output if there is a sort ordering defined.
     dep.keyOrdering match {
       case Some(keyOrd: Ordering[K]) =>
-        // Create an ExternalSorter to sort the data. Note that if 
spark.shuffle.spill is disabled,
-        // the ExternalSorter won't spill to disk.
+        // Create an ExternalSorter to sort the data.
         val sorter =
           new ExternalSorter[K, C, C](context, ordering = Some(keyOrd), 
serializer = dep.serializer)
         sorter.insertAll(aggregatedIter)

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala 
b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
index 91858f0..1554048 100644
--- 
a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
+++ 
b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
@@ -61,7 +61,7 @@ private[spark] class IndexShuffleBlockResolver(
 
   /**
    * Remove data file and index file that contain the output data from one map.
-   * */
+   */
   def removeDataByMap(shuffleId: Int, mapId: Int): Unit = {
     var file = getDataFile(shuffleId, mapId)
     if (file.exists()) {
@@ -132,7 +132,7 @@ private[spark] class IndexShuffleBlockResolver(
    * replace them with new ones.
    *
    * Note: the `lengths` will be updated to match the existing index file if 
use the existing ones.
-   * */
+   */
   def writeIndexFileAndCommit(
       shuffleId: Int,
       mapId: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala 
b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index 5e977a1..bfb4dc6 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -82,13 +82,13 @@ private[spark] class SortShuffleManager(conf: SparkConf) 
extends ShuffleManager
   override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)
 
   /**
-   * Register a shuffle with the manager and obtain a handle for it to pass to 
tasks.
+   * Obtains a [[ShuffleHandle]] to pass to tasks.
    */
   override def registerShuffle[K, V, C](
       shuffleId: Int,
       numMaps: Int,
       dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
-    if (SortShuffleWriter.shouldBypassMergeSort(SparkEnv.get.conf, 
dependency)) {
+    if (SortShuffleWriter.shouldBypassMergeSort(conf, dependency)) {
       // If there are fewer than spark.shuffle.sort.bypassMergeThreshold 
partitions and we don't
       // need map-side aggregation, then write numPartitions files directly 
and just concatenate
       // them at the end. This avoids doing serialization and deserialization 
twice to merge

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala 
b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index 00e0cf2..7479de5 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -279,7 +279,7 @@ private[spark] object AccumulatorContext {
 
 
 /**
- * An [[AccumulatorV2 accumulator]] for computing sum, count, and averages for 
64-bit integers.
+ * An [[AccumulatorV2 accumulator]] for computing sum, count, and average of 
64-bit integers.
  *
  * @since 2.0.0
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala 
b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
index e07c9a4..0658bdd 100644
--- 
a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
+++ 
b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.util.Utils
 
 /**
- * An example of how to use [[org.apache.spark.sql.DataFrame]] for ML. Run with
+ * An example of how to use [[DataFrame]] for ML. Run with
  * {{{
  * ./bin/run-example ml.DataFrameExample [options]
  * }}}

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala 
b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
index a7243cc..d3c84b7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
 /**
  * API for correlation functions in MLlib, compatible with Dataframes and 
Datasets.
  *
- * The functions in this package generalize the functions in 
[[org.apache.spark.sql.Dataset.stat]]
+ * The functions in this package generalize the functions in 
[[org.apache.spark.sql.Dataset#stat]]
  * to spark.ml's Vector types.
  */
 @Since("2.2.0")

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
index 70438eb..920033a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 /**
  * Collection of rules related to hints. The only hint currently available is 
broadcast join hint.
  *
- * Note that this is separatedly into two rules because in the future we might 
introduce new hint
+ * Note that this is separately into two rules because in the future we might 
introduce new hint
  * rules that have different ordering requirements from broadcast.
  */
 object ResolveHints {

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index 93fc565..ec003cd 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -229,9 +229,9 @@ case class ExpressionEncoder[T](
   // serializer expressions are used to encode an object to a row, while the 
object is usually an
   // intermediate value produced inside an operator, not from the output of 
the child operator. This
   // is quite different from normal expressions, and `AttributeReference` 
doesn't work here
-  // (intermediate value is not an attribute). We assume that all serializer 
expressions use a same
-  // `BoundReference` to refer to the object, and throw exception if they 
don't.
-  assert(serializer.forall(_.references.isEmpty), "serializer cannot reference 
to any attributes.")
+  // (intermediate value is not an attribute). We assume that all serializer 
expressions use the
+  // same `BoundReference` to refer to the object, and throw exception if they 
don't.
+  assert(serializer.forall(_.references.isEmpty), "serializer cannot reference 
any attributes.")
   assert(serializer.flatMap { ser =>
     val boundRefs = ser.collect { case b: BoundReference => b }
     assert(boundRefs.nonEmpty,

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index b93a5d0..1db26d9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -491,7 +491,7 @@ abstract class BinaryExpression extends Expression {
  * A [[BinaryExpression]] that is an operator, with two properties:
  *
  * 1. The string representation is "x symbol y", rather than "funcName(x, y)".
- * 2. Two inputs are expected to the be same type. If the two inputs have 
different types,
+ * 2. Two inputs are expected to be of the same type. If the two inputs have 
different types,
  *    the analyzer will find the tightest common type and do the proper type 
casting.
  */
 abstract class BinaryOperator extends BinaryExpression with ExpectsInputTypes {

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 07d294b..b2a3888 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -695,7 +695,7 @@ case class DenseRank(children: Seq[Expression]) extends 
RankLike {
  *
  * This documentation has been based upon similar documentation for the Hive 
and Presto projects.
  *
- * @param children to base the rank on; a change in the value of one the 
children will trigger a
+ * @param children to base the rank on; a change in the value of one of the 
children will trigger a
  *                 change in rank. This is an internal parameter and will be 
assigned by the
  *                 Analyser.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
index 174d546..257dbfa 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
@@ -65,7 +65,7 @@ object EliminateSerialization extends Rule[LogicalPlan] {
 
 /**
  * Combines two adjacent [[TypedFilter]]s, which operate on same type object 
in condition, into one,
- * mering the filter functions into one conjunctive function.
+ * merging the filter functions into one conjunctive function.
  */
 object CombineTypedFilters extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index cd238e0..162051a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -492,7 +492,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
   }
 
   /**
-   * Add an [[Aggregate]] to a logical plan.
+   * Add an [[Aggregate]] or [[GroupingSets]] to a logical plan.
    */
   private def withAggregation(
       ctx: AggregationContext,
@@ -519,7 +519,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
   }
 
   /**
-   * Add a Hint to a logical plan.
+   * Add a [[Hint]] to a logical plan.
    */
   private def withHints(
       ctx: HintContext,
@@ -545,7 +545,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
   }
 
   /**
-   * Create a single relation referenced in a FROM claused. This method is 
used when a part of the
+   * Create a single relation referenced in a FROM clause. This method is used 
when a part of the
    * join condition is nested, for example:
    * {{{
    *   select * from t1 join (t2 cross join t3) on col1 = col2

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 9fd95a4..2d8ec20 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -230,14 +230,15 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] 
extends TreeNode[PlanT
   def producedAttributes: AttributeSet = AttributeSet.empty
 
   /**
-   * Attributes that are referenced by expressions but not provided by this 
nodes children.
+   * Attributes that are referenced by expressions but not provided by this 
node's children.
    * Subclasses should override this method if they produce attributes 
internally as it is used by
    * assertions designed to prevent the construction of invalid plans.
    */
   def missingInput: AttributeSet = references -- inputSet -- producedAttributes
 
   /**
-   * Runs [[transform]] with `rule` on all expressions present in this query 
operator.
+   * Runs [[transformExpressionsDown]] with `rule` on all expressions present
+   * in this query operator.
    * Users should not expect a specific directionality. If a specific 
directionality is needed,
    * transformExpressionsDown or transformExpressionsUp should be used.
    *

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index e22b429..f71a976 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -32,7 +32,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] 
with Logging {
   private var _analyzed: Boolean = false
 
   /**
-   * Marks this plan as already analyzed.  This should only be called by 
CheckAnalysis.
+   * Marks this plan as already analyzed. This should only be called by 
[[CheckAnalysis]].
    */
   private[catalyst] def setAnalyzed(): Unit = { _analyzed = true }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index c2e62e7..d1c6b50 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -26,7 +26,8 @@ import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 
 /**
- * Test basic expression parsing. If a type of expression is supported it 
should be tested here.
+ * Test basic expression parsing.
+ * If the type of an expression is supported it should be tested here.
  *
  * Please note that some of the expressions test don't have to be sound 
expressions, only their
  * structure needs to be valid. Unsound expressions should be caught by the 
Analyzer or

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index ae07035..43de2de 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -84,8 +84,8 @@ class TypedColumn[-T, U](
   }
 
   /**
-   * Gives the TypedColumn a name (alias).
-   * If the current TypedColumn has metadata associated with it, this metadata 
will be propagated
+   * Gives the [[TypedColumn]] a name (alias).
+   * If the current `TypedColumn` has metadata associated with it, this 
metadata will be propagated
    * to the new column.
    *
    * @group expr_ops
@@ -99,16 +99,14 @@ class TypedColumn[-T, U](
 /**
  * A column that will be computed based on the data in a `DataFrame`.
  *
- * A new column is constructed based on the input columns present in a 
dataframe:
+ * A new column can be constructed based on the input columns present in a 
DataFrame:
  *
  * {{{
- *   df("columnName")            // On a specific DataFrame.
+ *   df("columnName")            // On a specific `df` DataFrame.
  *   col("columnName")           // A generic column no yet associated with a 
DataFrame.
  *   col("columnName.field")     // Extracting a struct field
  *   col("`a.column.with.dots`") // Escape `.` in column names.
  *   $"columnName"               // Scala short hand for a named column.
- *   expr("a + 1")               // A column that is constructed from a parsed 
SQL Expression.
- *   lit("abc")                  // A column that produces a literal 
(constant) value.
  * }}}
  *
  * [[Column]] objects can be composed to form complex expressions:
@@ -118,7 +116,7 @@ class TypedColumn[-T, U](
  *   $"a" === $"b"
  * }}}
  *
- * @note The internal Catalyst expression can be accessed via "expr", but this 
method is for
+ * @note The internal Catalyst expression can be accessed via [[expr]], but 
this method is for
  * debugging purposes only and can change in any future Spark releases.
  *
  * @groupname java_expr_ops Java-specific expression operators
@@ -1100,7 +1098,7 @@ class Column(val expr: Expression) extends Logging {
   def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, 
NullsLast, Set.empty) }
 
   /**
-   * Prints the expression to the console for debugging purpose.
+   * Prints the expression to the console for debugging purposes.
    *
    * @group df_ops
    * @since 1.3.0
@@ -1154,8 +1152,8 @@ class Column(val expr: Expression) extends Logging {
    * {{{
    *   val w = Window.partitionBy("name").orderBy("id")
    *   df.select(
-   *     sum("price").over(w.rangeBetween(Long.MinValue, 2)),
-   *     avg("price").over(w.rowsBetween(0, 4))
+   *     sum("price").over(w.rangeBetween(Window.unboundedPreceding, 2)),
+   *     avg("price").over(w.rowsBetween(Window.currentRow, 4))
    *   )
    * }}}
    *

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
index 18bccee..582d4a3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.InterfaceStability
  *
  * To use this, import implicit conversions in SQL:
  * {{{
- *   import sqlContext.implicits._
+ *   val spark: SparkSession = ...
+ *   import spark.implicits._
  * }}}
  *
  * @since 1.6.0

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index a972978..b604992 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -60,7 +60,7 @@ import org.apache.spark.util.Utils
  * The builder can also be used to create a new session:
  *
  * {{{
- *   SparkSession.builder()
+ *   SparkSession.builder
  *     .master("local")
  *     .appName("Word Count")
  *     .config("spark.some.config.option", "some-value")

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
index e5a6a5f..470c736 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.types.StringType
 
 /**
  * A command for users to list the databases/schemas.
- * If a databasePattern is supplied then the databases that only matches the
+ * If a databasePattern is supplied then the databases that only match the
  * pattern would be listed.
  * The syntax of using this command in SQL is:
  * {{{

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 75ffe90..311942f 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructType
  * monotonically increasing notion of progress that can be represented as an 
[[Offset]]. Spark
  * will regularly query each [[Source]] to see if any more data is available.
  */
-trait Source  {
+trait Source {
 
   /** Returns the schema of the data from this source */
   def schema: StructType

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index f3cf305..0005348 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -113,7 +113,7 @@ object Window {
    * Creates a [[WindowSpec]] with the frame boundaries defined,
    * from `start` (inclusive) to `end` (inclusive).
    *
-   * Both `start` and `end` are relative positions from the current row. For 
example, "0" means
+   * Both `start` and `end` are positions relative to the current row. For 
example, "0" means
    * "current row", while "-1" means the row before the current row, and "5" 
means the fifth row
    * after the current row.
    *
@@ -131,9 +131,9 @@ object Window {
    *   import org.apache.spark.sql.expressions.Window
    *   val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
    *     .toDF("id", "category")
-   *   df.withColumn("sum",
-   *       sum('id) over 
Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
-   *     .show()
+   *   val byCategoryOrderedById =
+   *     
Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+   *   df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
    *
    *   +---+--------+---+
    *   | id|category|sum|
@@ -150,7 +150,7 @@ object Window {
    * @param start boundary start, inclusive. The frame is unbounded if this is
    *              the minimum long value (`Window.unboundedPreceding`).
    * @param end boundary end, inclusive. The frame is unbounded if this is the
-   *            maximum long value  (`Window.unboundedFollowing`).
+   *            maximum long value (`Window.unboundedFollowing`).
    * @since 2.1.0
    */
   // Note: when updating the doc for this method, also update 
WindowSpec.rowsBetween.
@@ -162,7 +162,7 @@ object Window {
    * Creates a [[WindowSpec]] with the frame boundaries defined,
    * from `start` (inclusive) to `end` (inclusive).
    *
-   * Both `start` and `end` are relative from the current row. For example, 
"0" means "current row",
+   * Both `start` and `end` are relative to the current row. For example, "0" 
means "current row",
    * while "-1" means one off before the current row, and "5" means the five 
off after the
    * current row.
    *
@@ -183,9 +183,9 @@ object Window {
    *   import org.apache.spark.sql.expressions.Window
    *   val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
    *     .toDF("id", "category")
-   *   df.withColumn("sum",
-   *       sum('id) over 
Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
-   *     .show()
+   *   val byCategoryOrderedById =
+   *     
Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+   *   df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
    *
    *   +---+--------+---+
    *   | id|category|sum|
@@ -202,7 +202,7 @@ object Window {
    * @param start boundary start, inclusive. The frame is unbounded if this is
    *              the minimum long value (`Window.unboundedPreceding`).
    * @param end boundary end, inclusive. The frame is unbounded if this is the
-   *            maximum long value  (`Window.unboundedFollowing`).
+   *            maximum long value (`Window.unboundedFollowing`).
    * @since 2.1.0
    */
   // Note: when updating the doc for this method, also update 
WindowSpec.rangeBetween.
@@ -221,7 +221,8 @@ object Window {
  *
  * {{{
  *   // PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW
- *   Window.partitionBy("country").orderBy("date").rowsBetween(Long.MinValue, 
0)
+ *   Window.partitionBy("country").orderBy("date")
+ *     .rowsBetween(Window.unboundedPreceding, Window.currentRow)
  *
  *   // PARTITION BY country ORDER BY date ROWS BETWEEN 3 PRECEDING AND 3 
FOLLOWING
  *   Window.partitionBy("country").orderBy("date").rowsBetween(-3, 3)

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index de7d7a1..6279d48 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -86,7 +86,7 @@ class WindowSpec private[sql](
    * after the current row.
    *
    * We recommend users use `Window.unboundedPreceding`, 
`Window.unboundedFollowing`,
-   * and `[Window.currentRow` to specify special boundary values, rather than 
using integral
+   * and `Window.currentRow` to specify special boundary values, rather than 
using integral
    * values directly.
    *
    * A row based boundary is based on the position of the row within the 
partition.
@@ -99,9 +99,9 @@ class WindowSpec private[sql](
    *   import org.apache.spark.sql.expressions.Window
    *   val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
    *     .toDF("id", "category")
-   *   df.withColumn("sum",
-   *       sum('id) over 
Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
-   *     .show()
+   *   val byCategoryOrderedById =
+   *     
Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+   *   df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
    *
    *   +---+--------+---+
    *   | id|category|sum|
@@ -118,7 +118,7 @@ class WindowSpec private[sql](
    * @param start boundary start, inclusive. The frame is unbounded if this is
    *              the minimum long value (`Window.unboundedPreceding`).
    * @param end boundary end, inclusive. The frame is unbounded if this is the
-   *            maximum long value  (`Window.unboundedFollowing`).
+   *            maximum long value (`Window.unboundedFollowing`).
    * @since 1.4.0
    */
   // Note: when updating the doc for this method, also update 
Window.rowsBetween.
@@ -134,7 +134,7 @@ class WindowSpec private[sql](
    * current row.
    *
    * We recommend users use `Window.unboundedPreceding`, 
`Window.unboundedFollowing`,
-   * and `[Window.currentRow` to specify special boundary values, rather than 
using integral
+   * and `Window.currentRow` to specify special boundary values, rather than 
using integral
    * values directly.
    *
    * A range based boundary is based on the actual value of the ORDER BY
@@ -150,9 +150,9 @@ class WindowSpec private[sql](
    *   import org.apache.spark.sql.expressions.Window
    *   val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
    *     .toDF("id", "category")
-   *   df.withColumn("sum",
-   *       sum('id) over 
Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
-   *     .show()
+   *   val byCategoryOrderedById =
+   *     
Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
+   *   df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
    *
    *   +---+--------+---+
    *   | id|category|sum|
@@ -169,7 +169,7 @@ class WindowSpec private[sql](
    * @param start boundary start, inclusive. The frame is unbounded if this is
    *              the minimum long value (`Window.unboundedPreceding`).
    * @param end boundary end, inclusive. The frame is unbounded if this is the
-   *            maximum long value  (`Window.unboundedFollowing`).
+   *            maximum long value (`Window.unboundedFollowing`).
    * @since 1.4.0
    */
   // Note: when updating the doc for this method, also update 
Window.rangeBetween.

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0f92030..f07e043 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2968,7 +2968,7 @@ object functions {
    *
    * @param e a string column containing JSON data.
    * @param schema the schema to use when parsing the json string
-   * @param options options to control how the json is parsed. accepts the 
same options and the
+   * @param options options to control how the json is parsed. Accepts the 
same options as the
    *                json data source.
    *
    * @group collection_funcs

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index 8048c2b..2f3dfa0 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.{BaseSessionStateBuilder, 
SessionResourceLoader, SessionState}
 
 /**
- * Builder that produces a Hive aware [[SessionState]].
+ * Builder that produces a Hive-aware `SessionState`.
  */
 @Experimental
 @InterfaceStability.Unstable

http://git-wip-us.apache.org/repos/asf/spark/blob/0197262a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
----------------------------------------------------------------------
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
index 8e1a090..639ac6d 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -66,7 +66,7 @@ private[streaming] class InputInfoTracker(ssc: 
StreamingContext) extends Logging
       new mutable.HashMap[Int, StreamInputInfo]())
 
     if (inputInfos.contains(inputInfo.inputStreamId)) {
-      throw new IllegalStateException(s"Input stream 
${inputInfo.inputStreamId} for batch" +
+      throw new IllegalStateException(s"Input stream 
${inputInfo.inputStreamId} for batch " +
         s"$batchTime is already added into InputInfoTracker, this is an 
illegal state")
     }
     inputInfos += ((inputInfo.inputStreamId, inputInfo))

