This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new ecb2052  [MINOR][DOCS] Fix few typos in the java docs
ecb2052 is described below

commit ecb2052bf0cf7dea749cca10d864f7383eeb1224
Author: dengziming <dengzim...@growingio.com>
AuthorDate: Thu Sep 12 09:30:03 2019 +0900

    [MINOR][DOCS] Fix few typos in the java docs
    
    JIRA: https://issues.apache.org/jira/browse/SPARK-29050
    'a hdfs' changed to 'an hdfs'
    'an unique' changed to 'a unique'
    'an url' changed to 'a url'
    'a error' changed to 'an error'
    
    Closes #25756 from dengziming/feature_fix_typos.
    
    Authored-by: dengziming <dengzim...@growingio.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
    (cherry picked from commit 8f632d70455156010f0e87288541304ad2164a52)
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 R/pkg/R/context.R                                                     | 4 ++--
 core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala  | 2 +-
 core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala      | 2 +-
 core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala     | 2 +-
 core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala    | 2 +-
 docs/spark-standalone.md                                              | 2 +-
 .../org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala      | 2 +-
 python/pyspark/context.py                                             | 2 +-
 .../sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala  | 4 ++--
 .../scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala | 4 ++--
 sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala | 2 +-
 .../src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q  | 2 +-
 .../main/scala/org/apache/spark/streaming/dstream/InputDStream.scala  | 4 ++--
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index b49f7c3..f1a6b84 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -297,7 +297,7 @@ broadcastRDD <- function(sc, object) {
 #' Set the checkpoint directory
 #'
 #' Set the directory under which RDDs are going to be checkpointed. The
-#' directory must be a HDFS path if running on a cluster.
+#' directory must be an HDFS path if running on a cluster.
 #'
 #' @param sc Spark Context to use
 #' @param dirName Directory path
@@ -442,7 +442,7 @@ setLogLevel <- function(level) {
 #' Set checkpoint directory
 #'
 #' Set the directory under which SparkDataFrame are going to be checkpointed. The directory must be
-#' a HDFS path if running on a cluster.
+#' an HDFS path if running on a cluster.
 #'
 #' @rdname setCheckpointDir
 #' @param directory Directory path to checkpoint to
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 09c8384..09e9910 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -713,7 +713,7 @@ class JavaSparkContext(val sc: SparkContext)
 
   /**
    * Set the directory under which RDDs are going to be checkpointed. The directory must
-   * be a HDFS path if running on a cluster.
+   * be an HDFS path if running on a cluster.
    */
   def setCheckpointDir(dir: String) {
     sc.setCheckpointDir(dir)
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index 3457a26..657d75c 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -122,7 +122,7 @@ private[spark] class MetricsSystem private (
    * If either ID is not available, this defaults to just using <source name>.
    *
    * @param source Metric source to be named by this method.
-   * @return An unique metric name for each combination of
+   * @return A unique metric name for each combination of
    *         application, executor/driver and metric source.
    */
   private[spark] def buildRegistryName(source: Source): String = {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index d4a59c3..83cd4f0 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -27,7 +27,7 @@ import org.apache.spark.util.Utils
 
 /**
  * :: DeveloperApi ::
- * This class represent an unique identifier for a BlockManager.
+ * This class represent a unique identifier for a BlockManager.
  *
 * The first 2 constructors of this class are made private to ensure that BlockManagerId objects
 * can be created only using the apply method in the companion object. This allows de-duplication
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 887a88f..1b6f765 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -586,7 +586,7 @@ class SparkSubmitSuite
   }
 
   // TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds.
-  // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log
+  // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log
   ignore("correctly builds R packages included in a jar with --packages") {
     assume(RUtils.isRInstalled, "R isn't installed on this machine.")
     assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.")
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 49ef2e1..1511c66 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -362,7 +362,7 @@ In addition, detailed log output for each job is also written to the work direct
 
 # Running Alongside Hadoop
 
-You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...]
+You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use an hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...]
 
 
 # Configuring Ports for Network Security
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
index 5fb83b2..11e9495 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
@@ -68,7 +68,7 @@ private[kinesis] class KinesisCheckpointer(
     if (checkpointer != null) {
       try {
         // We must call `checkpoint()` with no parameter to finish reading shards.
-        // See an URL below for details:
+        // See a URL below for details:
         // https://forums.aws.amazon.com/thread.jspa?threadID=244218
         KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100)
       } catch {
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index aff3635..1e0dd10 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -937,7 +937,7 @@ class SparkContext(object):
     def setCheckpointDir(self, dirName):
         """
         Set the directory under which RDDs are going to be checkpointed. The
-        directory must be a HDFS path if running on a cluster.
+        directory must be an HDFS path if running on a cluster.
         """
         self._jsc.sc().setCheckpointDir(dirName)
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
index 92a2480..a63b05a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
@@ -43,7 +43,7 @@ import org.apache.spark.util.{SizeEstimator, Utils}
 
 /**
 * An implementation of [[StateStoreProvider]] and [[StateStore]] in which all the data is backed
- * by files in a HDFS-compatible file system. All updates to the store has to be done in sets
+ * by files in an HDFS-compatible file system. All updates to the store has to be done in sets
  * transactionally, and each set of updates increments the store's version. These versions can
  * be used to re-execute the updates (by retries in RDD operations) on the correct version of
  * the store, and regenerate the store version.
@@ -79,7 +79,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
   //   java.util.ConcurrentModificationException
   type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow]
 
-  /** Implementation of [[StateStore]] API which is backed by a HDFS-compatible file system */
+  /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */
   class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType)
     extends StateStore {
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
index 6aa82b8..ace87b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
@@ -79,7 +79,7 @@ object StreamingQueryListener {
 
   /**
    * Event representing the start of a query
-   * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+   * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
    * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
    * @param name User-specified name of the query, null if not specified.
    * @since 2.1.0
@@ -101,7 +101,7 @@ object StreamingQueryListener {
   /**
    * Event representing that termination of a query.
    *
-   * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+   * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
    * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
    * @param exception The exception message of the query if the query was terminated
    *                  with an exception. Otherwise, it will be `None`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
index f2173aa..6803f5c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
@@ -73,7 +73,7 @@ class StateOperatorProgress private[sql](
 * a trigger. Each event relates to processing done for a single trigger of the streaming
 * query. Events are emitted even when no new data is available to be processed.
 *
- * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+ * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
 * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
 * @param name User-specified name of the query, null if not specified.
 * @param timestamp Beginning time of the trigger in ISO8601 format, i.e. UTC timestamps.
diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
index a75758a..f92cf24 100644
--- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
+++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
@@ -1,4 +1,4 @@
--- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table
+-- HIVE-3300 [jira] LOAD DATA INPATH fails if an hdfs file with same name is added to table
 -- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver)
 
 create table result (key string, value string);
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
index 931f015..4af4d89 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
@@ -34,7 +34,7 @@ import org.apache.spark.util.Utils
 * Input streams that can generate RDDs from new data by running a service/thread only on
 * the driver node (that is, without running a receiver on worker nodes), can be
 * implemented by directly inheriting this InputDStream. For example,
- * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory from the driver for
+ * FileInputDStream, a subclass of InputDStream, monitors an HDFS directory from the driver for
 * new files and generates RDDs with the new files. For implementing input streams
 * that requires running a receiver on the worker nodes, use
 * [[org.apache.spark.streaming.dstream.ReceiverInputDStream]] as the parent class.
@@ -48,7 +48,7 @@ abstract class InputDStream[T: ClassTag](_ssc: StreamingContext)
 
   ssc.graph.addInputStream(this)
 
-  /** This is an unique identifier for the input stream. */
+  /** This is a unique identifier for the input stream. */
   val id = ssc.getNewInputStreamId()
 
   // Keep track of the freshest rate for this stream using the rateEstimator


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
