This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new ecb2052 [MINOR][DOCS] Fix few typos in the java docs
ecb2052 is described below
commit ecb2052bf0cf7dea749cca10d864f7383eeb1224
Author: dengziming <[email protected]>
AuthorDate: Thu Sep 12 09:30:03 2019 +0900
[MINOR][DOCS] Fix few typos in the java docs
JIRA: https://issues.apache.org/jira/browse/SPARK-29050
'a hdfs' change into 'an hdfs'
'an unique' change into 'a unique'
'an url' change into 'a url'
'a error' change into 'an error'
Closes #25756 from dengziming/feature_fix_typos.
Authored-by: dengziming <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit 8f632d70455156010f0e87288541304ad2164a52)
Signed-off-by: HyukjinKwon <[email protected]>
---
R/pkg/R/context.R | 4 ++--
core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala | 2 +-
core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala | 2 +-
core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala | 2 +-
core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 2 +-
docs/spark-standalone.md | 2 +-
.../org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala | 2 +-
python/pyspark/context.py | 2 +-
.../sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala | 4 ++--
.../scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala | 4 ++--
sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala | 2 +-
.../src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q | 2 +-
.../main/scala/org/apache/spark/streaming/dstream/InputDStream.scala | 4 ++--
13 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index b49f7c3..f1a6b84 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -297,7 +297,7 @@ broadcastRDD <- function(sc, object) {
#' Set the checkpoint directory
#'
#' Set the directory under which RDDs are going to be checkpointed. The
-#' directory must be a HDFS path if running on a cluster.
+#' directory must be an HDFS path if running on a cluster.
#'
#' @param sc Spark Context to use
#' @param dirName Directory path
@@ -442,7 +442,7 @@ setLogLevel <- function(level) {
#' Set checkpoint directory
#'
#' Set the directory under which SparkDataFrame are going to be checkpointed.
The directory must be
-#' a HDFS path if running on a cluster.
+#' an HDFS path if running on a cluster.
#'
#' @rdname setCheckpointDir
#' @param directory Directory path to checkpoint to
diff --git
a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 09c8384..09e9910 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -713,7 +713,7 @@ class JavaSparkContext(val sc: SparkContext)
/**
* Set the directory under which RDDs are going to be checkpointed. The
directory must
- * be a HDFS path if running on a cluster.
+ * be an HDFS path if running on a cluster.
*/
def setCheckpointDir(dir: String) {
sc.setCheckpointDir(dir)
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index 3457a26..657d75c 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -122,7 +122,7 @@ private[spark] class MetricsSystem private (
* If either ID is not available, this defaults to just using <source name>.
*
* @param source Metric source to be named by this method.
- * @return An unique metric name for each combination of
+ * @return A unique metric name for each combination of
* application, executor/driver and metric source.
*/
private[spark] def buildRegistryName(source: Source): String = {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index d4a59c3..83cd4f0 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -27,7 +27,7 @@ import org.apache.spark.util.Utils
/**
* :: DeveloperApi ::
- * This class represent an unique identifier for a BlockManager.
+ * This class represent a unique identifier for a BlockManager.
*
* The first 2 constructors of this class are made private to ensure that
BlockManagerId objects
* can be created only using the apply method in the companion object. This
allows de-duplication
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 887a88f..1b6f765 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -586,7 +586,7 @@ class SparkSubmitSuite
}
// TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds.
- // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log
+ // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log
ignore("correctly builds R packages included in a jar with --packages") {
assume(RUtils.isRInstalled, "R isn't installed on this machine.")
assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.")
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 49ef2e1..1511c66 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -362,7 +362,7 @@ In addition, detailed log output for each job is also
written to the work direct
# Running Alongside Hadoop
-You can run Spark alongside your existing Hadoop cluster by just launching it
as a separate service on the same machines. To access Hadoop data from Spark,
just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can
find the right URL on your Hadoop Namenode's web UI). Alternatively, you can
set up a separate cluster for Spark, and still have it access HDFS over the
network; this will be slower than disk-local access, but may not be a concern
if you are still running in [...]
+You can run Spark alongside your existing Hadoop cluster by just launching it
as a separate service on the same machines. To access Hadoop data from Spark,
just use an hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can
find the right URL on your Hadoop Namenode's web UI). Alternatively, you can
set up a separate cluster for Spark, and still have it access HDFS over the
network; this will be slower than disk-local access, but may not be a concern
if you are still running in [...]
# Configuring Ports for Network Security
diff --git
a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
index 5fb83b2..11e9495 100644
---
a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
+++
b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
@@ -68,7 +68,7 @@ private[kinesis] class KinesisCheckpointer(
if (checkpointer != null) {
try {
// We must call `checkpoint()` with no parameter to finish reading
shards.
- // See an URL below for details:
+ // See a URL below for details:
// https://forums.aws.amazon.com/thread.jspa?threadID=244218
KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100)
} catch {
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index aff3635..1e0dd10 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -937,7 +937,7 @@ class SparkContext(object):
def setCheckpointDir(self, dirName):
"""
Set the directory under which RDDs are going to be checkpointed. The
- directory must be a HDFS path if running on a cluster.
+ directory must be an HDFS path if running on a cluster.
"""
self._jsc.sc().setCheckpointDir(dirName)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
index 92a2480..a63b05a 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
@@ -43,7 +43,7 @@ import org.apache.spark.util.{SizeEstimator, Utils}
/**
* An implementation of [[StateStoreProvider]] and [[StateStore]] in which all
the data is backed
- * by files in a HDFS-compatible file system. All updates to the store has to
be done in sets
+ * by files in an HDFS-compatible file system. All updates to the store has to
be done in sets
* transactionally, and each set of updates increments the store's version.
These versions can
* be used to re-execute the updates (by retries in RDD operations) on the
correct version of
* the store, and regenerate the store version.
@@ -79,7 +79,7 @@ private[state] class HDFSBackedStateStoreProvider extends
StateStoreProvider wit
// java.util.ConcurrentModificationException
type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow]
- /** Implementation of [[StateStore]] API which is backed by a
HDFS-compatible file system */
+ /** Implementation of [[StateStore]] API which is backed by an
HDFS-compatible file system */
class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType)
extends StateStore {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
index 6aa82b8..ace87b0 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
@@ -79,7 +79,7 @@ object StreamingQueryListener {
/**
* Event representing the start of a query
- * @param id An unique query id that persists across restarts. See
`StreamingQuery.id()`.
+ * @param id A unique query id that persists across restarts. See
`StreamingQuery.id()`.
* @param runId A query id that is unique for every start/restart. See
`StreamingQuery.runId()`.
* @param name User-specified name of the query, null if not specified.
* @since 2.1.0
@@ -101,7 +101,7 @@ object StreamingQueryListener {
/**
* Event representing that termination of a query.
*
- * @param id An unique query id that persists across restarts. See
`StreamingQuery.id()`.
+ * @param id A unique query id that persists across restarts. See
`StreamingQuery.id()`.
* @param runId A query id that is unique for every start/restart. See
`StreamingQuery.runId()`.
* @param exception The exception message of the query if the query was
terminated
* with an exception. Otherwise, it will be `None`.
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
index f2173aa..6803f5c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
@@ -73,7 +73,7 @@ class StateOperatorProgress private[sql](
* a trigger. Each event relates to processing done for a single trigger of
the streaming
* query. Events are emitted even when no new data is available to be
processed.
*
- * @param id An unique query id that persists across restarts. See
`StreamingQuery.id()`.
+ * @param id A unique query id that persists across restarts. See
`StreamingQuery.id()`.
* @param runId A query id that is unique for every start/restart. See
`StreamingQuery.runId()`.
* @param name User-specified name of the query, null if not specified.
* @param timestamp Beginning time of the trigger in ISO8601 format, i.e. UTC
timestamps.
diff --git
a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
index a75758a..f92cf24 100644
--- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
+++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
@@ -1,4 +1,4 @@
--- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is
added to table
+-- HIVE-3300 [jira] LOAD DATA INPATH fails if an hdfs file with same name is
added to table
-- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is
not working on minMRDriver)
create table result (key string, value string);
diff --git
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
index 931f015..4af4d89 100644
---
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
+++
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
@@ -34,7 +34,7 @@ import org.apache.spark.util.Utils
* Input streams that can generate RDDs from new data by running a
service/thread only on
* the driver node (that is, without running a receiver on worker nodes), can
be
* implemented by directly inheriting this InputDStream. For example,
- * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory
from the driver for
+ * FileInputDStream, a subclass of InputDStream, monitors an HDFS directory
from the driver for
* new files and generates RDDs with the new files. For implementing input
streams
* that requires running a receiver on the worker nodes, use
* [[org.apache.spark.streaming.dstream.ReceiverInputDStream]] as the parent
class.
@@ -48,7 +48,7 @@ abstract class InputDStream[T: ClassTag](_ssc:
StreamingContext)
ssc.graph.addInputStream(this)
- /** This is an unique identifier for the input stream. */
+ /** This is a unique identifier for the input stream. */
val id = ssc.getNewInputStreamId()
// Keep track of the freshest rate for this stream using the rateEstimator
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]