This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new ecb2052 [MINOR][DOCS] Fix few typos in the java docs ecb2052 is described below commit ecb2052bf0cf7dea749cca10d864f7383eeb1224 Author: dengziming <dengzim...@growingio.com> AuthorDate: Thu Sep 12 09:30:03 2019 +0900 [MINOR][DOCS] Fix few typos in the java docs JIRA :https://issues.apache.org/jira/browse/SPARK-29050 'a hdfs' change into 'an hdfs' 'an unique' change into 'a unique' 'an url' change into 'a url' 'a error' change into 'an error' Closes #25756 from dengziming/feature_fix_typos. Authored-by: dengziming <dengzim...@growingio.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit 8f632d70455156010f0e87288541304ad2164a52) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- R/pkg/R/context.R | 4 ++-- core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala | 2 +- core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala | 2 +- core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala | 2 +- core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 2 +- docs/spark-standalone.md | 2 +- .../org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala | 2 +- python/pyspark/context.py | 2 +- .../sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala | 4 ++-- .../scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala | 4 ++-- sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala | 2 +- .../src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q | 2 +- .../main/scala/org/apache/spark/streaming/dstream/InputDStream.scala | 4 ++-- 13 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index b49f7c3..f1a6b84 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -297,7 +297,7 @@ broadcastRDD <- function(sc, object) { #' Set the checkpoint directory #' #' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. +#' directory must be an HDFS path if running on a cluster. #' #' @param sc Spark Context to use #' @param dirName Directory path @@ -442,7 +442,7 @@ setLogLevel <- function(level) { #' Set checkpoint directory #' #' Set the directory under which SparkDataFrame are going to be checkpointed. The directory must be -#' a HDFS path if running on a cluster. +#' an HDFS path if running on a cluster. #' #' @rdname setCheckpointDir #' @param directory Directory path to checkpoint to diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 09c8384..09e9910 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -713,7 +713,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Set the directory under which RDDs are going to be checkpointed. The directory must - * be a HDFS path if running on a cluster. + * be an HDFS path if running on a cluster. */ def setCheckpointDir(dir: String) { sc.setCheckpointDir(dir) diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index 3457a26..657d75c 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -122,7 +122,7 @@ private[spark] class MetricsSystem private ( * If either ID is not available, this defaults to just using <source name>. * * @param source Metric source to be named by this method. - * @return An unique metric name for each combination of + * @return A unique metric name for each combination of * application, executor/driver and metric source. */ private[spark] def buildRegistryName(source: Source): String = { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index d4a59c3..83cd4f0 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.Utils /** * :: DeveloperApi :: - * This class represent an unique identifier for a BlockManager. + * This class represent a unique identifier for a BlockManager. * * The first 2 constructors of this class are made private to ensure that BlockManagerId objects * can be created only using the apply method in the companion object. This allows de-duplication diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 887a88f..1b6f765 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -586,7 +586,7 @@ class SparkSubmitSuite } // TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds. - // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log + // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log ignore("correctly builds R packages included in a jar with --packages") { assume(RUtils.isRInstalled, "R isn't installed on this machine.") assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.") diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 49ef2e1..1511c66 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -362,7 +362,7 @@ In addition, detailed log output for each job is also written to the work direct # Running Alongside Hadoop -You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...] +You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use an hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...] # Configuring Ports for Network Security diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala index 5fb83b2..11e9495 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala @@ -68,7 +68,7 @@ private[kinesis] class KinesisCheckpointer( if (checkpointer != null) { try { // We must call `checkpoint()` with no parameter to finish reading shards. - // See an URL below for details: + // See a URL below for details: // https://forums.aws.amazon.com/thread.jspa?threadID=244218 KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100) } catch { diff --git a/python/pyspark/context.py b/python/pyspark/context.py index aff3635..1e0dd10 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -937,7 +937,7 @@ class SparkContext(object): def setCheckpointDir(self, dirName): """ Set the directory under which RDDs are going to be checkpointed. The - directory must be a HDFS path if running on a cluster. + directory must be an HDFS path if running on a cluster. """ self._jsc.sc().setCheckpointDir(dirName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 92a2480..a63b05a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -43,7 +43,7 @@ import org.apache.spark.util.{SizeEstimator, Utils} /** * An implementation of [[StateStoreProvider]] and [[StateStore]] in which all the data is backed - * by files in a HDFS-compatible file system. All updates to the store has to be done in sets + * by files in an HDFS-compatible file system. All updates to the store has to be done in sets * transactionally, and each set of updates increments the store's version. These versions can * be used to re-execute the updates (by retries in RDD operations) on the correct version of * the store, and regenerate the store version. @@ -79,7 +79,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit // java.util.ConcurrentModificationException type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow] - /** Implementation of [[StateStore]] API which is backed by a HDFS-compatible file system */ + /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */ class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType) extends StateStore { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala index 6aa82b8..ace87b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala @@ -79,7 +79,7 @@ object StreamingQueryListener { /** * Event representing the start of a query - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param name User-specified name of the query, null if not specified. * @since 2.1.0 @@ -101,7 +101,7 @@ object StreamingQueryListener { /** * Event representing that termination of a query. * - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param exception The exception message of the query if the query was terminated * with an exception. Otherwise, it will be `None`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index f2173aa..6803f5c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -73,7 +73,7 @@ class StateOperatorProgress private[sql]( * a trigger. Each event relates to processing done for a single trigger of the streaming * query. Events are emitted even when no new data is available to be processed. * - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param name User-specified name of the query, null if not specified. * @param timestamp Beginning time of the trigger in ISO8601 format, i.e. UTC timestamps. diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q index a75758a..f92cf24 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q @@ -1,4 +1,4 @@ --- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table +-- HIVE-3300 [jira] LOAD DATA INPATH fails if an hdfs file with same name is added to table -- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver) create table result (key string, value string); diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index 931f015..4af4d89 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.Utils * Input streams that can generate RDDs from new data by running a service/thread only on * the driver node (that is, without running a receiver on worker nodes), can be * implemented by directly inheriting this InputDStream. For example, - * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory from the driver for + * FileInputDStream, a subclass of InputDStream, monitors an HDFS directory from the driver for * new files and generates RDDs with the new files. For implementing input streams * that requires running a receiver on the worker nodes, use * [[org.apache.spark.streaming.dstream.ReceiverInputDStream]] as the parent class. @@ -48,7 +48,7 @@ abstract class InputDStream[T: ClassTag](_ssc: StreamingContext) ssc.graph.addInputStream(this) - /** This is an unique identifier for the input stream. */ + /** This is a unique identifier for the input stream. */ val id = ssc.getNewInputStreamId() // Keep track of the freshest rate for this stream using the rateEstimator --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org