This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 8f632d7  [MINOR][DOCS] Fix few typos in the java docs
8f632d7 is described below

commit 8f632d70455156010f0e87288541304ad2164a52
Author: dengziming <dengzim...@growingio.com>
AuthorDate: Thu Sep 12 09:30:03 2019 +0900

    [MINOR][DOCS] Fix few typos in the java docs

    JIRA :https://issues.apache.org/jira/browse/SPARK-29050
    'a hdfs' change into 'an hdfs'
    'an unique' change into 'a unique'
    'an url' change into 'a url'
    'a error' change into 'an error'

    Closes #25756 from dengziming/feature_fix_typos.

    Authored-by: dengziming <dengzim...@growingio.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 R/pkg/R/context.R                                                     |  4 ++--
 core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala  |  2 +-
 core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala      |  2 +-
 core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala     |  2 +-
 core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala    |  2 +-
 docs/spark-standalone.md                                              |  2 +-
 .../org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala      |  2 +-
 python/pyspark/context.py                                             |  2 +-
 .../sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala  |  4 ++--
 .../scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala |  4 ++--
 sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala |  2 +-
 .../src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q  |  2 +-
 .../main/scala/org/apache/spark/streaming/dstream/InputDStream.scala  |  4 ++--
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 51ae2d2..93ba130 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -301,7 +301,7 @@ broadcastRDD <- function(sc, object) {
 #' Set the checkpoint directory
 #'
 #' Set the directory under which RDDs are going to be checkpointed. The
-#' directory must be a HDFS path if running on a cluster.
+#' directory must be an HDFS path if running on a cluster.
 #'
 #' @param sc Spark Context to use
 #' @param dirName Directory path
@@ -446,7 +446,7 @@ setLogLevel <- function(level) {
 #' Set checkpoint directory
 #'
 #' Set the directory under which SparkDataFrame are going to be checkpointed. The directory must be
-#' a HDFS path if running on a cluster.
+#' an HDFS path if running on a cluster.
 #'
 #' @rdname setCheckpointDir
 #' @param directory Directory path to checkpoint to
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 330c2f6..3485128 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -609,7 +609,7 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable {
   /**
    * Set the directory under which RDDs are going to be checkpointed. The directory must
-   * be a HDFS path if running on a cluster.
+   * be an HDFS path if running on a cluster.
    */
   def setCheckpointDir(dir: String) {
     sc.setCheckpointDir(dir)
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
index c96640a..b552444 100644
--- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala
@@ -124,7 +124,7 @@ private[spark] class MetricsSystem private (
    * If either ID is not available, this defaults to just using <source name>.
    *
    * @param source Metric source to be named by this method.
-   * @return An unique metric name for each combination of
+   * @return A unique metric name for each combination of
    *         application, executor/driver and metric source.
    */
  private[spark] def buildRegistryName(source: Source): String = {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
index d188bdd..49e32d0 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala
@@ -27,7 +27,7 @@ import org.apache.spark.util.Utils
 /**
  * :: DeveloperApi ::
- * This class represent an unique identifier for a BlockManager.
+ * This class represent a unique identifier for a BlockManager.
  *
  * The first 2 constructors of this class are made private to ensure that BlockManagerId objects
  * can be created only using the apply method in the companion object. This allows de-duplication
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 385f549..d44f480 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -600,7 +600,7 @@ class SparkSubmitSuite
   }

   // TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds.
-  // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log
+  // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log
   ignore("correctly builds R packages included in a jar with --packages") {
     assume(RUtils.isRInstalled, "R isn't installed on this machine.")
     assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.")
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 1af0bef..1264951 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -432,7 +432,7 @@ In addition, detailed log output for each job is also written to the work direct
 # Running Alongside Hadoop

-You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use a hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...]
+You can run Spark alongside your existing Hadoop cluster by just launching it as a separate service on the same machines. To access Hadoop data from Spark, just use an hdfs:// URL (typically `hdfs://<namenode>:9000/path`, but you can find the right URL on your Hadoop Namenode's web UI). Alternatively, you can set up a separate cluster for Spark, and still have it access HDFS over the network; this will be slower than disk-local access, but may not be a concern if you are still running in [...]
 # Configuring Ports for Network Security
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
index 5fb83b2..11e9495 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
@@ -68,7 +68,7 @@ private[kinesis] class KinesisCheckpointer(
     if (checkpointer != null) {
       try {
         // We must call `checkpoint()` with no parameter to finish reading shards.
-        // See an URL below for details:
+        // See a URL below for details:
         // https://forums.aws.amazon.com/thread.jspa?threadID=244218
         KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100)
       } catch {
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index d689d1d..4d140f9 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -950,7 +950,7 @@ class SparkContext(object):
     def setCheckpointDir(self, dirName):
         """
         Set the directory under which RDDs are going to be checkpointed. The
-        directory must be a HDFS path if running on a cluster.
+        directory must be an HDFS path if running on a cluster.
         """
         self._jsc.sc().setCheckpointDir(dirName)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
index 6ee54b9..05c651f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
@@ -43,7 +43,7 @@ import org.apache.spark.util.{SizeEstimator, Utils}
 /**
  * An implementation of [[StateStoreProvider]] and [[StateStore]] in which all the data is backed
- * by files in a HDFS-compatible file system. All updates to the store has to be done in sets
+ * by files in an HDFS-compatible file system. All updates to the store has to be done in sets
  * transactionally, and each set of updates increments the store's version. These versions can
  * be used to re-execute the updates (by retries in RDD operations) on the correct version of
  * the store, and regenerate the store version.
@@ -79,7 +79,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
   // java.util.ConcurrentModificationException
   type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow]

-  /** Implementation of [[StateStore]] API which is backed by a HDFS-compatible file system */
+  /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */
   class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType)
     extends StateStore {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
index 916d6a0..cc81cf6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala
@@ -79,7 +79,7 @@ object StreamingQueryListener {
   /**
    * Event representing the start of a query
-   * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+   * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
    * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
    * @param name User-specified name of the query, null if not specified.
    * @since 2.1.0
@@ -101,7 +101,7 @@ object StreamingQueryListener {
   /**
    * Event representing that termination of a query.
    *
-   * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+   * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
    * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
    * @param exception The exception message of the query if the query was terminated
    *                  with an exception. Otherwise, it will be `None`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
index 0b3945c..e2fea8c9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
@@ -74,7 +74,7 @@ class StateOperatorProgress private[sql](
 * a trigger. Each event relates to processing done for a single trigger of the streaming
 * query. Events are emitted even when no new data is available to be processed.
 *
- * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`.
+ * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`.
 * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`.
 * @param name User-specified name of the query, null if not specified.
 * @param timestamp Beginning time of the trigger in ISO8601 format, i.e. UTC timestamps.
diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
index a75758a..f92cf24 100644
--- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
+++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q
@@ -1,4 +1,4 @@
--- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table
+-- HIVE-3300 [jira] LOAD DATA INPATH fails if an hdfs file with same name is added to table
 -- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver)

 create table result (key string, value string);
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
index 6495c91..5a75b77 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala
@@ -34,7 +34,7 @@ import org.apache.spark.util.Utils
 * Input streams that can generate RDDs from new data by running a service/thread only on
 * the driver node (that is, without running a receiver on worker nodes), can be
 * implemented by directly inheriting this InputDStream. For example,
- * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory from the driver for
+ * FileInputDStream, a subclass of InputDStream, monitors an HDFS directory from the driver for
 * new files and generates RDDs with the new files. For implementing input streams
 * that requires running a receiver on the worker nodes, use
 * [[org.apache.spark.streaming.dstream.ReceiverInputDStream]] as the parent class.
@@ -48,7 +48,7 @@ abstract class InputDStream[T: ClassTag](_ssc: StreamingContext)

   ssc.graph.addInputStream(this)

-  /** This is an unique identifier for the input stream. */
+  /** This is a unique identifier for the input stream. */
   val id = ssc.getNewInputStreamId()

   // Keep track of the freshest rate for this stream using the rateEstimator
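For context on the API whose documentation this commit touches most often: `setCheckpointDir` (corrected above in the R, Java and Python frontends) records the directory that later `checkpoint()` calls write into. Below is a minimal Scala sketch of that flow; the master URL, app name and checkpoint path are illustrative placeholders, not part of this commit:

    import org.apache.spark.{SparkConf, SparkContext}

    object CheckpointDirSketch {
      def main(args: Array[String]): Unit = {
        // "local[2]" and the app name are placeholder values for a local run.
        val conf = new SparkConf().setAppName("checkpoint-sketch").setMaster("local[2]")
        val sc = new SparkContext(conf)

        // Per the corrected javadoc: on a cluster this must be an HDFS path;
        // a plain local path like this one only works in local mode.
        sc.setCheckpointDir("/tmp/spark-checkpoints")

        val rdd = sc.parallelize(1 to 100).map(_ * 2)
        rdd.checkpoint() // mark the RDD so its lineage is truncated once checkpointed
        rdd.count()      // the first action materializes the RDD and writes the checkpoint

        sc.stop()
      }
    }

On a real cluster the checkpoint directory would be an HDFS URI such as `hdfs://<namenode>:9000/path`, matching the wording fixed in docs/spark-standalone.md above.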