Repository: spark
Updated Branches:
refs/heads/branch-2.0 c75ec5eaa -> 4009ddafd
[MINOR][CORE] Fix a HadoopRDD log message and remove unused imports in rdd
files.
## What changes were proposed in this pull request?
This PR fixes the following typos in a log message and in comments of
`HadoopRDD.scala`. It also removes unused imports.
```scala
- logWarning("Caching NewHadoopRDDs as deserialized objects usually leads
to undesired" +
+ logWarning("Caching HadoopRDDs as deserialized objects usually leads to
undesired" +
...
- // since its not removed yet
+ // since it's not removed yet
```
## How was this patch tested?
Manual.
Author: Dongjoon Hyun <[email protected]>
Closes #13294 from dongjoon-hyun/minor_rdd_fix_log_message.
(cherry picked from commit d6d3e50719b01005aa0e77349fc9a6ff88fecce3)
Signed-off-by: Andrew Or <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4009ddaf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4009ddaf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4009ddaf
Branch: refs/heads/branch-2.0
Commit: 4009ddafd810f91f699e52d7822c8c959fe7761e
Parents: c75ec5e
Author: Dongjoon Hyun <[email protected]>
Authored: Wed May 25 10:51:33 2016 -0700
Committer: Andrew Or <[email protected]>
Committed: Wed May 25 10:51:41 2016 -0700
----------------------------------------------------------------------
core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala | 2 +-
core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 5 ++---
core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 1 -
core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 2 +-
core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala | 1 -
5 files changed, 4 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4009ddaf/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
index be0cb17..41832e8 100644
--- a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
@@ -17,7 +17,7 @@
package org.apache.spark.rdd
-import org.apache.hadoop.conf.{ Configurable, Configuration }
+import org.apache.hadoop.conf.{Configurable, Configuration}
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.task.JobContextImpl
http://git-wip-us.apache.org/repos/asf/spark/blob/4009ddaf/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index b22134a..515fd6f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -43,7 +43,6 @@ import org.apache.spark._
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.executor.DataReadMethod
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.HadoopRDD.HadoopMapPartitionsWithSplitRDD
import org.apache.spark.scheduler.{HDFSCacheTaskLocation, HostTaskLocation}
@@ -70,7 +69,7 @@ private[spark] class HadoopPartition(rddId: Int, override val
index: Int, s: Inp
val envVars: Map[String, String] = if
(inputSplit.value.isInstanceOf[FileSplit]) {
val is: FileSplit = inputSplit.value.asInstanceOf[FileSplit]
// map_input_file is deprecated in favor of mapreduce_map_input_file but
set both
- // since its not removed yet
+ // since it's not removed yet
Map("map_input_file" -> is.getPath().toString(),
"mapreduce_map_input_file" -> is.getPath().toString())
} else {
@@ -335,7 +334,7 @@ class HadoopRDD[K, V](
override def persist(storageLevel: StorageLevel): this.type = {
if (storageLevel.deserialized) {
- logWarning("Caching NewHadoopRDDs as deserialized objects usually leads
to undesired" +
+ logWarning("Caching HadoopRDDs as deserialized objects usually leads to
undesired" +
" behavior because Hadoop's RecordReader reuses the same Writable
object for all records." +
" Use a map transformation to make copies of the records.")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/4009ddaf/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index ad7c221..189dc7b 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -32,7 +32,6 @@ import org.apache.hadoop.mapreduce.task.{JobContextImpl,
TaskAttemptContextImpl}
import org.apache.spark._
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.executor.DataReadMethod
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD
import org.apache.spark.storage.StorageLevel
http://git-wip-us.apache.org/repos/asf/spark/blob/4009ddaf/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 3b12448..a714237 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -40,7 +40,7 @@ import org.apache.spark._
import org.apache.spark.Partitioner.defaultPartitioner
import org.apache.spark.annotation.Experimental
import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.executor.{DataWriteMethod, OutputMetrics}
+import org.apache.spark.executor.OutputMetrics
import org.apache.spark.internal.Logging
import org.apache.spark.partial.{BoundedDouble, PartialResult}
import org.apache.spark.serializer.Serializer
http://git-wip-us.apache.org/repos/asf/spark/blob/4009ddaf/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
index 4561685..49625b7 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala
@@ -31,7 +31,6 @@ import scala.collection.Map
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
import scala.reflect.ClassTag
-import scala.util.control.NonFatal
import org.apache.spark.{Partition, SparkEnv, TaskContext}
import org.apache.spark.util.Utils
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]