[GitHub] spark pull request: [SPARK-4092] [CORE] Fix InputMetrics for coale...

kayousterhout Thu, 06 Nov 2014 13:57:56 -0800

Github user kayousterhout commented on a diff in the pull request:

    https://github.com/apache/spark/pull/3120#discussion_r19978898
  
    --- Diff: core/src/test/scala/org/apache/spark/metrics/InputMetricsSuite.scala ---
    @@ -27,50 +30,92 @@ import scala.collection.mutable.ArrayBuffer
     import java.io.{FileWriter, PrintWriter, File}
     
     class InputMetricsSuite extends FunSuite with SharedSparkContext {
    -  test("input metrics when reading text file with single split") {
    -    val file = new File(getClass.getSimpleName + ".txt")
    -    val pw = new PrintWriter(new FileWriter(file))
    -    pw.println("some stuff")
    -    pw.println("some other stuff")
    -    pw.println("yet more stuff")
    -    pw.println("too much stuff")
    +
    +  @transient var tmpDir: File = _
    +  @transient var tmpFile: File = _
    +  @transient var tmpFilePath: String = _
    +
    +  override def beforeAll() {
    +    super.beforeAll()
    +
    +    tmpDir = Utils.createTempDir()
    +    val testTempDir = new File(tmpDir, "test")
    +    testTempDir.mkdir()
    +
    +    tmpFile = new File(testTempDir, getClass.getSimpleName + ".txt")
    +    val pw = new PrintWriter(new FileWriter(tmpFile))
    +    for (x <- 1 to 1000000) {
    +      pw.println("s")
    +    }
         pw.close()
    -    file.deleteOnExit()
     
    -    val taskBytesRead = new ArrayBuffer[Long]()
    -    sc.addSparkListener(new SparkListener() {
    -      override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
    -        taskBytesRead += taskEnd.taskMetrics.inputMetrics.get.bytesRead
    -      }
    -    })
    -    sc.textFile("file://" + file.getAbsolutePath, 2).count()
    +    // Path to tmpFile
    +    tmpFilePath = "file://" + tmpFile.getAbsolutePath
    +  }
     
    -    // Wait for task end events to come in
    -    sc.listenerBus.waitUntilEmpty(500)
    -    assert(taskBytesRead.length == 2)
    -    assert(taskBytesRead.sum >= file.length())
    +  override def afterAll() {
    +    super.afterAll()
    +    Utils.deleteRecursively(tmpDir)
       }
     
    -  test("input metrics when reading text file with multiple splits") {
    -    val file = new File(getClass.getSimpleName + ".txt")
    -    val pw = new PrintWriter(new FileWriter(file))
    -    for (i <- 0 until 10000) {
    -      pw.println("some stuff")
    +  test("input metrics for old hadoop with coalesce") {
    +    val bytesRead = runAndReturnBytesRead {
    +      sc.textFile(tmpFilePath, 4).count()
         }
    -    pw.close()
    -    file.deleteOnExit()
    +    val bytesRead2 = runAndReturnBytesRead {
    +      sc.textFile(tmpFilePath, 4).coalesce(2).count()
    +    }
    +    assert(bytesRead2 == bytesRead)
    +    assert(bytesRead2 >= tmpFile.length())
    +  }
    +
    +  test("input metrics with cache and coalesce") {
    +    // prime the cache manager
    +    val rdd = sc.textFile(tmpFilePath, 4).cache()
    +    rdd.collect()
    +
    +    val bytesRead = runAndReturnBytesRead {
    +      rdd.count()
    +    }
    +    val bytesRead2 = runAndReturnBytesRead {
    +      rdd.coalesce(4).count()
    +    }
    +
    +    // for count and coelesce, the same bytes should be read.
    +    assert(bytesRead2 >= bytesRead2)
    --- End diff --
    
    Is one of these supposed to be bytesRead?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request: [SPARK-4092] [CORE] Fix InputMetrics for coale...

Reply via email to