Github user maropu commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21805#discussion_r204378903
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala ---
    @@ -206,4 +206,20 @@ class DatasetCacheSuite extends QueryTest with SharedSQLContext with TimeLimits
         // first time use, load cache
         checkDataset(df5, Row(10))
       }
    +
    +  test("SPARK-24850 InMemoryRelation string representation does not include cached plan") {
    +    val dummyQueryExecution = spark.range(0, 1).toDF().queryExecution
    +    val inMemoryRelation = InMemoryRelation(
    +      true,
    +      1000,
    +      StorageLevel.MEMORY_ONLY,
    +      dummyQueryExecution.sparkPlan,
    +      Some("test-relation"),
    +      dummyQueryExecution.logical)
    +
    +    assert(!inMemoryRelation.simpleString.contains(dummyQueryExecution.sparkPlan.toString))
    +    assert(inMemoryRelation.simpleString ==
    +      s"InMemoryRelation(${inMemoryRelation.output},"
    +      + " StorageLevel(memory, deserialized, 1 replicas))")
    +  }
    --- End diff --
    
    How about just comparing explain results?
    ```
        val df = Seq((1, 2)).toDF("a", "b").cache
        val outputStream = new java.io.ByteArrayOutputStream()
        Console.withOut(outputStream) {
          df.explain(false)
        }
        assert(outputStream.toString.replaceAll("#\\d+", "#x").contains(
          "InMemoryRelation [a#x, b#x], StorageLevel(disk, memory, deserialized, 1 replicas)"))
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to