Github user maropu commented on a diff in the pull request:
https://github.com/apache/spark/pull/21805#discussion_r204378903
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala ---
@@ -206,4 +206,20 @@ class DatasetCacheSuite extends QueryTest with SharedSQLContext with TimeLimits
// first time use, load cache
checkDataset(df5, Row(10))
}
+
+ test("SPARK-24850 InMemoryRelation string representation does not include cached plan") {
+ val dummyQueryExecution = spark.range(0, 1).toDF().queryExecution
+ val inMemoryRelation = InMemoryRelation(
+ true,
+ 1000,
+ StorageLevel.MEMORY_ONLY,
+ dummyQueryExecution.sparkPlan,
+ Some("test-relation"),
+ dummyQueryExecution.logical)
+
+ assert(!inMemoryRelation.simpleString.contains(dummyQueryExecution.sparkPlan.toString))
+ assert(inMemoryRelation.simpleString ==
+ s"InMemoryRelation(${inMemoryRelation.output},"
+ + " StorageLevel(memory, deserialized, 1 replicas))")
+ }
--- End diff --
How about just comparing explain results?
```
val df = Seq((1, 2)).toDF("a", "b").cache
val outputStream = new java.io.ByteArrayOutputStream()
Console.withOut(outputStream) {
df.explain(false)
}
assert(outputStream.toString.replaceAll("#\\d+", "#x").contains(
"InMemoryRelation [a#x, b#x], StorageLevel(disk, memory, deserialized, 1 replicas)"))
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]