Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19563#discussion_r150224414
--- Diff:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
---
@@ -639,6 +639,53 @@ class HashExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
assert(hiveHashPlan(wideRow).getInt(0) == hiveHashEval)
}
+ test("SPARK-22284: Compute hash for nested structs") {
+ val M = 80
+ val N = 10
+ val L = M * N
+ val O = 50
+ val seed = 42
+
+ val wideRow1 = new GenericInternalRow(Seq.tabulate(O)(j =>
+ new GenericInternalRow(Seq.tabulate(L)(i =>
+ new GenericInternalRow(Array[Any](
+ UTF8String.fromString((j * L + i).toString))))
+ .toArray[Any])).toArray[Any])
+ val inner1 = new StructType(
+ (0 until L).map(_ => StructField("structOfString",
structOfString)).toArray)
+ val schema1 = new StructType(
+ (0 until O).map(_ => StructField("structOfStructOfStrings",
inner1)).toArray)
+ val exprs1 = schema1.fields.zipWithIndex.map { case (f, i) =>
+ BoundReference(i, f.dataType, true)
+ }
+ val murmur3HashExpr1 = Murmur3Hash(exprs1, seed)
+ val murmur3HashPlan1 =
GenerateMutableProjection.generate(Seq(murmur3HashExpr1))
+
+ val murmursHashEval1 = Murmur3Hash(exprs1, seed).eval(wideRow1)
+ assert(murmur3HashPlan1(wideRow1).getInt(0) == murmursHashEval1)
+
+ val wideRow2 = new GenericInternalRow(Seq.tabulate(O)(k =>
--- End diff --
I think this case totally covers the previous case; can we just keep this one
and remove `wideRow1`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]