Github user liancheng commented on a diff in the pull request:
https://github.com/apache/spark/pull/8341#discussion_r37594424
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetAvroCompatibilitySuite.scala
---
@@ -86,47 +235,27 @@ class ParquetAvroCompatibilitySuite extends
ParquetCompatibilityTest with Shared
}
def makeParquetAvroCompat(i: Int): ParquetAvroCompat = {
- def nullable[T <: AnyRef] = makeNullable[T](i) _
-
def makeComplexColumn(i: Int): JMap[String, JList[Nested]] = {
- mapAsJavaMap(Seq.tabulate(3) { n =>
- (i + n).toString -> seqAsJavaList(Seq.tabulate(3) { m =>
+ Seq.tabulate(3) { n =>
+ (i + n).toString -> Seq.tabulate(3) { m =>
Nested
.newBuilder()
- .setNestedIntsColumn(seqAsJavaList(Seq.tabulate(3)(j => i + j
+ m)))
+ .setNestedIntsColumn(Seq.tabulate(3)(j => i + j + m:
Integer).asJava)
.setNestedStringColumn(s"val_${i + m}")
.build()
- })
- }.toMap)
+ }.asJava
+ }.toMap.asJava
}
ParquetAvroCompat
.newBuilder()
- .setBoolColumn(i % 2 == 0)
- .setIntColumn(i)
- .setLongColumn(i.toLong * 10)
- .setFloatColumn(i.toFloat + 0.1f)
- .setDoubleColumn(i.toDouble + 0.2d)
- .setBinaryColumn(ByteBuffer.wrap(s"val_$i".getBytes))
- .setStringColumn(s"val_$i")
-
- .setMaybeBoolColumn(nullable(i % 2 == 0: java.lang.Boolean))
- .setMaybeIntColumn(nullable(i: Integer))
- .setMaybeLongColumn(nullable(i.toLong: java.lang.Long))
- .setMaybeFloatColumn(nullable(i.toFloat + 0.1f: java.lang.Float))
- .setMaybeDoubleColumn(nullable(i.toDouble + 0.2d: java.lang.Double))
- .setMaybeBinaryColumn(nullable(ByteBuffer.wrap(s"val_$i".getBytes)))
- .setMaybeStringColumn(nullable(s"val_$i"))
-
- .setStringsColumn(Seq.tabulate(3)(n => s"arr_${i + n}"))
- .setStringToIntColumn(
- mapAsJavaMap(Seq.tabulate(3)(n => n.toString -> (i + n:
Integer)).toMap))
+ .setStringsColumn(Seq.tabulate(3)(n => s"arr_${i + n}").asJava)
+ .setStringToIntColumn(Seq.tabulate(3)(n => n.toString -> (i + n:
Integer)).toMap.asJava)
.setComplexColumn(makeComplexColumn(i))
-
.build()
}
- test("SPARK-9407 Don't push down predicates involving Parquet ENUM
columns") {
+ test("SPARK-9407 Push down predicates involving Parquet ENUM columns") {
--- End diff --
Ah, it was a typo. So SPARK-9407 was caused by predicates involving Parquet
`ENUM` columns. At first, I tried to fix it by preventing all those
predicates from being pushed down. But later on I found it's easier to make Parquet
accept such predicates, and that's also what the most recent version of Parquet
does. However, the test case was written at the beginning and I forgot to
change the test name.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]