Github user rdblue commented on a diff in the pull request:
https://github.com/apache/spark/pull/21696#discussion_r199980897
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala
---
@@ -19,166 +19,186 @@ package
org.apache.spark.sql.execution.datasources.parquet
import java.sql.Date
+import scala.collection.JavaConverters._
+
import org.apache.parquet.filter2.predicate._
import org.apache.parquet.filter2.predicate.FilterApi._
import org.apache.parquet.io.api.Binary
-import org.apache.parquet.schema.PrimitiveComparator
+import org.apache.parquet.schema._
+import org.apache.parquet.schema.OriginalType._
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.DateTimeUtils.SQLDate
import org.apache.spark.sql.sources
-import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
* Some utility function to convert Spark data source filters to Parquet
filters.
*/
private[parquet] class ParquetFilters(pushDownDate: Boolean,
pushDownStartWith: Boolean) {
+ case class ParquetSchemaType(
+ originalType: OriginalType,
+ primitiveTypeName: PrimitiveType.PrimitiveTypeName,
+ decimalMetadata: DecimalMetadata)
+
private def dateToDays(date: Date): SQLDate = {
DateTimeUtils.fromJavaDate(date)
}
- private val makeEq: PartialFunction[DataType, (String, Any) =>
FilterPredicate] = {
- case BooleanType =>
+ private val makeEq: PartialFunction[ParquetSchemaType, (String, Any) =>
FilterPredicate] = {
+ // BooleanType
--- End diff --
The other partial functions don't have comments like this one (`// BooleanType`).
Is that on purpose? Maybe these should be constants instead, to make the code
more readable and consistent?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]