GitHub user rdblue commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21556#discussion_r201755353
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala
 ---
    @@ -225,12 +316,44 @@ private[parquet] class ParquetFilters(pushDownDate: 
Boolean, pushDownStartWith:
       def createFilter(schema: MessageType, predicate: sources.Filter): 
Option[FilterPredicate] = {
         val nameToType = getFieldMap(schema)
     
    +    def isDecimalMatched(value: Any, decimalMeta: DecimalMetadata): 
Boolean = value match {
    +      case decimal: JBigDecimal =>
    +        decimal.scale == decimalMeta.getScale
    +      case _ => false
    +    }
    +
    +    // Since SPARK-24716, ParquetFilter accepts the parquet file schema to
    +    // convert to a data source Filter. This must make sure that the filter
    +    // value matches the Filter. If it doesn't match, then the schema used
    +    // to read the file is incorrect, which would cause data corruption.
    +    def valueCanMakeFilterOn(name: String, value: Any): Boolean = {
    +      value == null || (nameToType(name) match {
    +        case ParquetBooleanType => value.isInstanceOf[JBoolean]
    +        case ParquetByteType | ParquetShortType | ParquetIntegerType => 
value.isInstanceOf[Number]
    +        case ParquetLongType => value.isInstanceOf[JLong]
    +        case ParquetFloatType => value.isInstanceOf[JFloat]
    +        case ParquetDoubleType => value.isInstanceOf[JDouble]
    +        case ParquetStringType => value.isInstanceOf[String]
    +        case ParquetBinaryType => value.isInstanceOf[Array[Byte]]
    +        case ParquetDateType => value.isInstanceOf[Date]
    +        case ParquetSchemaType(DECIMAL, INT32, 0, decimalMeta) =>
    --- End diff --
    
    Can the decimal cases be collapsed to a single case on 
`ParquetSchemaType(DECIMAL, _, _, decimalMetadata)`?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to