Github user wangyum commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21556#discussion_r201928990
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala ---
    @@ -248,29 +371,29 @@ private[parquet] class ParquetFilters(pushDownDate: Boolean, pushDownStartWith:
         // Probably I missed something and obviously this should be changed.
     
         predicate match {
    -      case sources.IsNull(name) if canMakeFilterOn(name) =>
    +      case sources.IsNull(name) if canMakeFilterOn(name, null) =>
             makeEq.lift(nameToType(name)).map(_(name, null))
    -      case sources.IsNotNull(name) if canMakeFilterOn(name) =>
    +      case sources.IsNotNull(name) if canMakeFilterOn(name, null) =>
             makeNotEq.lift(nameToType(name)).map(_(name, null))
     
    -      case sources.EqualTo(name, value) if canMakeFilterOn(name) =>
    +      case sources.EqualTo(name, value) if canMakeFilterOn(name, value) =>
             makeEq.lift(nameToType(name)).map(_(name, value))
    -      case sources.Not(sources.EqualTo(name, value)) if canMakeFilterOn(name) =>
    +      case sources.Not(sources.EqualTo(name, value)) if canMakeFilterOn(name, value) =>
             makeNotEq.lift(nameToType(name)).map(_(name, value))
     
    -      case sources.EqualNullSafe(name, value) if canMakeFilterOn(name) =>
    +      case sources.EqualNullSafe(name, value) if canMakeFilterOn(name, value) =>
             makeEq.lift(nameToType(name)).map(_(name, value))
    -      case sources.Not(sources.EqualNullSafe(name, value)) if canMakeFilterOn(name) =>
    +      case sources.Not(sources.EqualNullSafe(name, value)) if canMakeFilterOn(name, value) =>
             makeNotEq.lift(nameToType(name)).map(_(name, value))
    --- End diff --
    
    I handled null values in `valueCanMakeFilterOn`, which returns `true` for a `null` value before checking the value's type:
    ```scala
    /**
     * Decides whether `value` may be used to build a filter on column `name`.
     *
     * A `null` value is always accepted. Any other value is accepted only when its
     * runtime class fits the type that `nameToType` reports for the column; decimal
     * columns defer to `isDecimalMatched`, and column types not listed are rejected.
     */
    def valueCanMakeFilterOn(name: String, value: Any): Boolean = {
      if (value == null) {
        // Checked first, so nameToType is never consulted for a null value.
        true
      } else {
        nameToType(name) match {
          case ParquetBooleanType => value.isInstanceOf[JBoolean]
          // Byte, short and integer columns all accept any java.lang.Number.
          case ParquetByteType | ParquetShortType | ParquetIntegerType =>
            value.isInstanceOf[Number]
          case ParquetLongType => value.isInstanceOf[JLong]
          case ParquetFloatType => value.isInstanceOf[JFloat]
          case ParquetDoubleType => value.isInstanceOf[JDouble]
          case ParquetStringType => value.isInstanceOf[String]
          case ParquetBinaryType => value.isInstanceOf[Array[Byte]]
          case ParquetDateType => value.isInstanceOf[Date]
          // The same decimal check applies to all three physical encodings.
          case ParquetSchemaType(DECIMAL, INT32 | INT64 | FIXED_LEN_BYTE_ARRAY, _, decimalMeta) =>
            isDecimalMatched(value, decimalMeta)
          case _ => false
        }
      }
    }
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to