Github user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7216#discussion_r34184828
  
    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala 
---
    @@ -99,6 +104,56 @@ private[hive] object HiveShim {
         }
       }
     
    +  def toMetastoreFilter(
    +      predicates: Seq[Expression],
    +      partitionKeys: List[FieldSchema],
    +      hiveMetastoreVersion: String): Option[String] = {
    +
    +    // Binary comparison has been supported in getPartitionsByFilter() 
since Hive 0.13.
    +    // So if Hive metastore version is older than 0.13, predicates cannot 
be pushed down.
    +    // See HIVE-4888.
    +    val versionPattern = "([\\d]+\\.[\\d]+).*".r
    +    hiveMetastoreVersion match {
    +      case versionPattern(version) if (version.toDouble < 0.13) => return 
None
    +      case _ => // continue
    +    }
    +
    +    // hive varchar is treated as catalyst string, but hive varchar can't 
be pushed down.
    +    val varcharKeys = partitionKeys
    +      .filter(col => 
col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME))
    +      .map(col => col.getName).toSet
    +
    +    // Hive getPartitionsByFilter() takes a string that represents 
partition
    +    // predicates like "str_key=\"value\" and int_key=1 ..."
    +    Option(predicates.foldLeft("") {
    +      (prevStr, expr) => {
    +        expr match {
    +          case op @ BinaryComparison(lhs, rhs) => {
    +            val curr: Option[String] =
    +              lhs match {
    +                case AttributeReference(_, _, _, _) => {
    +                  rhs.dataType match {
    +                    case _: IntegralType =>
    +                      Some(lhs.prettyString + op.symbol + rhs.prettyString)
    +                    case _: StringType if 
(!varcharKeys.contains(lhs.prettyString)) =>
    +                      Some(lhs.prettyString + op.symbol + "\"" + 
rhs.prettyString + "\"")
    +                    case _ => None
    +                  }
    +                }
    +                case _ => None
    +              }
    +            curr match {
    +              case Some(currStr) if (prevStr.nonEmpty) => s"$prevStr and 
$currStr"
    +              case Some(currStr) if (prevStr.isEmpty) => currStr
    +              case None => prevStr
    --- End diff --
    
    Is some of this logic an elaborate way to do 
`listOfStringPredicates.mkString(" and ")`?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to