Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22313#discussion_r214556040
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala ---
    @@ -55,19 +59,52 @@ import org.apache.spark.sql.types._
      * known to be convertible.
      */
     private[orc] object OrcFilters extends Logging {
    +  case class FilterWithTypeMap(filter: Filter, typeMap: Map[String, 
DataType])
    +
    +  private lazy val cacheExpireTimeout =
    +    
org.apache.spark.sql.execution.datasources.orc.OrcFilters.cacheExpireTimeout
    +
    +  private lazy val searchArgumentCache = CacheBuilder.newBuilder()
    +    .expireAfterAccess(cacheExpireTimeout, TimeUnit.SECONDS)
    +    .build(
    +      new CacheLoader[FilterWithTypeMap, Option[Builder]]() {
    +        override def load(typeMapAndFilter: FilterWithTypeMap): 
Option[Builder] = {
    +          buildSearchArgument(
    +            typeMapAndFilter.typeMap, typeMapAndFilter.filter, 
SearchArgumentFactory.newBuilder())
    +        }
    +      })
    +
    +  private def getOrBuildSearchArgumentWithNewBuilder(
    +      dataTypeMap: Map[String, DataType],
    +      expression: Filter): Option[Builder] = {
    +    // When `spark.sql.orc.cache.sarg.timeout` is 0, cache is disabled.
    +    if (cacheExpireTimeout > 0) {
    +      searchArgumentCache.get(FilterWithTypeMap(expression, dataTypeMap))
    +    } else {
    +      buildSearchArgument(dataTypeMap, expression, 
SearchArgumentFactory.newBuilder())
    --- End diff --
    
    When we set the timeout to zero on the cache, a loaded element can be removed 
immediately. Maybe we don't need to check the timeout explicitly like this, and we 
can simplify the code.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to