[11/23] hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran)
HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1050a6e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1050a6e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1050a6e Branch: refs/heads/hive-14535 Commit: f1050a6e8462864a3d8c9696943cf5b4d12d9515 Parents: 63bda8c Author: Prasanth Jayachandran Authored: Wed Oct 11 14:29:57 2017 -0700 Committer: Prasanth Jayachandran Committed: Wed Oct 11 14:29:57 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 59 ++-- 2 files changed, 57 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f1050a6e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 942629b..9084785 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1393,6 +1393,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. 
By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/f1050a6e/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..b9bf801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,16 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +439,60 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher((String key, SearchArgument value) -> key.length()) +.maximumWeight( +HiveConf.getIntVar(conf, 
+ HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { +return HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) > 0; + } + + private static SearchArgument getSearchArgumentFromString(Configuration conf, String sargString) { + +try { + return isSargsCacheEnabled(conf)? getSargsCache(conf).get(sargString, ()
hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-2.3 0c56cf696 -> 92d3070f2 HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/92d3070f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/92d3070f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/92d3070f Branch: refs/heads/branch-2.3 Commit: 92d3070f20eb7361aced6539e59a2d9fe90f8609 Parents: 0c56cf6 Author: Mithun Radhakrishnan Authored: Thu Oct 12 10:41:38 2017 -0700 Committer: Mithun RK Committed: Thu Oct 12 10:49:55 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 76 ++-- 2 files changed, 74 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/92d3070f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 073c087..41d12ce 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1371,6 +1371,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. 
By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/92d3070f/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..2de8319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,18 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.Weigher; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +441,75 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher(new Weigher () 
{ + @Override + public int weigh(String key, SearchArgument value) { +return key.length(); + } +}) +.maximumWeight( +HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { +
[2/2] hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran)
HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c332103e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c332103e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c332103e Branch: refs/heads/branch-2 Commit: c332103ef3254f15dca1077335191ecd96e65c38 Parents: 58fa65a Author: Mithun Radhakrishnan Authored: Thu Oct 12 10:41:38 2017 -0700 Committer: Mithun Radhakrishnan Committed: Thu Oct 12 10:41:38 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 76 ++-- 2 files changed, 74 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c332103e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 62eabbc..3d3f3b2 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1382,6 +1382,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. 
By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/c332103e/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..2de8319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,18 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.Weigher; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +441,75 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher(new Weigher () 
{ + @Override + public int weigh(String key, SearchArgument value) { +return key.length(); + } +}) +.maximumWeight( +HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { +return HiveConf.getIntVar(conf,
hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan, reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-2.2 1f16c0721 -> b8cff2d22 HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b8cff2d2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b8cff2d2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b8cff2d2 Branch: refs/heads/branch-2.2 Commit: b8cff2d221c2c5e6f59a9452e2a5fbac1ea2a6ba Parents: 1f16c07 Author: Mithun RK Authored: Tue Oct 3 13:33:48 2017 -0700 Committer: Mithun Radhakrishnan Committed: Wed Oct 11 16:03:07 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 76 ++-- 2 files changed, 74 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/b8cff2d2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 06efb88..9ee4ca0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1371,6 +1371,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false, "If ORC reader encounters corrupt data, this value will be used to determine\n" + "whether to skip the corrupt data or throw exception. 
The default behavior is to throw exception."), http://git-wip-us.apache.org/repos/asf/hive/blob/b8cff2d2/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index a3d3278..f9e4c85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,18 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.Weigher; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -437,14 +442,75 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher(new Weigher () { + @Override + public int weigh(String key, SearchArgument value) { +return key.length(); + } +}) +.maximumWeight( +HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // 
Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) :
hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-2 ae9d78737 -> 8cb6f9d36 HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8cb6f9d3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8cb6f9d3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8cb6f9d3 Branch: refs/heads/branch-2 Commit: 8cb6f9d36cb8a5508224cef66adbf8822be4935c Parents: ae9d787 Author: Prasanth Jayachandran Authored: Wed Oct 11 14:29:57 2017 -0700 Committer: Prasanth Jayachandran Committed: Wed Oct 11 14:35:01 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 59 ++-- 2 files changed, 57 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8cb6f9d3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 62eabbc..3d3f3b2 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1382,6 +1382,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. 
By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/8cb6f9d3/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..b9bf801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,16 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +439,60 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher((String key, SearchArgument value) -> key.length()) +.maximumWeight( +HiveConf.getIntVar(conf, 
+ HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { +return HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) > 0; + } + + private static SearchArgument getSearchArgumentFromString(Configuration conf, String sargString) { + +try {
hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 63bda8c70 -> f1050a6e8 HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1050a6e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1050a6e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1050a6e Branch: refs/heads/master Commit: f1050a6e8462864a3d8c9696943cf5b4d12d9515 Parents: 63bda8c Author: Prasanth Jayachandran Authored: Wed Oct 11 14:29:57 2017 -0700 Committer: Prasanth Jayachandran Committed: Wed Oct 11 14:29:57 2017 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 59 ++-- 2 files changed, 57 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f1050a6e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 942629b..9084785 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1393,6 +1393,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), +HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, +"The max weight allowed for the SearchArgument Cache. 
By default, the cache allows a max-weight of 10MB, " + +"after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/f1050a6e/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..b9bf801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,16 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +439,60 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { +if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() +.weigher((String key, SearchArgument value) -> key.length()) +.maximumWeight( +HiveConf.getIntVar(conf, 
+ HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 +) +.build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. +} +return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { +return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { +return HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) > 0; + } + + private static SearchArgument getSearchArgumentFromString(Configuration conf, String sargString) { + +try { +