[hive] branch master updated: HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b5de84a HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra) b5de84a is described below commit b5de84a6ce27904562bb15d7fe800b6ae3c32670 Author: Prasanth Jayachandran AuthorDate: Mon Mar 9 21:42:33 2020 -0700 HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra) --- .../hive/llap/tezplugins/LlapTaskSchedulerService.java | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index d11bf13..48501e5 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -296,6 +296,7 @@ public class LlapTaskSchedulerService extends TaskScheduler { private int totalGuaranteed = 0, unusedGuaranteed = 0; + private final boolean consistentSplits; /** * An internal version to make sure we don't race and overwrite a newer totalGuaranteed count in * ZK with an older one, without requiring us to make ZK updates under the main writeLock. 
@@ -345,6 +346,7 @@ public class LlapTaskSchedulerService extends TaskScheduler { LOCK_METRICS); readLock = lock.readLock(); writeLock = lock.writeLock(); +this.consistentSplits = HiveConf.getBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS); if (conf.getBoolean(LLAP_PLUGIN_ENDPOINT_ENABLED, false)) { JobTokenSecretManager sm = null; @@ -444,8 +446,8 @@ public class LlapTaskSchedulerService extends TaskScheduler { String hostsString = HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS); LOG.info("Running with configuration: hosts={}, numSchedulableTasksPerNode={}, " -+ "nodeBlacklistConf={}, localityConf={}", -hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, localityDelayConf); ++ "nodeBlacklistConf={}, localityConf={} consistentSplits={}", +hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, localityDelayConf, consistentSplits); this.amRegistry = TezAmRegistryImpl.create(conf, true); synchronized (LlapTaskCommunicator.pluginInitLock) { @@ -1476,7 +1478,13 @@ public class LlapTaskSchedulerService extends TaskScheduler { } /* fall through - miss in locality or no locality-requested */ - Collection instances = activeInstances.getAllInstancesOrdered(true); + Collection instances; + if (consistentSplits) { +instances = activeInstances.getAllInstancesOrdered(true); + } else { +// if consistent splits are not used we don't need the ordering as there will be no cache benefit anyways +instances = activeInstances.getAll(); + } List allNodes = new ArrayList<>(instances.size()); List activeNodesWithFreeSlots = new ArrayList<>(); for (LlapServiceInstance inst : instances) {
[hive] branch master updated: HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new a003428 HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) a003428 is described below commit a0034284fe02a5012f883704fcd57652519a4cd5 Author: Prasanth Jayachandran AuthorDate: Mon Mar 9 10:39:42 2020 -0700 HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) --- .../hive/ql/optimizer/spark/SparkMapJoinOptimizer.java | 2 +- .../stats/annotation/StatsRulesProcFactory.java| 2 +- .../org/apache/hadoop/hive/ql/plan/Statistics.java | 18 +++--- .../org/apache/hadoop/hive/ql/stats/BasicStats.java| 15 --- .../org/apache/hadoop/hive/ql/stats/StatsUtils.java| 6 -- .../ql/exec/tez/TestVectorMapJoinFastHashTable.java| 2 +- 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 5dcd49b..0638caf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -215,7 +215,7 @@ public class SparkMapJoinOptimizer implements SemanticNodeProcessor { LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos); bigTablePosition = pos; bigTableFound = true; -bigInputStat = new Statistics(0, Long.MAX_VALUE, 0); +bigInputStat = new Statistics(0, Long.MAX_VALUE, Long.MAX_VALUE, 0); } else { // Either we've found multiple big table branches, or the current branch cannot // be a big table branch. Disable mapjoin for these cases. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 0ada066..43fc449 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -2116,7 +2116,7 @@ public class StatsRulesProcFactory { } } -Statistics wcStats = new Statistics(newNumRows, newDataSize, 0); +Statistics wcStats = new Statistics(newNumRows, newDataSize, 0, 0); wcStats.setBasicStatsState(statsState); // evaluate filter expression and update statistics diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index bc5f9d9..a4cb841 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -52,7 +52,10 @@ public class Statistics implements Serializable { private long numRows; private long runTimeNumRows; + // dataSize represents raw data size (estimated in-memory size based on row schema) after decompression and decoding. private long dataSize; + // totalFileSize represents on-disk size. 
+ private long totalFileSize; private long numErasureCodedFiles; private State basicStatsState; private Map columnStats; @@ -60,12 +63,13 @@ public class Statistics implements Serializable { private boolean runtimeStats; public Statistics() { -this(0, 0, 0); +this(0, 0, 0, 0); } - public Statistics(long nr, long ds, long numEcFiles) { + public Statistics(long nr, long ds, long fs, long numEcFiles) { numRows = nr; dataSize = ds; +totalFileSize = fs; numErasureCodedFiles = numEcFiles; runTimeNumRows = -1; columnStats = null; @@ -74,6 +78,14 @@ public class Statistics implements Serializable { updateBasicStatsState(); } + public void setTotalFileSize(final long totalFileSize) { +this.totalFileSize = totalFileSize; + } + + public long getTotalFileSize() { +return totalFileSize; + } + public long getNumRows() { return numRows; } @@ -191,7 +203,7 @@ public class Statistics implements Serializable { @Override public Statistics clone() { -Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles); +Statistics clone = new Statistics(numRows, dataSize, totalFileSize, numErasureCodedFiles); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
[hive] branch master updated: HIVE-22982 : TopN Key efficiency check might disable filter too soon (Attila Magyar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 7e90ecf HIVE-22982 : TopN Key efficiency check might disable filter too soon (Attila Magyar via Ashutosh Chauhan) 7e90ecf is described below commit 7e90ecf480673364eb5de182f17f2c02eb144315 Author: Attila Magyar AuthorDate: Mon Mar 9 08:26:35 2020 -0700 HIVE-22982 : TopN Key efficiency check might disable filter too soon (Attila Magyar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++-- .../hadoop/hive/ql/exec/TopNKeyOperator.java | 3 ++- .../hive/ql/exec/vector/VectorTopNKeyOperator.java | 11 - .../vector/wrapper/VectorHashKeyWrapperBatch.java | 4 ++-- .../VectorHashKeyWrapperGeneralComparator.java | 13 +-- .../apache/hadoop/hive/ql/plan/TopNKeyDesc.java| 26 ++ .../hadoop/hive/ql/exec/TestTopNKeyFilter.java | 2 +- 7 files changed, 44 insertions(+), 19 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 12f4822..a18a6d7 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2417,8 +2417,8 @@ public class HiveConf extends Configuration { HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", true, "Whether to enable top n key optimizer."), HIVE_MAX_TOPN_ALLOWED("hive.optimize.topnkey.max", 128, "Maximum topN value allowed by top n key optimizer.\n" + "If the LIMIT is greater than this value then top n key optimization won't be used."), - HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 0.6f, "Disable topN key filter if the ratio between forwarded and total rows reaches this limit."), - 
HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches", 8, "Check topN key filter efficiency after a specific number of batches."), + HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 0.8f, "Disable topN key filter if the ratio between forwarded and total rows reaches this limit."), + HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches", 1, "Check topN key filter efficiency after a specific number of batches."), HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS("hive.optimize.topnkey.partitions.max", 64, "Limit the maximum number of partitions used by the top N key operator."), HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java index f09867b..e95d779 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java @@ -139,7 +139,8 @@ public class TopNKeyOperator extends Operator implements Serializab } if (runTimeNumRows % conf.getCheckEfficiencyNumRows() == 0) { // check the efficiency at every nth rows - checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG); + checkTopNFilterEfficiency( +topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java index 0f8eb17..10567c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java @@ -178,14 +178,15 @@ public class VectorTopNKeyOperator extends Operator implements Vect batch.selectedInUse = selectedInUseBackup; if (incomingBatches % 
conf.getCheckEfficiencyNumBatches() == 0) { - checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG); + checkTopNFilterEfficiency( +topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), LOG, conf.getCheckEfficiencyNumRows()); } } public static void checkTopNFilterEfficiency(Map filters, -Set disabledPartitions, -float efficiencyThreshold, -Logger log) + Set disabledPartitions, + float efficiencyThreshold, + Logger log, long checkEfficiencyNumRows) { Iterator> iterator =
[hive] branch master updated: HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon)
This is an automated email from the ASF dual-hosted git repository. szehon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 44f3829 HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon) 44f3829 is described below commit 44f3829a32b89c531f3f5fb402363f7a69b626af Author: Szehon Ho AuthorDate: Thu Mar 5 14:04:37 2020 +0100 HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon) --- .../metastore/security/TokenStoreDelegationTokenSecretManager.java | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java index 7b32544..ee2ace8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java @@ -161,7 +161,10 @@ public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecre synchronized (this) { super.currentTokens.put(id, tokenInfo); try { -return super.renewToken(token, renewer); +long res = super.renewToken(token, renewer); +this.tokenStore.removeToken(id); +this.tokenStore.addToken(id, super.currentTokens.get(id)); +return res; } finally { super.currentTokens.remove(id); }