date:20200309

[hive] branch master updated: HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra)

2020-03-09 Thread prasanthj

This is an automated email from the ASF dual-hosted git repository.

prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b5de84a  HIVE-22988: LLAP: If consistent splits is disabled ordering 
instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra)
b5de84a is described below

commit b5de84a6ce27904562bb15d7fe800b6ae3c32670
Author: Prasanth Jayachandran 
AuthorDate: Mon Mar 9 21:42:33 2020 -0700

HIVE-22988: LLAP: If consistent splits is disabled ordering instances is 
not required (Prasanth Jayachandran reviewed by Slim Bouguerra)
---
 .../hive/llap/tezplugins/LlapTaskSchedulerService.java | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
index d11bf13..48501e5 100644
--- 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
+++ 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
@@ -296,6 +296,7 @@ public class LlapTaskSchedulerService extends TaskScheduler 
{
 
   private int totalGuaranteed = 0, unusedGuaranteed = 0;
 
+  private final boolean consistentSplits;
   /**
* An internal version to make sure we don't race and overwrite a newer 
totalGuaranteed count in
* ZK with an older one, without requiring us to make ZK updates under the 
main writeLock.
@@ -345,6 +346,7 @@ public class LlapTaskSchedulerService extends TaskScheduler 
{
 LOCK_METRICS);
 readLock = lock.readLock();
 writeLock = lock.writeLock();
+this.consistentSplits = HiveConf.getBoolVar(conf, 
ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS);
 
 if (conf.getBoolean(LLAP_PLUGIN_ENDPOINT_ENABLED, false)) {
   JobTokenSecretManager sm = null;
@@ -444,8 +446,8 @@ public class LlapTaskSchedulerService extends TaskScheduler 
{
 
 String hostsString = HiveConf.getVar(conf, 
ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
 LOG.info("Running with configuration: hosts={}, 
numSchedulableTasksPerNode={}, "
-+ "nodeBlacklistConf={}, localityConf={}",
-hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, 
localityDelayConf);
++ "nodeBlacklistConf={}, localityConf={} consistentSplits={}",
+hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, 
localityDelayConf, consistentSplits);
 this.amRegistry = TezAmRegistryImpl.create(conf, true);
 
 synchronized (LlapTaskCommunicator.pluginInitLock) {
@@ -1476,7 +1478,13 @@ public class LlapTaskSchedulerService extends 
TaskScheduler {
   }
 
   /* fall through - miss in locality or no locality-requested */
-  Collection instances = 
activeInstances.getAllInstancesOrdered(true);
+  Collection instances;
+  if (consistentSplits) {
+instances = activeInstances.getAllInstancesOrdered(true);
+  } else {
+// if consistent splits are not used we don't need the ordering as 
there will be no cache benefit anyways
+instances = activeInstances.getAll();
+  }
   List allNodes = new ArrayList<>(instances.size());
   List activeNodesWithFreeSlots = new ArrayList<>();
   for (LlapServiceInstance inst : instances) {

[hive] branch master updated: HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)

2020-03-09 Thread prasanthj

This is an automated email from the ASF dual-hosted git repository.

prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new a003428  HIVE-22979: Support total file size in statistics annotation 
(Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)
a003428 is described below

commit a0034284fe02a5012f883704fcd57652519a4cd5
Author: Prasanth Jayachandran 
AuthorDate: Mon Mar 9 10:39:42 2020 -0700

HIVE-22979: Support total file size in statistics annotation (Prasanth
Jayachandran reviewed by Jesus Camacho Rodriguez)
---
 .../hive/ql/optimizer/spark/SparkMapJoinOptimizer.java |  2 +-
 .../stats/annotation/StatsRulesProcFactory.java|  2 +-
 .../org/apache/hadoop/hive/ql/plan/Statistics.java | 18 +++---
 .../org/apache/hadoop/hive/ql/stats/BasicStats.java| 15 ---
 .../org/apache/hadoop/hive/ql/stats/StatsUtils.java|  6 --
 .../ql/exec/tez/TestVectorMapJoinFastHashTable.java|  2 +-
 6 files changed, 34 insertions(+), 11 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 5dcd49b..0638caf 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -215,7 +215,7 @@ public class SparkMapJoinOptimizer implements 
SemanticNodeProcessor {
 LOG.debug("Found a big table branch with parent operator {} and 
position {}", parentOp, pos);
 bigTablePosition = pos;
 bigTableFound = true;
-bigInputStat = new Statistics(0, Long.MAX_VALUE, 0);
+bigInputStat = new Statistics(0, Long.MAX_VALUE, Long.MAX_VALUE, 
0);
   } else {
 // Either we've found multiple big table branches, or the current 
branch cannot
 // be a big table branch. Disable mapjoin for these cases.
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0ada066..43fc449 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -2116,7 +2116,7 @@ public class StatsRulesProcFactory {
   }
 }
 
-Statistics wcStats = new Statistics(newNumRows, newDataSize, 0);
+Statistics wcStats = new Statistics(newNumRows, newDataSize, 0, 0);
 wcStats.setBasicStatsState(statsState);
 
 // evaluate filter expression and update statistics
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index bc5f9d9..a4cb841 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -52,7 +52,10 @@ public class Statistics implements Serializable {
 
   private long numRows;
   private long runTimeNumRows;
+  // dataSize represents raw data size (estimated in-memory size based on row 
schema) after decompression and decoding.
   private long dataSize;
+  // totalFileSize represents on-disk size.
+  private long totalFileSize;
   private long numErasureCodedFiles;
   private State basicStatsState;
   private Map columnStats;
@@ -60,12 +63,13 @@ public class Statistics implements Serializable {
   private boolean runtimeStats;
 
   public Statistics() {
-this(0, 0, 0);
+this(0, 0, 0, 0);
   }
 
-  public Statistics(long nr, long ds, long numEcFiles) {
+  public Statistics(long nr, long ds, long fs, long numEcFiles) {
 numRows = nr;
 dataSize = ds;
+totalFileSize = fs;
 numErasureCodedFiles = numEcFiles;
 runTimeNumRows = -1;
 columnStats = null;
@@ -74,6 +78,14 @@ public class Statistics implements Serializable {
 updateBasicStatsState();
   }
 
+  public void setTotalFileSize(final long totalFileSize) {
+this.totalFileSize = totalFileSize;
+  }
+
+  public long getTotalFileSize() {
+return totalFileSize;
+  }
+
   public long getNumRows() {
 return numRows;
   }
@@ -191,7 +203,7 @@ public class Statistics implements Serializable {
 
   @Override
   public Statistics clone() {
-Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles);
+Statistics clone = new Statistics(numRows, dataSize, totalFileSize, 
numErasureCodedFiles);
 clone.setRunTimeNumRows(runTimeNumRows);
 clone.setBasicStatsState(basicStatsState);
 clone.setColumnStatsState(columnStatsState);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java

[hive] branch master updated: HIVE-22982 : TopN Key efficiency check might disable filter too soon (Attila Magyar via Ashutosh Chauhan)

2020-03-09 Thread hashutosh

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 7e90ecf  HIVE-22982 : TopN Key efficiency check might disable filter 
too soon (Attila Magyar via Ashutosh Chauhan)
7e90ecf is described below

commit 7e90ecf480673364eb5de182f17f2c02eb144315
Author: Attila Magyar 
AuthorDate: Mon Mar 9 08:26:35 2020 -0700

HIVE-22982 : TopN Key efficiency check might disable filter too soon 
(Attila Magyar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  4 ++--
 .../hadoop/hive/ql/exec/TopNKeyOperator.java   |  3 ++-
 .../hive/ql/exec/vector/VectorTopNKeyOperator.java | 11 -
 .../vector/wrapper/VectorHashKeyWrapperBatch.java  |  4 ++--
 .../VectorHashKeyWrapperGeneralComparator.java | 13 +--
 .../apache/hadoop/hive/ql/plan/TopNKeyDesc.java| 26 ++
 .../hadoop/hive/ql/exec/TestTopNKeyFilter.java |  2 +-
 7 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 12f4822..a18a6d7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2417,8 +2417,8 @@ public class HiveConf extends Configuration {
 HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", true, "Whether to enable 
top n key optimizer."),
 HIVE_MAX_TOPN_ALLOWED("hive.optimize.topnkey.max", 128, "Maximum topN 
value allowed by top n key optimizer.\n" +
   "If the LIMIT is greater than this value then top n key optimization 
won't be used."),
-
HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 
0.6f, "Disable topN key filter if the ratio between forwarded and total rows 
reaches this limit."),
-
HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches",
 8, "Check topN key filter efficiency after a specific number of batches."),
+
HIVE_TOPN_EFFICIENCY_THRESHOLD("hive.optimize.topnkey.efficiency.threshold", 
0.8f, "Disable topN key filter if the ratio between forwarded and total rows 
reaches this limit."),
+
HIVE_TOPN_EFFICIENCY_CHECK_BATCHES("hive.optimize.topnkey.efficiency.check.nbatches",
 1, "Check topN key filter efficiency after a specific number of batches."),
 HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS("hive.optimize.topnkey.partitions.max", 
64, "Limit the maximum number of partitions used by the top N key operator."),
 
 HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index f09867b..e95d779 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -139,7 +139,8 @@ public class TopNKeyOperator extends Operator 
implements Serializab
 }
 
 if (runTimeNumRows % conf.getCheckEfficiencyNumRows() == 0) { // check the 
efficiency at every nth rows
-  checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, 
conf.getEfficiencyThreshold(), LOG);
+  checkTopNFilterEfficiency(
+topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), 
LOG, conf.getCheckEfficiencyNumRows());
 }
   }
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index 0f8eb17..10567c7 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -178,14 +178,15 @@ public class VectorTopNKeyOperator extends 
Operator implements Vect
 batch.selectedInUse = selectedInUseBackup;
 
 if (incomingBatches % conf.getCheckEfficiencyNumBatches() == 0) {
-  checkTopNFilterEfficiency(topNKeyFilters, disabledPartitions, 
conf.getEfficiencyThreshold(), LOG);
+  checkTopNFilterEfficiency(
+topNKeyFilters, disabledPartitions, conf.getEfficiencyThreshold(), 
LOG, conf.getCheckEfficiencyNumRows());
 }
   }
 
   public static void checkTopNFilterEfficiency(Map 
filters,
-Set 
disabledPartitions,
-float efficiencyThreshold,
-Logger log)
+   Set 
disabledPartitions,
+   float efficiencyThreshold,
+   Logger log, long 
checkEfficiencyNumRows)
   {
 Iterator> iterator =

[hive] branch master updated: HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon)

2020-03-09 Thread szehon

This is an automated email from the ASF dual-hosted git repository.

szehon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 44f3829  HIVE-22033 : HiveServer2: fix delegation token renewal (Ion 
Alberdi via Szehon)
44f3829 is described below

commit 44f3829a32b89c531f3f5fb402363f7a69b626af
Author: Szehon Ho 
AuthorDate: Thu Mar 5 14:04:37 2020 +0100

HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via 
Szehon)
---
 .../metastore/security/TokenStoreDelegationTokenSecretManager.java   | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
index 7b32544..ee2ace8 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
@@ -161,7 +161,10 @@ public class TokenStoreDelegationTokenSecretManager 
extends DelegationTokenSecre
 synchronized (this) {
   super.currentTokens.put(id,  tokenInfo);
   try {
-return super.renewToken(token, renewer);
+long res = super.renewToken(token, renewer);
+this.tokenStore.removeToken(id);
+this.tokenStore.addToken(id, super.currentTokens.get(id));
+return res;
   } finally {
 super.currentTokens.remove(id);
   }

[hive] branch master updated: HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra)

[hive] branch master updated: HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)

[hive] branch master updated: HIVE-22982 : TopN Key efficiency check might disable filter too soon (Attila Magyar via Ashutosh Chauhan)

[hive] branch master updated: HIVE-22033 : HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon)

4 matches

Site Navigation

Mail list logo

Footer information