[hive] branch master updated: HIVE-24270: Move scratchdir cleanup to background

2020-10-28 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f7e9d9b  HIVE-24270: Move scratchdir cleanup to background
f7e9d9b is described below

commit f7e9d9b14e9f1fb266aefa9cad73d509d9d614af
Author: Mustafa Iman 
AuthorDate: Tue Oct 13 14:14:10 2020 -0700

HIVE-24270: Move scratchdir cleanup to background

Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   5 +
 ql/src/java/org/apache/hadoop/hive/ql/Context.java |  13 +-
 .../hadoop/hive/ql/cleanup/CleanupService.java |  38 ++
 .../hive/ql/cleanup/EventualCleanupService.java| 145 
 .../hadoop/hive/ql/cleanup/SyncCleanupService.java |  68 +
 .../hadoop/hive/ql/session/SessionState.java   |  17 ++-
 .../hadoop/hive/ql/cleanup/TestCleanupService.java | 152 +
 .../hive/service/cli/session/HiveSessionImpl.java  |   7 +-
 .../hive/service/cli/session/SessionManager.java   |  18 +++
 9 files changed, 451 insertions(+), 12 deletions(-)
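
HIVE-24270 introduces a CleanupService abstraction with an asynchronous EventualCleanupService (bounded thread pool and queue) and a SyncCleanupService fallback, so scratch-directory deletion no longer blocks query/session close. A minimal sketch of the idea follows; the class and method names below are illustrative assumptions, not the committed Hive API (see the CleanupService classes in the diff):

    import java.util.concurrent.LinkedBlockingQueue;
    import java.util.concurrent.RejectedExecutionException;
    import java.util.concurrent.ThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /** Illustrative sketch: queue recursive deletes, degrade to synchronous deletion when the queue is full. */
    class BackgroundCleanup {
      private final ThreadPoolExecutor executor;

      BackgroundCleanup(int threads, int queueSize) {
        // Bounded queue mirrors hive.async.cleanup.service.queue.size; threads mirrors the thread count setting.
        this.executor = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<>(queueSize));
      }

      void deleteRecursive(Path path, FileSystem fs) {
        try {
          executor.execute(() -> doDelete(path, fs));      // async path
        } catch (RejectedExecutionException queueFull) {
          doDelete(path, fs);                              // queue full: clean up synchronously
        }
      }

      private void doDelete(Path path, FileSystem fs) {
        try {
          fs.cancelDeleteOnExit(path);
          fs.delete(path, true);
        } catch (Exception e) {
          // best-effort cleanup; a real implementation would log the failure
        }
      }

      void shutdown() throws InterruptedException {
        executor.shutdown();
        executor.awaitTermination(1, TimeUnit.MINUTES);
      }
    }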

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index edaa75b..45a44e9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -5238,6 +5238,11 @@ public class HiveConf extends Configuration {
 
     HIVE_SCHEDULED_QUERIES_MAX_EXECUTORS("hive.scheduled.queries.max.executors", 4, new RangeValidator(1, null),
         "Maximal number of scheduled query executors to allow."),

+    HIVE_ASYNC_CLEANUP_SERVICE_THREAD_COUNT("hive.async.cleanup.service.thread.count", 10, new RangeValidator(0, null),
+        "Number of threads that run some eventual cleanup operations after queries/sessions close. 0 means cleanup is sync."),
+    HIVE_ASYNC_CLEANUP_SERVICE_QUEUE_SIZE("hive.async.cleanup.service.queue.size", 1, new RangeValidator(10, Integer.MAX_VALUE),
+        "Size of the async cleanup queue. If cleanup queue is full, cleanup operations become synchronous. " +
+        "Applicable only when number of async cleanup is turned on."),
     HIVE_QUERY_RESULTS_CACHE_ENABLED("hive.query.results.cache.enabled", true,
         "If the query results cache is enabled. This will keep results of previously executed queries " +
         "to be reused if the same query is executed again."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index a41c5c8..e4141fe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -673,22 +673,21 @@ public class Context {
 if(this.fsResultCacheDirs != null) {
   resultCacheDir = this.fsResultCacheDirs.toUri().getPath();
 }
-    for (Map.Entry<String, Path> entry : fsScratchDirs.entrySet()) {
+    SessionState sessionState = SessionState.get();
+    for (Path p: fsScratchDirs.values()) {
       try {
-        Path p = entry.getValue();
         if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, fsScratchDirs.values())) {
           LOG.debug("Skip deleting stagingDir: " + p);
           FileSystem fs = p.getFileSystem(conf);
           fs.cancelDeleteOnExit(p);
           continue; // staging dir is deleted when deleting the scratch dir
         }
-        if(resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) {
+        if (resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) {
           // delete only the paths which aren't result cache dir path
           // because that will be taken care by removeResultCacheDir
-          FileSystem fs = p.getFileSystem(conf);
-          LOG.debug("Deleting scratch dir: {}",  p);
-          fs.delete(p, true);
-          fs.cancelDeleteOnExit(p);
+          FileSystem fs = p.getFileSystem(conf);
+          LOG.info("Deleting scratch dir: {}", p);
+          sessionState.getCleanupService().deleteRecursive(p, fs);
         }
   } catch (Exception e) {
 LOG.warn("Error Removing Scratch: "
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java 
b/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java
new file mode 100644
index 000..919298e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache 

[hive] branch master updated: HIVE-24209 : Incorrect search argument conversion for NOT BETWEEN operation when vectorization is enabled (Ganesha Shreedhara via Ashutosh Chauhan)

2020-10-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 041b9c2  HIVE-24209 : Incorrect search argument conversion for NOT 
BETWEEN operation when vectorization is enabled (Ganesha Shreedhara via 
Ashutosh Chauhan)
041b9c2 is described below

commit 041b9c25c3694c3fa09b132705eecccab96c6385
Author: Ganesha Shreedhara 
AuthorDate: Mon Oct 5 21:39:18 2020 -0700

HIVE-24209 : Incorrect search argument conversion for NOT BETWEEN operation 
when vectorization is enabled (Ganesha Shreedhara via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java |   9 +-
 .../clientpositive/vector_between_columns.q|  43 +
 .../llap/vector_between_columns.q.out  | 211 +
 3 files changed, 262 insertions(+), 1 deletion(-)
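
The converter previously ignored the first child of GenericUDFBetween, which Hive sets to TRUE for NOT BETWEEN, so the generated search argument lost the negation. The sketch below builds the search argument that should result for the test query's predicate using the storage-api builder directly; it is an illustration of the expected output, not the converter's code path:

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

    public class NotBetweenSargExample {
      public static void main(String[] args) {
        // Equivalent of: NOT (data_release BETWEEN 20191201 AND 20200101)
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startNot()
            .between("data_release", PredicateLeaf.Type.LONG, 20191201L, 20200101L)
            .end()
            .build();
        System.out.println(sarg);
      }
    }

Without the NOT wrapper, ORC would keep only row groups overlapping the range and skip exactly the row groups that contain the matching out-of-range rows.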

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
index 764c401..fd24eaa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -471,7 +471,14 @@ public class ConvertAstToSearchArg {
 } else if (op == GenericUDFIn.class) {
   createLeaf(PredicateLeaf.Operator.IN, expr, 0);
 } else if (op == GenericUDFBetween.class) {
-      createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
+      // Start with NOT operator when the first child of GenericUDFBetween operator is set to TRUE
+      if (Boolean.TRUE.equals(((ExprNodeConstantDesc) expression.getChildren().get(0)).getValue())) {
+        builder.startNot();
+        createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
+        builder.end();
+      } else {
+        createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1);
+      }
 } else if (op == GenericUDFOPNull.class) {
   createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0);
 } else if (op == GenericUDFOPNotNull.class) {
diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q 
b/ql/src/test/queries/clientpositive/vector_between_columns.q
index a8e9ca4..63142f3 100644
--- a/ql/src/test/queries/clientpositive/vector_between_columns.q
+++ b/ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -5,6 +5,7 @@ SET hive.auto.convert.join=true;
 set hive.fetch.task.conversion=none;
 set hive.mapred.mode=nonstrict;
 set hive.join.inner.residual=false;
+set hive.optimize.index.filter=true;
 
 -- SORT_QUERY_RESULTS
 --
@@ -30,6 +31,43 @@ create table TINT stored as orc AS SELECT * FROM TINT_txt;
 -- Add a single NULL row that will come from ORC as isRepeated.
 insert into TINT values (NULL, NULL);
 
+CREATE EXTERNAL TABLE test_orc_ppd(
+  data_release bigint,
+  data_owner_ver_id bigint,
+  data_owner_dim_id bigint,
+  data_source_ver_id bigint,
+  data_source_dim_id bigint,
+  data_client_ver_id bigint,
+  data_client_dim_id bigint,
+  data_client_sub_ver_id bigint,
+  data_client_sub_dim_id bigint,
+  quarter_dim_id bigint,
+  market_dim_id bigint,
+  daypart_dim_id bigint,
+  demo_dim_id bigint,
+  station_dim_id bigint,
+  medium_dim_id bigint,
+  ad_length int,
+  exclude int,
+  population int,
+  client_cpp double,
+  client_cpm double,
+  low_cpp double,
+  mid_cpp double,
+  high_cpp double,
+  low_cpm double,
+  mid_cpm double,
+  high_cpm double,
+  low_cpp_index double,
+  mid_cpp_index double,
+  high_cpp_index double,
+  low_cpm_index double,
+  mid_cpm_index double,
+  high_cpm_index double)
+  STORED AS ORC;
+LOAD DATA LOCAL INPATH '../../data/files/orc_test_ppd'
+OVERWRITE INTO TABLE test_orc_ppd;
+
 explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint;

@@ -40,3 +78,8 @@ explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;

 select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;
+
+explain vectorization expression
+select data_release, count(*) from test_orc_ppd where NOT (data_release BETWEEN 20191201 AND 20200101) group by data_release;
+
+select data_release, count(*) from test_orc_ppd where NOT (data_release BETWEEN 20191201 AND 20200101) group by data_release;
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 546dc45..cb6d521 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_co

[hive] branch master updated: HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file (Panos G via Ashutosh Chauhan)

2020-10-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 6946c81  HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file (Panos G via Ashutosh Chauhan)
6946c81 is described below

commit 6946c816694f2706098caed95e3219520fd57c5d
Author: Panos Garefalakis 
AuthorDate: Fri Oct 2 12:29:21 2020 +0100

HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file (Panos G via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../test/resources/testconfiguration.properties|   2 +-
 .../hive/llap/io/encoded/PassThruOffsetReader.java |   9 +-
 .../apache/hadoop/hive/ql/io/HiveInputFormat.java  |  10 +-
 .../hadoop/hive/ql/io/RecordReaderWrapper.java | 205 ++
 .../compressed_skip_header_footer_aggr.q   |  74 -
 .../llap/compressed_skip_header_footer_aggr.q.out  | 290 +--
 .../tez/compressed_skip_header_footer_aggr.q.out   | 308 +
 7 files changed, 854 insertions(+), 44 deletions(-)
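
Besides correcting the header-count argument in PassThruOffsetReader, the change adds a RecordReaderWrapper that applies header/footer skipping for readers that cannot rely on byte offsets (e.g. compressed text). The core technique — drop the first N records and withhold the last N by buffering — can be sketched independently of Hive; the names below are illustrative and the real wrapper also deals with splits and codecs:

    import java.io.IOException;
    import java.util.ArrayDeque;

    /** Illustrative wrapper over a record source: drop `header` rows, withhold the last `footer` rows. */
    class SkippingReader<T> {
      interface Source<T> { T next() throws IOException; }   // returns null at end of input

      private final Source<T> source;
      private final ArrayDeque<T> footerBuffer = new ArrayDeque<>();
      private final int header;
      private final int footer;
      private boolean headerSkipped;

      SkippingReader(Source<T> source, int header, int footer) {
        this.source = source;
        this.header = header;
        this.footer = footer;
      }

      /** Returns the next visible record, or null once only footer rows remain. */
      T next() throws IOException {
        if (!headerSkipped) {
          for (int i = 0; i < header; i++) {
            if (source.next() == null) {
              return null;                                   // file shorter than the header
            }
          }
          headerSkipped = true;
        }
        // Keep `footer` records buffered; emit a record only once a newer one has arrived.
        while (footerBuffer.size() <= footer) {
          T rec = source.next();
          if (rec == null) {
            return null;                                     // whatever is buffered is the footer
          }
          footerBuffer.addLast(rec);
        }
        return footerBuffer.removeFirst();
      }
    }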

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index a29a4a3..3680dc7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -6,6 +6,7 @@ minimr.query.files=\
 
 # Queries ran by both MiniLlapLocal and MiniTez
 minitez.query.files.shared=\
+  compressed_skip_header_footer_aggr.q,\
   hybridgrace_hashjoin_1.q,\
   hybridgrace_hashjoin_2.q
 
@@ -43,7 +44,6 @@ minillap.query.files=\
   binary_output_format.q,\
   bucket5.q,\
   bucket6.q,\
-  compressed_skip_header_footer_aggr.q,\
   create_genericudaf.q,\
   create_udaf.q,\
   create_view.q,\
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java
index cab13ee..56faace 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 
-@SuppressWarnings("rawtypes") class PassThruOffsetReader implements ReaderWithOffsets {
+class PassThruOffsetReader implements ReaderWithOffsets {
   protected final RecordReader sourceReader;
   protected final Object key;
   protected final Writable value;
@@ -58,7 +58,7 @@ import org.apache.hadoop.mapred.RecordReader;
 */
   if (!initialized) {
     // Skip header lines.
-    opNotEOF = Utilities.skipHeader(sourceReader, skipFooterCnt, key, value);
+    opNotEOF = Utilities.skipHeader(sourceReader, skipHeaderCnt, key, value);
 
 // Initialize footer buffer.
 if (opNotEOF && skipFooterCnt > 0) {
@@ -87,10 +87,9 @@ import org.apache.hadoop.mapred.RecordReader;
   if (opNotEOF) {
 // File reached the end
 return true;
-  } else {
-// Done reading
-return false;
   }
+  // Done reading
+  return false;
 } catch (Exception e) {
   throw new IOException(e);
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index f564ed7..f5c25d6 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -401,7 +401,6 @@ public class HiveInputFormat
   public RecordReader getRecordReader(InputSplit split, JobConf job,
   Reporter reporter) throws IOException {
 HiveInputSplit hsplit = (HiveInputSplit) split;
-InputSplit inputSplit = hsplit.getInputSplit();
 String inputFormatClassName = null;
 Class inputFormatClass = null;
 try {
@@ -444,7 +443,8 @@ public class HiveInputFormat
 }
 RecordReader innerReader = null;
 try {
-  innerReader = inputFormat.getRecordReader(inputSplit, job, reporter);
+  // Handle the special header/footer skipping cases here.
+  innerReader = RecordReaderWrapper.create(inputFormat, hsplit, 
part.getTableDesc(), job, reporter);
 } catch (Exception e) {
   innerReader = HiveIOExceptionHandlerUtil
   .handleRecordReaderCreationException(e, job);
@@ -531,12 +531,10 @@ public class HiveInputFormat
 }
 
 conf.setInputFormat(inputFormat.getClass());
-int headerCount = 0;
-int footerCount = 0;
 boolean isCompressedFormat = isCompressedInput(finalDirs);
 if (table != null) {
-  headerCount = Utilities.getHeaderCount(table);
-  footerCount = Utilities.getFooterCount(table, conf);
+

[hive] branch master updated: HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)

2020-10-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 56646de  HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh 
Balamohan)
56646de is described below

commit 56646de06047320fa95387318be34ee80a123788
Author: Mustafa Iman 
AuthorDate: Fri Oct 2 16:35:44 2020 -0700

HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java | 7 +++
 1 file changed, 7 insertions(+)
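
The patch records the shortest and longest inserted value so a lookup whose length falls outside that range returns false before any hashing or comparison. The same guard, reduced to a standalone sketch around a plain string set (purely illustrative):

    import java.util.HashSet;
    import java.util.Set;

    /** Illustrative length-bounded membership check, mirroring the CuckooSetBytes fast path. */
    class LengthBoundedSet {
      private final Set<String> values = new HashSet<>();
      private int minLen = Integer.MAX_VALUE;
      private int maxLen = 0;

      void insert(String v) {
        minLen = Math.min(minLen, v.length());
        maxLen = Math.max(maxLen, v.length());
        values.add(v);
      }

      boolean lookup(String v) {
        // Cheap reject: anything shorter or longer than every inserted value cannot be present.
        if (v.length() < minLen || v.length() > maxLen) {
          return false;
        }
        return values.contains(v);
      }
    }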

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
index f9a86ae..10e8332 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
@@ -41,6 +41,8 @@ public class CuckooSetBytes {
   private int rehashCount = 0;
   private static final long INT_MASK  = 0xL;
   private static final long BYTE_MASK = 0x00ffL;
+  private int maxLen;
+  private int minLen = Integer.MAX_VALUE;
 
   /**
* Allocate a new set to hold expectedSize values. Re-allocation to expand
@@ -76,6 +78,9 @@ public class CuckooSetBytes {
* and ending at start+len is present in the set.
*/
   public boolean lookup(byte[] b, int start, int len) {
+if (len < minLen || len > maxLen) {
+  return false;
+}
 
 return entryEqual(t1, h1(b, start, len), b, start, len)
 || entryEqual(t2, h2(b, start, len), b, start, len);
@@ -90,6 +95,8 @@ public class CuckooSetBytes {
 if (lookup(x, 0, x.length)) {
   return;
 }
+minLen = Math.min(minLen, x.length);
+maxLen = Math.max(maxLen, x.length);
 
 // Try to insert up to n times. Rehash if that fails.
 for(int i = 0; i != n; i++) {



[hive] branch master updated: HIVE-24011: Flaky test AsyncResponseHandlerTest ( Mustafa Iman via Ashutosh Chauhan)

2020-08-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 5d9a5cf  HIVE-24011: Flaky test AsyncResponseHandlerTest ( Mustafa 
Iman via Ashutosh Chauhan)
5d9a5cf is described below

commit 5d9a5cf5a36c1d704d2671eb57547ea50249f28b
Author: Mustafa Iman 
AuthorDate: Fri Aug 7 13:56:49 2020 -0700

HIVE-24011: Flaky test AsyncResponseHandlerTest ( Mustafa Iman via Ashutosh 
Chauhan)

The timeout was too low. Also, the retry logic could cause "java.lang.IllegalArgumentException: timeout value is negative".

Signed-off-by: Ashutosh Chauhan 
---
 .../test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java  | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
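
The fix raises the polling timeout and replaces the computed (possibly negative) sleep with a fixed 50 ms pause. A hedged sketch of the helper after the change (the functional-interface name comes from the test; the rest is illustrative):

    /** Poll an assertion until it passes or the timeout elapses, sleeping a fixed 50 ms between attempts. */
    final class Eventually {
      interface AssertTask { void call() throws AssertionError; }

      static void assertTrueEventually(AssertTask task, long timeoutMillis) throws InterruptedException {
        long endTime = System.currentTimeMillis() + timeoutMillis;
        AssertionError last = null;
        while (System.currentTimeMillis() < endTime) {
          try {
            task.call();
            return;                       // assertion passed
          } catch (AssertionError e) {
            last = e;
            Thread.sleep(50);             // fixed sleep: never negative, unlike the old computed value
          }
        }
        throw last != null ? last : new AssertionError("assertion never evaluated");
      }
    }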

diff --git 
a/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java
 
b/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java
index 3d7bd90..d5d24cf 100644
--- 
a/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java
+++ 
b/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java
@@ -194,7 +194,7 @@ public class AsyncResponseHandlerTest {
   }
 
   private void assertTrueEventually(AssertTask assertTask) throws 
InterruptedException {
-assertTrueEventually(assertTask, 1);
+assertTrueEventually(assertTask, 10);
   }
 
   private void assertTrueEventually(AssertTask assertTask, int timeoutMillis) 
throws InterruptedException {
@@ -207,9 +207,7 @@ public class AsyncResponseHandlerTest {
 return;
   } catch (AssertionError e) {
 assertionError = e;
-long millisUntilTimeout = endTime - System.currentTimeMillis();
-sleep(millisUntilTimeout < 50 ? millisUntilTimeout : 50 );
-continue;
+sleep(50);
   }
 }
 throw assertionError;



[hive] branch master updated: HIVE-22934 Hive server interactive log counters to error stream ( Ramesh Kumar via Ashutosh Chauhan)

2020-08-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4457c3e  HIVE-22934 Hive server interactive log counters to error 
stream ( Ramesh Kumar via Ashutosh Chauhan)
4457c3e is described below

commit 4457c3ec9360650be021ea84ed1d5d0f007d8308
Author: Ramesh Thangarajan 
AuthorDate: Wed Jul 1 12:26:49 2020 -0700

HIVE-22934 Hive server interactive log counters to error stream ( Ramesh 
Kumar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/cli/TestCliDriverMethods.java  |  1 +
 .../java/org/apache/hadoop/hive/ql/QTestUtil.java  |  4 +++-
 .../hadoop/hive/ql/session/SessionState.java   | 28 +-
 3 files changed, 31 insertions(+), 2 deletions(-)
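
The change introduces an isQtestLogging flag so that counters and errors still reach the error stream during q-tests even when the session is otherwise silent. The gating amounts to the following simplified illustration (not the full SessionState):

    import java.io.PrintStream;

    /** Simplified gate: suppress error-stream output when silent, unless qtest logging is on. */
    class ErrorLogger {
      private final PrintStream err;
      private final boolean isSilent;
      private final boolean isQtestLogging;

      ErrorLogger(PrintStream err, boolean isSilent, boolean isQtestLogging) {
        this.err = err;
        this.isSilent = isSilent;
        this.isQtestLogging = isQtestLogging;
      }

      void printError(String error) {
        if (!isSilent || isQtestLogging) {
          err.println(error);            // the real SessionState also logs the message via SLF4J
        }
      }
    }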

diff --git a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java 
b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java
index 5f21900..37448fe 100644
--- a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java
+++ b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java
@@ -331,6 +331,7 @@ public class TestCliDriverMethods {
 
 sessionState.err = new SessionStream(data);
 sessionState.out = new SessionStream(System.out);
+sessionState.setIsQtestLogging(true);
 try {
   CliSessionState.start(sessionState);
   CliDriver cliDriver = new CliDriver();
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 6f42bbe..20ee2fe 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -623,6 +623,7 @@ public class QTestUtil {
 }
 File outf = new File(logDir, stdoutName);
 setSessionOutputs(fileName, ss, outf);
+ss.setIsQtestLogging(true);
 
 if (fileName.equals("init_file.q")) {
   ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + 
"/data/scripts/test_init_file.sql");
@@ -638,13 +639,14 @@ public class QTestUtil {
   ss.out.flush();
 }
 if (ss.err != null) {
-  ss.out.flush();
+  ss.err.flush();
 }
 
 qTestResultProcessor.setOutputs(ss, fo, fileName);
 
 ss.err = new CachingPrintStream(fo, true, "UTF-8");
 ss.setIsSilent(true);
+ss.setIsQtestLogging(true);
   }
 
   public CliSessionState startSessionState(boolean canReuseSession) throws 
IOException {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 0bf6735..5d42efb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -161,6 +161,11 @@ public class SessionState implements ISessionAuthState{
   protected boolean isSilent;
 
   /**
+   * qtest logging mode.
+   */
+  protected boolean isQtestLogging;
+
+  /**
* verbose mode
*/
   protected boolean isVerbose;
@@ -372,6 +377,10 @@ public class SessionState implements ISessionAuthState{
 }
   }
 
+  public boolean getIsQtestLogging() {
+return isQtestLogging;
+  }
+
   public boolean isHiveServerQuery() {
 return this.isHiveServerQuery;
   }
@@ -383,6 +392,10 @@ public class SessionState implements ISessionAuthState{
 this.isSilent = isSilent;
   }
 
+  public void setIsQtestLogging(boolean isQtestLogging) {
+this.isQtestLogging = isQtestLogging;
+  }
+
   public ReentrantLock getCompileLock() {
 return compileLock;
   }
@@ -1179,6 +1192,17 @@ public class SessionState implements ISessionAuthState{
   return (ss != null) ? ss.getIsSilent() : isSilent;
 }
 
+
+/**
+ * Whether qtest logging to the info stream is enabled, or not.
+ * @return True if qtest logging to the HiveServer2 or HiveCli info stream is enabled
+ */
+public boolean getIsQtestLogging() {
+  SessionState ss = SessionState.get();
+  // use the session or the one supplied in constructor
+  return (ss != null) ? ss.getIsQtestLogging() : false;
+}
+
 /**
  * Logs into the log file.
  * BeeLine uses the operation log file to show the logs to the user, so 
depending on the
@@ -1270,7 +1294,9 @@ public class SessionState implements ISessionAuthState{
  * @param detail Extra detail to log which will be not printed if null
  */
 public void printError(String error, String detail) {
-  getErrStream().println(error);
+  if(!getIsSilent() || getIsQtestLogging()) {
+getErrStream().println(error);
+  }
   LOG.error(error + StringUtils.defaultString(detail));
 }
   }



[hive] branch master updated: HIVE-23975: Reuse evicted keys from aggregation buffers (Mustafa Iman via Rajesh Balamohan)

2020-08-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new a88871e  HIVE-23975: Reuse evicted keys from aggregation buffers 
(Mustafa Iman via Rajesh Balamohan)
a88871e is described below

commit a88871e58ecc4d57629e91454d129d8669c06f41
Author: Mustafa Iman 
AuthorDate: Sun Aug 2 21:25:20 2020 -0700

HIVE-23975: Reuse evicted keys from aggregation buffers (Mustafa Iman via 
Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/exec/vector/VectorGroupByOperator.java |  26 +++-
 .../wrapper/VectorHashKeyWrapperGeneral.java   | 141 +++--
 2 files changed, 158 insertions(+), 9 deletions(-)
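
Building on HIVE-23952, this change also recycles the cloned hash keys that are evicted from the hash-aggregation map, copying the current key into a recycled instance instead of allocating a new copy. The pattern, as a generic sketch with illustrative names:

    import java.util.ArrayDeque;
    import java.util.Queue;

    /** Illustrative pool for reusing evicted key objects instead of re-allocating copies. */
    class KeyPool<K> {
      interface Copier<K> { K copy(K from); void copyInto(K from, K into); }

      private final Queue<K> reusable = new ArrayDeque<>();
      private final Copier<K> copier;

      KeyPool(Copier<K> copier) { this.copier = copier; }

      /** Clone a key for insertion into the map, reusing an evicted instance when available. */
      K cloneKey(K current) {
        K recycled = reusable.poll();
        if (recycled != null) {
          copier.copyInto(current, recycled);   // overwrite the recycled key in place
          return recycled;
        }
        return copier.copy(current);            // nothing to reuse: allocate a fresh copy
      }

      /** Called when an entry is evicted from the aggregation map. */
      void recycle(K evictedKey) {
        reusable.add(evictedKey);
      }
    }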

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index b6cd405..f6b38d6 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -50,6 +50,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterF
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase;
 import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch;
+import 
org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperGeneral;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
@@ -297,6 +298,8 @@ public class VectorGroupByOperator extends 
Operator
*/
final class ProcessingModeHashAggregate extends ProcessingModeBase {
 
+private Queue reusableKeyWrapperBuffer;
+
 /**
  * The global key-aggregation hash map.
  */
@@ -405,6 +408,10 @@ public class VectorGroupByOperator extends 
Operator
   }
   computeMemoryLimits();
   LOG.debug("using hash aggregation processing mode");
+
+  if (keyWrappersBatch.getVectorHashKeyWrappers()[0] instanceof 
VectorHashKeyWrapperGeneral) {
+reusableKeyWrapperBuffer = new 
ArrayDeque<>(VectorizedRowBatch.DEFAULT_SIZE);
+  }
 }
 
 @VisibleForTesting
@@ -488,6 +495,9 @@ public class VectorGroupByOperator extends 
Operator
 @Override
 public void close(boolean aborted) throws HiveException {
   reusableAggregationBufferRows.clear();
+  if (reusableKeyWrapperBuffer != null) {
+reusableKeyWrapperBuffer.clear();
+  }
   if (!aborted) {
 flush(true);
   }
@@ -536,7 +546,8 @@ public class VectorGroupByOperator extends 
Operator
   // is very important to clone the keywrapper, the one we have from 
our
   // keyWrappersBatch is going to be reset/reused on next batch.
   aggregationBuffer = allocateAggregationBuffer();
-  mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer);
+  KeyWrapper copyKeyWrapper = cloneKeyWrapper(kw);
+  mapKeysAggregationBuffers.put(copyKeyWrapper, aggregationBuffer);
   numEntriesHashTable++;
   numEntriesSinceCheck++;
 } else {
@@ -548,6 +559,16 @@ public class VectorGroupByOperator extends 
Operator
   }
 }
 
+private KeyWrapper cloneKeyWrapper(VectorHashKeyWrapperBase from) {
+  if (reusableKeyWrapperBuffer != null && reusableKeyWrapperBuffer.size() 
> 0) {
+KeyWrapper keyWrapper = reusableKeyWrapperBuffer.poll();
+from.copyKey(keyWrapper);
+return keyWrapper;
+  } else {
+return from.copyKey();
+  }
+}
+
 /**
  * Computes the memory limits for hash table flush (spill).
  */
@@ -637,6 +658,9 @@ public class VectorGroupByOperator extends 
Operator
   totalAccessCount -= bufferRow.getAccessCount();
   reusableAggregationBufferRows.add(bufferRow);
   bufferRow.resetAccessCount();
+  if (reusableKeyWrapperBuffer != null) {
+reusableKeyWrapperBuffer.add(pair.getKey());
+  }
   iter.remove();
   --numEntriesHashTable;
   if (++entriesFlushed >= entriesToFlush) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java
index c605ce3..929bb0a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.wrapper;
 
+import org.apache.hadoop.hi

[hive] branch master updated: HIVE-23952: Reuse VectorAggregationBuffer to reduce GC pressure in VectorGroupByOperator ( Mustafa Iman via Rajesh Balamohan)

2020-08-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1ad68c0  HIVE-23952: Reuse VectorAggregationBuffer to reduce GC 
pressure in VectorGroupByOperator ( Mustafa Iman via Rajesh Balamohan)
1ad68c0 is described below

commit 1ad68c0cd2e039ae15fa222955d078d6bd3580b1
Author: Mustafa Iman 
AuthorDate: Wed Jul 29 21:49:42 2020 -0700

HIVE-23952: Reuse VectorAggregationBuffer to reduce GC pressure in 
VectorGroupByOperator ( Mustafa Iman via Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/exec/vector/VectorGroupByOperator.java | 46 ++---
 .../ql/exec/vector/TestVectorGroupByOperator.java  | 75 ++
 2 files changed, 111 insertions(+), 10 deletions(-)
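
Evicted VectorAggregationBufferRow instances are now parked in a queue and handed back out from allocateAggregationBuffer after their per-aggregator buffers are reset, instead of being re-allocated. Reduced to its essentials (an illustrative sketch, not the operator code):

    import java.util.ArrayDeque;
    import java.util.Queue;

    /** Illustrative allocate-or-reuse pattern for aggregation buffers. */
    class BufferRecycler<B> {
      interface Factory<B> { B allocate(); void reset(B buffer); }

      private final Queue<B> free = new ArrayDeque<>();
      private final Factory<B> factory;

      BufferRecycler(Factory<B> factory) { this.factory = factory; }

      B allocate() {
        B buffer = free.poll();
        if (buffer != null) {
          factory.reset(buffer);    // clear previous aggregation state before reuse
          return buffer;
        }
        return factory.allocate();  // pool empty: fall back to a fresh allocation
      }

      void release(B buffer) {
        free.add(buffer);           // called when the entry is flushed or evicted
      }
    }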

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 02864d9..b6cd405 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -22,12 +22,14 @@ import java.lang.management.ManagementFactory;
 import java.lang.management.MemoryMXBean;
 import java.lang.ref.SoftReference;
 import java.lang.reflect.Constructor;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Queue;
 
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -107,7 +109,8 @@ public class VectorGroupByOperator extends 
Operator
   // transient.
   //---
 
-  private transient VectorAggregateExpression[] aggregators;
+  @VisibleForTesting
+  transient VectorAggregateExpression[] aggregators;
   /**
* The aggregation buffers to use for the current batch.
*/
@@ -159,10 +162,10 @@ public class VectorGroupByOperator extends 
Operator
* Interface for processing mode: global, hash, unsorted streaming, or group 
batch
*/
   private static interface IProcessingMode {
-public void initialize(Configuration hconf) throws HiveException;
-public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws 
HiveException;
-public void processBatch(VectorizedRowBatch batch) throws HiveException;
-public void close(boolean aborted) throws HiveException;
+void initialize(Configuration hconf) throws HiveException;
+void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws 
HiveException;
+void processBatch(VectorizedRowBatch batch) throws HiveException;
+void close(boolean aborted) throws HiveException;
   }
 
   /**
@@ -300,6 +303,9 @@ public class VectorGroupByOperator extends 
Operator
 @VisibleForTesting
 Map mapKeysAggregationBuffers;
 
+private Queue reusableAggregationBufferRows =
+new ArrayDeque<>(VectorizedRowBatch.DEFAULT_SIZE);
+
 /**
  * Total per hashtable entry fixed memory (does not depend on key/agg 
values).
  */
@@ -465,7 +471,23 @@ public class VectorGroupByOperator extends 
Operator
 }
 
 @Override
+protected VectorAggregationBufferRow allocateAggregationBuffer() throws 
HiveException {
+  VectorAggregationBufferRow bufferSet;
+  if (reusableAggregationBufferRows.size() > 0) {
+bufferSet = reusableAggregationBufferRows.remove();
+bufferSet.setVersionAndIndex(0, 0);
+for (int i = 0; i < aggregators.length; i++) {
+  aggregators[i].reset(bufferSet.getAggregationBuffer(i));
+}
+return bufferSet;
+  } else {
+return super.allocateAggregationBuffer();
+  }
+}
+
+@Override
 public void close(boolean aborted) throws HiveException {
+  reusableAggregationBufferRows.clear();
   if (!aborted) {
 flush(true);
   }
@@ -598,19 +620,23 @@ public class VectorGroupByOperator extends 
Operator
   mapKeysAggregationBuffers.entrySet().iterator();
   while(iter.hasNext()) {
 Map.Entry pair = iter.next();
+KeyWrapper keyWrapper = pair.getKey();
+VectorAggregationBufferRow bufferRow = pair.getValue();
 if (!all && avgAccess >= 1) {
-  if (pair.getValue().getAccessCount() > avgAccess) {
+  if (bufferRow.getAccessCount() > avgAccess) {
 // resetting to give chance for other entries
-totalAccessCount -= pair.getValue().getAccessCount();
-pair.getValue().resetAccessCount();
+totalAccessCount -= bufferRow.getAccessCount();
+bufferRow.resetAccessCount();
 continue;
   }
   

[hive] branch master updated: HIVE-23746 : Send task attempts async from AM to daemons (Mustafa Iman via Rajesh Balamohan)

2020-08-05 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 5c108dc  HIVE-23746 : Send task attempts async from AM to daemons 
(Mustafa Iman via Rajesh Balamohan)
5c108dc is described below

commit 5c108dc2c49f70228231099fce1a2032f33f9efd
Author: Mustafa Iman 
AuthorDate: Tue Jul 21 12:02:48 2020 -0700

HIVE-23746 : Send task attempts async from AM to daemons (Mustafa Iman via 
Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../hive/llap/tez/LlapProtocolClientProxy.java |   8 +-
 .../apache/hadoop/hive/llap/AsyncPbRpcProxy.java   | 128 +++-
 .../hadoop/hive/llap/AsyncResponseHandler.java | 112 +++
 .../hadoop/hive/llap/AsyncResponseHandlerTest.java | 222 +
 5 files changed, 466 insertions(+), 6 deletions(-)
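
The patch lets the AM submit task attempts without waiting for each RPC to complete, and caps the number of outstanding requests per LLAP daemon via hive.llap.max.concurrent.requests.per.daemon (12 in the diff). The real code uses Hadoop's async protobuf RPC plus an AsyncResponseHandler; the throttling idea alone can be sketched with a per-node semaphore (illustrative names):

    import java.util.Map;
    import java.util.concurrent.CompletableFuture;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.Semaphore;

    /** Illustrative per-node cap on in-flight asynchronous submissions. */
    class ThrottledSubmitter {
      private final int maxPerNode;
      private final Map<String, Semaphore> perNode = new ConcurrentHashMap<>();

      ThrottledSubmitter(int maxPerNode) { this.maxPerNode = maxPerNode; }

      /** Submit work to a node; blocks only when that node already has maxPerNode requests in flight. */
      CompletableFuture<Void> submit(String nodeId, Runnable sendRequestAsync) throws InterruptedException {
        Semaphore slots = perNode.computeIfAbsent(nodeId, n -> new Semaphore(maxPerNode));
        slots.acquire();                                   // wait for a free slot on this daemon
        CompletableFuture<Void> done = CompletableFuture.runAsync(sendRequestAsync);
        done.whenComplete((v, err) -> slots.release());    // free the slot when the response (or error) arrives
        return done;
      }
    }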

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9b637fb..1d64f6b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4825,6 +4825,8 @@ public class HiveConf extends Configuration {
 LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT(
 "hive.llap.task.communicator.listener.thread-count", 30,
 "The number of task communicator listener threads."),
+    LLAP_MAX_CONCURRENT_REQUESTS_PER_NODE("hive.llap.max.concurrent.requests.per.daemon", 12,
+        "Maximum number of concurrent requests to one daemon from Tez AM"),
 LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS(
   "hive.llap.task.communicator.connection.sleep.between.retries.ms", 
"2000ms",
   new TimeValidator(TimeUnit.MILLISECONDS),
diff --git 
a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java
 
b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java
index bc74c55..6702531 100644
--- 
a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java
+++ 
b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java
@@ -54,7 +54,7 @@ public class LlapProtocolClientProxy
 HiveConf.getTimeVar(conf, 
ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_TIMEOUT_MS,
 TimeUnit.MILLISECONDS),
 HiveConf.getTimeVar(conf, 
ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS,
-TimeUnit.MILLISECONDS), -1, 1);
+TimeUnit.MILLISECONDS), -1, HiveConf.getIntVar(conf, 
ConfVars.LLAP_MAX_CONCURRENT_REQUESTS_PER_NODE));
   }
 
   public void registerDag(RegisterDagRequestProto request, String host, int 
port,
@@ -108,7 +108,7 @@ public class LlapProtocolClientProxy
 }
   }
 
-  private class SubmitWorkCallable extends 
NodeCallableRequest {
+  private class SubmitWorkCallable extends 
AsyncCallableRequest {
 
 protected SubmitWorkCallable(LlapNodeId nodeId,
   SubmitWorkRequestProto submitWorkRequestProto,
@@ -117,8 +117,8 @@ public class LlapProtocolClientProxy
 }
 
 @Override
-public SubmitWorkResponseProto call() throws Exception {
-  return getProxy(nodeId, null).submitWork(null, request);
+public void callInternal() throws Exception {
+  getProxy(nodeId, null).submitWork(null, request);
 }
   }
 
diff --git 
a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java 
b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java
index 25e10f4..5041c66 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java
@@ -39,16 +39,22 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import javax.net.SocketFactory;
 
+import com.google.protobuf.ServiceException;
 import org.apache.hadoop.conf.Configuration;
 // TODO: LlapNodeId is just a host+port pair; we could make this class more 
generic.
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.ipc.AsyncCallLimitExceededException;
+import org.apache.hadoop.ipc.Client;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.concurrent.AsyncGet;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -100,11 +106,15 @@ public ab

[hive] 02/02: HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable (Rajesh Balamohan via Ashutosh Chauhan, David Mollitor)

2020-07-22 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 51346a0935acfca410c4858c7d4367e27a075392
Author: Rajesh Balamohan 
AuthorDate: Mon Jul 20 15:19:41 2020 +0530

HIVE-23870: Optimise multiple text conversions in 
WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable 
(Rajesh Balamohan via Ashutosh Chauhan, David Mollitor)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java |  8 +++-
 .../org/apache/hadoop/hive/serde2/io/HiveCharWritable.java | 10 +-
 .../org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java  |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)
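
The optimisation caches the UTF character length of the backing Text and invalidates the cache whenever the value can change, so repeated getCharacterLength() calls no longer re-decode the bytes. The caching pattern in isolation (illustrative sketch, not the writable classes themselves):

    /** Illustrative cache of a value derived from mutable state, invalidated on every mutation. */
    class CachedLengthText {
      private String value = "";
      private int charLength = -1;   // -1 means "not computed yet"

      void set(String newValue) {
        value = newValue;
        charLength = -1;             // any mutation invalidates the cached length
      }

      int getCharacterLength() {
        if (charLength == -1) {
          charLength = value.codePointCount(0, value.length());  // compute once, reuse afterwards
        }
        return charLength;
      }
    }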

diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java 
b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java
index 5b7b3b4..c4bd6ff 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java
@@ -27,12 +27,17 @@ import org.apache.hive.common.util.HiveStringUtils;
 
 public abstract class HiveBaseCharWritable {
   protected Text value = new Text();
+  protected int charLength = -1;
 
   public HiveBaseCharWritable() {
   }
 
   public int getCharacterLength() {
-return HiveStringUtils.getTextUtfLength(value);
+if (charLength != -1) {
+  return charLength;
+}
+charLength = HiveStringUtils.getTextUtfLength(value);
+return charLength;
   }
 
   /**
@@ -45,6 +50,7 @@ public abstract class HiveBaseCharWritable {
 
   public void readFields(DataInput in) throws IOException {
 value.readFields(in);
+charLength = -1;
   }
 
   public void write(DataOutput out) throws IOException {
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java 
b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java
index 5cc10a8..ea3b8e5 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java
@@ -53,6 +53,7 @@ public class HiveCharWritable extends HiveBaseCharWritable
 
   public void set(HiveCharWritable val) {
 value.set(val.value);
+charLength = -1;
   }
 
   public void set(HiveCharWritable val, int maxLength) {
@@ -78,6 +79,9 @@ public class HiveCharWritable extends HiveBaseCharWritable
   }
 
   public Text getStrippedValue() {
+if (value.charAt(value.getLength() - 1) != ' ') {
+  return value;
+}
 // A lot of these methods could be done more efficiently by operating on 
the Text value
 // directly, rather than converting to HiveChar.
 return new Text(getHiveChar().getStrippedValue());
@@ -88,7 +92,11 @@ public class HiveCharWritable extends HiveBaseCharWritable
   }
 
   public int getCharacterLength() {
-return HiveStringUtils.getTextUtfLength(getStrippedValue());
+if (charLength != -1) {
+  return charLength;
+}
+charLength = HiveStringUtils.getTextUtfLength(getStrippedValue());
+return charLength;
   }
 
   public int compareTo(HiveCharWritable rhs) {
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java 
b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java
index 796c533..c3812d6 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java
@@ -45,6 +45,7 @@ public class HiveVarcharWritable extends HiveBaseCharWritable
 
   public void set(HiveVarcharWritable val) {
 value.set(val.value);
+charLength = val.charLength;
   }
 
   public void set(HiveVarcharWritable val, int maxLength) {
@@ -57,6 +58,7 @@ public class HiveVarcharWritable extends HiveBaseCharWritable
 
   public void set(String val, int maxLength) {
 value.set(HiveBaseChar.enforceMaxLength(val, maxLength));
+charLength = maxLength;
   }
 
   public HiveVarchar getHiveVarchar() {



[hive] 01/02: HIVE-23843: Improve key evictions in VectorGroupByOperator (Rajesh Balamohan via Ashutosh Chauhan, Zoltan Haindrich)

2020-07-22 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 380be9afd1a364fe0ff83e61e17ba4ced12f29a0
Author: Rajesh Balamohan 
AuthorDate: Tue Jul 14 10:00:14 2020 +0530

HIVE-23843: Improve key evictions in VectorGroupByOperator (Rajesh 
Balamohan via Ashutosh Chauhan, Zoltan Haindrich)

Signed-off-by: Ashutosh Chauhan 
---
 .../ql/exec/vector/VectorAggregationBufferRow.java |  12 +-
 .../hive/ql/exec/vector/VectorGroupByOperator.java |  53 +++--
 .../ql/exec/vector/TestVectorGroupByOperator.java  | 125 ++---
 3 files changed, 168 insertions(+), 22 deletions(-)
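
Flushing now takes per-entry access counts into account: entries hit more often than the running average are kept (with their counters reset) while colder entries are evicted. A hedged sketch of that selection loop over a plain map (illustrative; the real operator also honours memory limits and the gcCanary soft reference):

    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;

    /** Illustrative frequency-aware flush: evict entries accessed at most `avgAccess` times. */
    class HotColdEvictor<K, V> {
      static final class Counted<V> {
        V value;
        int accessCount;
      }

      private final Map<K, Counted<V>> entries = new HashMap<>();
      private long totalAccess;

      void recordAccess(K key) {
        Counted<V> e = entries.get(key);
        if (e != null) {
          e.accessCount++;
          totalAccess++;
        }
      }

      void evictCold() {
        long avgAccess = entries.isEmpty() ? 0 : totalAccess / entries.size();
        Iterator<Map.Entry<K, Counted<V>>> iter = entries.entrySet().iterator();
        while (iter.hasNext()) {
          Counted<V> e = iter.next().getValue();
          if (avgAccess >= 1 && e.accessCount > avgAccess) {
            totalAccess -= e.accessCount;   // hot entry: keep it but reset its counter
            e.accessCount = 0;              // gives colder entries a chance next round
            continue;
          }
          totalAccess -= e.accessCount;     // cold entry: flush it
          iter.remove();
        }
      }
    }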

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
index 494db35..a7ef154 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
@@ -28,7 +28,8 @@ public class VectorAggregationBufferRow {
   private VectorAggregateExpression.AggregationBuffer[] aggregationBuffers;
   private int version;
   private int index;
-  
+  private int accessed = 0;
+
   public VectorAggregationBufferRow(
   VectorAggregateExpression.AggregationBuffer[] aggregationBuffers) {
 this.aggregationBuffers = aggregationBuffers;
@@ -80,5 +81,12 @@ public class VectorAggregationBufferRow {
   aggregationBuffers[i].reset();
 }
   }
-  
+
+  public int getAccessCount() {
+return accessed;
+  }
+
+  public void incrementAccessCount() {
+accessed++;
+  }
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 9f81e8e..85535f5 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -151,6 +151,10 @@ public class VectorGroupByOperator extends 
Operator
   private float memoryThreshold;
 
   private boolean isLlap = false;
+
+  // tracks overall access count in map agg buffer any given time.
+  private long totalAccessCount;
+
   /**
* Interface for processing mode: global, hash, unsorted streaming, or group 
batch
*/
@@ -251,7 +255,7 @@ public class VectorGroupByOperator extends 
Operator
* This mode is very simple, there are no keys to consider, and only flushes 
one row at closing
* The one row must flush even if no input was seen (NULLs)
*/
-  private class ProcessingModeGlobalAggregate extends ProcessingModeBase {
+  final class ProcessingModeGlobalAggregate extends ProcessingModeBase {
 
 /**
  * In global processing mode there is only one set of aggregation buffers
@@ -288,12 +292,13 @@ public class VectorGroupByOperator extends 
Operator
   /**
* Hash Aggregate mode processing
*/
-  private class ProcessingModeHashAggregate extends ProcessingModeBase {
+   final class ProcessingModeHashAggregate extends ProcessingModeBase {
 
 /**
  * The global key-aggregation hash map.
  */
-private Map 
mapKeysAggregationBuffers;
+@VisibleForTesting
+Map mapKeysAggregationBuffers;
 
 /**
  * Total per hashtable entry fixed memory (does not depend on key/agg 
values).
@@ -334,7 +339,8 @@ public class VectorGroupByOperator extends 
Operator
 /**
  * A soft reference used to detect memory pressure
  */
-private SoftReference gcCanary = new SoftReference(new 
Object());
+@VisibleForTesting
+SoftReference gcCanary = new SoftReference(new Object());
 
 /**
  * Counts the number of time the gcCanary died and was resurrected
@@ -387,10 +393,19 @@ public class VectorGroupByOperator extends 
Operator
   sumBatchSize = 0;
 
   mapKeysAggregationBuffers = new HashMap();
+  if (groupingSets != null && groupingSets.length > 0) {
+this.maxHtEntries = this.maxHtEntries / groupingSets.length;
+LOG.info("New maxHtEntries: {}, groupingSets len: {}", maxHtEntries, 
groupingSets.length);
+  }
   computeMemoryLimits();
   LOG.debug("using hash aggregation processing mode");
 }
 
+@VisibleForTesting
+int getMaxHtEntries() {
+  return maxHtEntries;
+}
+
 @Override
 public void doProcessBatch(VectorizedRowBatch batch, boolean 
isFirstGroupingSet,
 boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {
@@ -502,6 +517,10 @@ public class VectorGroupByOperator extends 
Operator
   mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer);
   numEntriesHashTable++;
   numEntriesSinceCheck++;
+} else {
+  // for access tracking
+  aggregationBuffer.incrementAccessCount

[hive] branch master updated (c279768 -> 51346a0)

2020-07-22 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from c279768  HIVE-23849: Hive skips the creation of ColumnAccessInfo when 
creating a view (Barnabas Maidics, reviewed by Peter Vary, Jesus Camacho 
Rodriguez)
 new 380be9a  HIVE-23843: Improve key evictions in VectorGroupByOperator 
(Rajesh Balamohan via Ashutosh Chauhan, Zoltan Haindrich)
 new 51346a0  HIVE-23870: Optimise multiple text conversions in 
WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable 
(Rajesh Balamohan via Ashutosh Chauhan, David Mollitor)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../ql/exec/vector/VectorAggregationBufferRow.java |  12 +-
 .../hive/ql/exec/vector/VectorGroupByOperator.java |  53 +++--
 .../ql/exec/vector/TestVectorGroupByOperator.java  | 125 ++---
 .../hive/serde2/io/HiveBaseCharWritable.java   |   8 +-
 .../hadoop/hive/serde2/io/HiveCharWritable.java|  10 +-
 .../hadoop/hive/serde2/io/HiveVarcharWritable.java |   2 +
 6 files changed, 186 insertions(+), 24 deletions(-)



[hive] branch master updated: HIVE-23733: genIncludedColNames functionality for ORCInputFormat (Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface

2020-07-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d7ee83d  HIVE-23733: genIncludedColNames functionality for ORCInputFormat (Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface
d7ee83d is described below

commit d7ee83d0e23d74f0258ab3678bced016d4043db3
Author: Panos Garefalakis 
AuthorDate: Sat Jun 20 21:59:16 2020 +0100

HIVE-23733: genIncludedColNames functionality for ORCInputFormat (Panos G via Ashutosh Chauhan)
Adding getOriginalColumnNames as part of LLAP Includes interface

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/api/impl/LlapRecordReader.java|  6 ++
 .../hive/llap/io/decode/ColumnVectorProducer.java  |  1 +
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java  | 25 ++
 3 files changed, 32 insertions(+)
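
genIncludedColNames maps the included physical column ids back to the original field names in the ORC file schema, descending into the ACID row struct when one is present. For the flat (non-ACID) case the mapping is just schema children to field names; a small illustration with the ORC TypeDescription API and an assumed schema:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.orc.TypeDescription;

    public class IncludedColNamesExample {
      public static void main(String[] args) {
        TypeDescription fileSchema =
            TypeDescription.fromString("struct<rnum:int,cint:int,csint:smallint>");
        List<Integer> included = Arrays.asList(0, 2);     // physical column ids to read

        String[] names = new String[included.size()];
        List<String> fieldNames = fileSchema.getFieldNames();
        for (int col = 0; col < fieldNames.size(); col++) {
          int idx = included.indexOf(col);
          if (idx >= 0) {
            names[idx] = fieldNames.get(col);             // map column id -> original column name
          }
        }
        System.out.println(Arrays.toString(names));       // [rnum, csint]
      }
    }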

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index a257a06..55a142e 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -742,6 +742,12 @@ class LlapRecordReader implements 
RecordReader
 }
 
 @Override
+public String[] getOriginalColumnNames(TypeDescription fileSchema) {
+  return OrcInputFormat.genIncludedColNames(
+  fileSchema, filePhysicalColumnIds, acidStructColumnId);
+}
+
+@Override
 public String getQueryId() {
   return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID);
 }
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
index e37379b..2a3d7fd 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
@@ -49,6 +49,7 @@ public interface ColumnVectorProducer {
 List getPhysicalColumnIds();
 List getReaderLogicalColumnIds();
 TypeDescription[] getBatchReaderTypes(TypeDescription fileSchema);
+String[] getOriginalColumnNames(TypeDescription fileSchema);
 String getQueryId();
 boolean isProbeDecodeEnabled();
 byte getProbeMjSmallTablePos();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index de962cd..1380185 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -421,6 +421,31 @@ public class OrcInputFormat implements 
InputFormat,
 return result;
   }
 
+  // Mostly dup of genIncludedColumns
+  public static String[] genIncludedColNames(TypeDescription fileSchema,
+      List<Integer> included, Integer recursiveStruct) {
+    String[] originalColNames = new String[included.size()];
+    List<TypeDescription> children = fileSchema.getChildren();
+    for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
+      int indexInBatchCols = included.indexOf(columnNumber);
+      if (indexInBatchCols >= 0) {
+        // child Index and FieldIdx should be the same
+        originalColNames[indexInBatchCols] = fileSchema.getFieldNames().get(columnNumber);
+      } else if (recursiveStruct != null && recursiveStruct == columnNumber) {
+        // This assumes all struct cols immediately follow struct
+        List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren();
+        for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) {
+          int columnNumberNested = columnNumber + 1 + columnNumberDelta;
+          int nestedIxInBatchCols = included.indexOf(columnNumberNested);
+          if (nestedIxInBatchCols >= 0) {
+            originalColNames[nestedIxInBatchCols] = children.get(columnNumber).getFieldNames().get(columnNumberDelta);
+          }
+        }
+      }
+    }
+    return originalColNames;
+  }
+
 
   private static void addColumnToIncludes(TypeDescription child, boolean[] 
result) {
 for(int col = child.getId(); col <= child.getMaximumId(); ++col) {



[hive] branch master updated: HIVE-16490. Hive should not use getKeyProvider from DFSClient directly ( Uma Maheswara Rao G via David Mollitor).

2020-07-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new cf75c77  HIVE-16490. Hive should not use getKeyProvider from DFSClient 
directly ( Uma Maheswara Rao G via David Mollitor).
cf75c77 is described below

commit cf75c776f44450e268d2bf4f4f67bbc0ccd9ef94
Author: Uma Maheswara Rao G 
AuthorDate: Sun Jul 19 00:12:20 2020 -0700

HIVE-16490. Hive should not use getKeyProvider from DFSClient directly ( 
Uma Maheswara Rao G via David Mollitor).

Signed-off-by: Ashutosh Chauhan 
---
 .../src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java 
b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 440efe9..858a799 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -1223,11 +1223,9 @@ public class Hadoop23Shims extends HadoopShimsSecure {
 private final Configuration conf;
 
 public HdfsEncryptionShim(URI uri, Configuration conf) throws IOException {
-  DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(uri, 
conf);
-
   this.conf = conf;
-  this.keyProvider = dfs.getClient().getKeyProvider();
   this.hdfsAdmin = new HdfsAdmin(uri, conf);
+  this.keyProvider = this.hdfsAdmin.getKeyProvider();
 }
 
 @Override



[hive] branch master updated: HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan)

2020-07-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 47da936  HIVE-23734: Untangle LlapRecordReader Includes construction 
(Panos G via Ashutosh Chauhan)
47da936 is described below

commit 47da936a06e50ba11e7cee9cbdc32715077709cb
Author: Panos Garefalakis 
AuthorDate: Sat Jun 20 22:12:07 2020 +0100

HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/api/impl/LlapRecordReader.java| 50 +++---
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index c148dd4..a257a06 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -648,8 +648,10 @@ class LlapRecordReader implements 
RecordReader
   // Note: columnIds below makes additional changes for ACID. Don't 
use this var directly.
   this.readerSchema = readerSchema;
   this.jobConf = jobConf;
+  this.includeAcidColumns = includeAcidColumns;
+
+  // Assume including everything means the VRB will have everything.
   if (tableIncludedCols == null) {
-// Assume including everything means the VRB will have everything.
 // TODO: this is rather brittle, esp. in view of schema evolution (in 
abstract, not as 
 //   currently implemented in Hive). The compile should supply the 
columns it expects
 //   to see, which is not "all, of any schema". Is VRB row CVs the 
right mechanism
@@ -659,43 +661,39 @@ class LlapRecordReader implements 
RecordReader
   tableIncludedCols.add(i);
 }
   }
-  LOG.debug("Logical table includes: {}", tableIncludedCols);
+
   this.readerLogicalColumnIds = tableIncludedCols;
+  LOG.debug("Logical table includes: {}", readerLogicalColumnIds);
+
   // Note: schema evolution currently does not support column index 
changes.
   //   So, the indices should line up... to be fixed in SE v2?
-  List filePhysicalColumnIds = readerLogicalColumnIds;
   if (isAcidScan) {
 int rootCol = OrcInputFormat.getRootColumn(false);
-filePhysicalColumnIds = new ArrayList<>(filePhysicalColumnIds.size() + 
rootCol);
+this.filePhysicalColumnIds = new 
ArrayList<>(readerLogicalColumnIds.size() + rootCol);
 this.acidStructColumnId = rootCol - 1; // OrcRecordUpdater.ROW. This 
is somewhat fragile...
-// Note: this guarantees that physical column IDs are in order.
-for (int i = 0; i < rootCol; ++i) {
-  // We don't want to include the root struct in ACID case; it would 
cause the whole
-  // struct to get read without projection.
-  if (acidStructColumnId == i) continue;
-  if(!includeAcidColumns) {
-/*
-  if not including acid columns, we still want to number the
-  physical columns as if acid columns are included becase
-  {@link #generateFileIncludes(TypeDescription)} takes the file
-  schema as input
-  (eg >)
- */
-continue;
+if (includeAcidColumns) {
+  // Up to acidStructColumnId: as we don't want to include the root 
struct in ACID case;
+  // it would cause the whole struct to get read without projection.
+  for (int i = 0; i < acidStructColumnId; ++i) {
+// Note: this guarantees that physical column IDs are in order.
+filePhysicalColumnIds.add(i);
   }
-  filePhysicalColumnIds.add(i);
 }
+/**
+ * Even when NOT including acid columns, we still want to number the
+ * physical columns as if acid columns are included because
+ * {@link #generateFileIncludes(TypeDescription)} takes the file
+ * schema as input
+ * (eg >)
+ */
 for (int tableColumnId : readerLogicalColumnIds) {
-  //but make sure to generate correct ids in type tree in-order
-  //walk order
+  // Make sure to generate correct ids in type tree in-order traversal
+  /* ok, so if filePhysicalColumnIds include acid column ids, we end 
up decoding the vectors*/
   filePhysicalColumnIds.add(rootCol + tableColumnId);
 }
-/*ok, so if filePhysicalColumnIds include acid column ids, we end up
- decoding the vectors*/
+  } else {
+this.filePhysicalColumnIds = readerLogicalColumnIds;
   }
- 
-  
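
To make the column-id mapping in the hunk above easier to follow, here is a small self-contained sketch of the same idea (names and the example root column value are illustrative, not taken from the patch): logical table column ids are shifted past the ACID metadata columns, and the metadata columns themselves are emitted only when includeAcidColumns is set, never including the root struct.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AcidColumnIdMappingSketch {
  // rootCol is where user columns start; rootCol - 1 is the ACID row struct.
  static List<Integer> physicalIds(List<Integer> logicalIds, int rootCol, boolean includeAcidColumns) {
    List<Integer> physical = new ArrayList<>(logicalIds.size() + rootCol);
    if (includeAcidColumns) {
      // ACID metadata columns precede the row struct; keep them in order,
      // but skip the struct itself so it is not read without projection.
      for (int i = 0; i < rootCol - 1; ++i) {
        physical.add(i);
      }
    }
    // User columns are numbered as if the ACID columns were present, i.e. offset by rootCol.
    for (int logicalId : logicalIds) {
      physical.add(rootCol + logicalId);
    }
    return physical;
  }

  public static void main(String[] args) {
    // With a hypothetical rootCol of 6, table columns 0 and 2 map to file columns 6 and 8.
    System.out.println(physicalIds(Arrays.asList(0, 2), 6, true)); // [0, 1, 2, 3, 4, 6, 8]
  }
}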

[hive] branch master updated: HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via Panos G, Ashutosh Chauhan)

2020-07-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 2864d8d  HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via 
Panos G, Ashutosh Chauhan)
2864d8d is described below

commit 2864d8dbeafebfa3059e07fb34206906466f99f9
Author: Mustafa Iman 
AuthorDate: Fri Jul 17 15:07:36 2020 -0700

HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via Panos G, 
Ashutosh Chauhan)

Increased the timeout for the async query. The tests were not well isolated: the
async query test did not clean up properly, and the leaked state caused the sync
test to fail. Cleanup is moved to @After so it always runs.
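
For illustration, a minimal JUnit 4 sketch of that pattern; the resource class below is hypothetical and only stands in for the Thrift service/session used by the real test.

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class CleanupPatternSketch {

  // Hypothetical resource standing in for the embedded service/session.
  static class FakeSession implements AutoCloseable {
    void execute(String statement) { /* pretend to run the statement */ }
    @Override public void close() { /* release state shared across tests */ }
  }

  private FakeSession session;

  @Before
  public void setUp() {
    session = new FakeSession();
  }

  @After
  public void cleanup() {
    // Runs whether the test passed or failed, so state cannot leak into the next test.
    if (session != null) {
      session.close();
      session = null;
    }
  }

  @Test
  public void testSomething() {
    session.execute("select 1");
  }
}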

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/util/ShutdownHookManagerInspector.java  | 15 -
 .../cli/operation/TestQueryShutdownHooks.java  | 64 +-
 2 files changed, 39 insertions(+), 40 deletions(-)

diff --git 
a/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java 
b/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java
index d360475..2221f20 100644
--- a/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java
+++ b/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java
@@ -20,9 +20,20 @@ package org.apache.hadoop.util;
 
 import java.util.List;
 
+import static org.junit.Assert.assertEquals;
+
 public class ShutdownHookManagerInspector {
 
-  public static List getShutdownHooksInOrder() {
-return ShutdownHookManager.get().getShutdownHooksInOrder();
+  public static int getShutdownHookCount() {
+return ShutdownHookManager.get().getShutdownHooksInOrder().size();
+  }
+
+  public static void assertShutdownHookCount(int expected) {
+List entries = 
ShutdownHookManager.get().getShutdownHooksInOrder();
+StringBuilder errorBuilder = new StringBuilder("Shutdown hooks:\n");
+for (ShutdownHookManager.HookEntry entry: entries) {
+  errorBuilder.append(entry.getHook()).append(" 
Priority:").append(entry.getPriority()).append("\n");
+}
+assertEquals(errorBuilder.toString(), expected, entries.size());
   }
 }
diff --git 
a/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java
 
b/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java
index 0233e8b..0170c71 100644
--- 
a/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java
+++ 
b/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java
@@ -28,6 +28,7 @@ import org.apache.hive.service.cli.OperationStatus;
 import org.apache.hive.service.cli.SessionHandle;
 import org.apache.hive.service.cli.thrift.EmbeddedThriftBinaryCLIService;
 import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -40,11 +41,13 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.fail;
 
-@org.junit.Ignore("HIVE-23855 TestQueryShutdownHooks is flaky")
 public class TestQueryShutdownHooks {
 
+  private static final long ASYNC_QUERY_TIMEOUT_MS = 60;
   private EmbeddedThriftBinaryCLIService service;
   private ThriftCLIServiceClient client;
+  private SessionHandle sessionHandle;
+  private final Map confOverlay = new HashMap<>();
 
   @Before
   public void setUp() throws Exception {
@@ -57,21 +60,23 @@ public class TestQueryShutdownHooks {
 hiveConf.setVar(ConfVars.HIVE_LOCK_MANAGER, 
"org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
 service.init(hiveConf);
 client = new ThriftCLIServiceClient(service);
-SessionHandle tempSession = client.openSession("anonymous", "anonymous", 
new HashMap<>());
+sessionHandle = client.openSession("anonymous", "anonymous", new 
HashMap<>());
 // any job causes creation of HadoopJobExecHelper's shutdown hook. It is 
once per JVM
 // We want it to be created before we count the hooks so it does not cause 
off by one error in our count
-client.executeStatement(tempSession, "select reflect(\"java.lang.System\", 
\"currentTimeMillis\")", new HashMap<>());
-client.closeSession(tempSession);
+client.executeStatement(sessionHandle, "select 
reflect(\"java.lang.System\", \"currentTimeMillis\")", new HashMap<>());
+  }
+
+  @After
+  public void cleanup() throws HiveSQLException {
+if (sessionHandle != null) {
+  client.closeSession(sessionHandle);
+}
+service.stop();
   }
 
   @Test
   public void testSync() throws Exception {
-Map opConf = new HashMap();
-
-SessionHandle sessHandle =

[hive] branch master updated: HIVE-23871: ObjectStore should properly handle MicroManaged Table properties (Panos G via Mustafa Iman, Ashutosh Chauhan)

2020-07-20 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new bdc7d27  HIVE-23871: ObjectStore should properly handle MicroManaged 
Table properties (Panos G via Mustafa Iman, Ashutosh Chauhan)
bdc7d27 is described below

commit bdc7d27ff3d0c99bf4372834559a5d34b86b6c57
Author: Panos Garefalakis 
AuthorDate: Fri Jul 17 16:11:54 2020 +0100

HIVE-23871: ObjectStore should properly handle MicroManaged Table 
properties (Panos G via Mustafa Iman, Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 data/files/table1_delim.txt|   5 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java |   2 +-
 .../clientpositive/load_micromanaged_delim.q   |  29 
 .../results/clientpositive/llap/acid_stats5.q.out  |  12 ++
 .../clientpositive/llap/check_constraint.q.out |   6 +
 .../llap/create_transactional_insert_only.q.out|   2 +
 .../llap/load_micromanaged_delim.q.out | 192 +
 .../test/results/clientpositive/llap/mm_all.q.out  |   6 +
 .../test/results/clientpositive/llap/mm_bhif.q.out |  44 ++---
 .../results/clientpositive/llap/mm_default.q.out   |   8 +
 .../test/results/clientpositive/llap/mm_exim.q.out |   4 +
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  |  15 +-
 .../apache/hadoop/hive/metastore/ObjectStore.java  |  43 ++---
 .../apache/hadoop/hive/metastore/txn/TxnUtils.java |   6 +
 14 files changed, 315 insertions(+), 59 deletions(-)

diff --git a/data/files/table1_delim.txt b/data/files/table1_delim.txt
new file mode 100644
index 000..60a592d
--- /dev/null
+++ b/data/files/table1_delim.txt
@@ -0,0 +1,5 @@
+1  Acura   4
+2  Toyota  3
+3  Tesla   5
+4  Honda   5
+11 Mazda   2
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1d013ae..3c1741f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7463,7 +7463,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, 
isMmTable);
 checkAcidConstraints();
   } else {
-// Acid tables can't be list bucketed or have skewed cols
+// Transactional tables can't be list bucketed or have skewed cols
 lbCtx = 
constructListBucketingCtx(destinationPartition.getSkewedColNames(),
 destinationPartition.getSkewedColValues(), 
destinationPartition.getSkewedColValueLocationMaps(),
 destinationPartition.isStoredAsSubDirectories());
diff --git a/ql/src/test/queries/clientpositive/load_micromanaged_delim.q 
b/ql/src/test/queries/clientpositive/load_micromanaged_delim.q
new file mode 100644
index 000..00ba262
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/load_micromanaged_delim.q
@@ -0,0 +1,29 @@
+set hive.support.concurrency=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+dfs -mkdir ${system:test.tmp.dir}/delim_table;
+dfs -mkdir ${system:test.tmp.dir}/delim_table_ext;
+dfs -mkdir ${system:test.tmp.dir}/delim_table_trans;
+dfs -cp ${system:hive.root}/data/files/table1_delim.txt 
${system:test.tmp.dir}/delim_table/;
+dfs -cp ${system:hive.root}/data/files/table1_delim.txt 
${system:test.tmp.dir}/delim_table_ext/;
+dfs -cp ${system:hive.root}/data/files/table1_delim.txt 
${system:test.tmp.dir}/delim_table_trans/;
+
+-- Checking that MicroManged and External tables have the same behaviour with 
delimited input files
+-- External table
+CREATE EXTERNAL TABLE delim_table_ext(id INT, name STRING, safety INT) ROW 
FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE LOCATION 
'${system:test.tmp.dir}/delim_table_ext/';
+describe formatted delim_table_ext;
+SELECT * FROM delim_table_ext;
+
+-- MicroManaged insert_only table
+CREATE TABLE delim_table_micro(id INT, name STRING, safety INT) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE 
TBLPROPERTIES('transactional'='true', "transactional_properties"="insert_only");
+LOAD DATA INPATH '${system:test.tmp.dir}/delim_table/table1_delim.txt' 
OVERWRITE INTO TABLE delim_table_micro;
+describe formatted delim_table_micro;
+SELECT * FROM delim_table_micro;
+
+-- Same as above with different syntax
+CREATE TRANSACTIONAL TABLE delim_table_trans(id INT, name STRING, safety INT) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
+LOAD DATA INPATH '${system:test.tmp.dir}/delim_table_trans/table1_delim.txt' 
OVERWRITE INTO TABLE delim_table_trans;
+describe formatted delim_table_trans;
+SELECT * FROM delim_table_trans;
diff --git a/ql/src/test/results

[hive] branch master updated: HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan

2020-07-08 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new aa407da  HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David 
Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan 

aa407da is described below

commit aa407da7b00aea585629f27c2b88d1faf17eae32
Author: David Mollitor 
AuthorDate: Mon Jun 15 14:56:49 2020 -0400

HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David Mollitor via 
Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan 
---
 pom.xml|  8 +++---
 .../hive/metastore/PersistenceManagerProvider.java | 30 +++---
 .../src/main/resources/package.jdo | 10 
 standalone-metastore/pom.xml   |  8 +++---
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/pom.xml b/pom.xml
index 2a0c328..0513fe7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -116,10 +116,10 @@
 1.12.0
 1.8.2
 1.21.0
-4.2.4
-4.1.17
-4.1.19
-3.2.0-m3
+5.2.2
+5.2.2
+3.2.0-m13
+5.2.2
 1.2
 1.7
 3.2.2
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java
index f97808a..870532a 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java
@@ -42,8 +42,8 @@ import org.datanucleus.NucleusContext;
 import org.datanucleus.PropertyNames;
 import org.datanucleus.api.jdo.JDOPersistenceManager;
 import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
-import org.datanucleus.store.scostore.Store;
-import org.datanucleus.util.WeakValueMap;
+import org.datanucleus.util.ConcurrentReferenceHashMap;
+import org.datanucleus.store.types.scostore.Store;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -367,26 +367,26 @@ public class PersistenceManagerProvider {
   }
 
   private static void clearClr(ClassLoaderResolver clr) throws Exception {
-if (clr != null) {
-  if (clr instanceof ClassLoaderResolverImpl) {
-ClassLoaderResolverImpl clri = (ClassLoaderResolverImpl) clr;
-long resourcesCleared = clearFieldMap(clri, "resources");
-long loadedClassesCleared = clearFieldMap(clri, "loadedClasses");
-long unloadedClassesCleared = clearFieldMap(clri, "unloadedClasses");
-LOG.debug("Cleared ClassLoaderResolverImpl: {}, {}, {}", 
resourcesCleared,
-loadedClassesCleared, unloadedClassesCleared);
-  }
+if (clr instanceof ClassLoaderResolverImpl) {
+  ClassLoaderResolverImpl clri = (ClassLoaderResolverImpl) clr;
+  int resourcesCleared = clearFieldMap(clri, "resources");
+  int loadedClassesCleared = clearFieldMap(clri, "loadedClasses");
+  int unloadedClassesCleared = clearFieldMap(clri, "unloadedClasses");
+
+  LOG.debug(
+  "Cleared ClassLoaderResolverImpl: resources: {}, loaded classes: {}, 
unloaded classes: {}",
+  resourcesCleared, loadedClassesCleared, unloadedClassesCleared);
 }
   }
 
-  private static long clearFieldMap(ClassLoaderResolverImpl clri, String 
mapFieldName)
+  private static int clearFieldMap(ClassLoaderResolverImpl clri, String 
mapFieldName)
   throws Exception {
 Field mapField = 
ClassLoaderResolverImpl.class.getDeclaredField(mapFieldName);
 mapField.setAccessible(true);
 
-Map map = (Map) mapField.get(clri);
-long sz = map.size();
-mapField.set(clri, Collections.synchronizedMap(new WeakValueMap()));
+Map map = (Map) mapField.get(clri);
+final int sz = map.size();
+mapField.set(clri, new ConcurrentReferenceHashMap<>());
 return sz;
   }
 
diff --git 
a/standalone-metastore/metastore-server/src/main/resources/package.jdo 
b/standalone-metastore/metastore-server/src/main/resources/package.jdo
index 0252dd0..d1f4b33 100644
--- a/standalone-metastore/metastore-server/src/main/resources/package.jdo
+++ b/standalone-metastore/metastore-server/src/main/resources/package.jdo
@@ -345,11 +345,11 @@
   
 
   
-  
+  
 
-
 
-  
+  
+
 
   
   
@@ -357,8 +357,8 @@
 
   
 
-  
-  
+  
+
 
   
 
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index b0267aa..a7eb54b 100644
--- a/standalone-metastore/pom.xml

[hive] branch master updated: HIVE-23277 : HiveProtoLogger should carry out JSON conversion in its own thread ( Attila Magyar via Rajesh Balamohan)

2020-07-08 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 58e532e  HIVE-23277 : HiveProtoLogger should carry out JSON conversion 
in its own thread ( Attila Magyar via Rajesh Balamohan)
58e532e is described below

commit 58e532e17fd5f8c0d6188756c1a9869b467dfaff
Author: Attila Magyar 
AuthorDate: Wed Jul 8 15:01:49 2020 -0700

HIVE-23277 : HiveProtoLogger should carry out JSON conversion in its own 
thread ( Attila Magyar via Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/ql/exec/ExplainTask.java| 20 -
 .../ql/hooks/HiveHookEventProtoPartialBuilder.java | 86 ++
 .../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java | 67 +++--
 .../ql/optimizer/physical/StageIDsRearranger.java  |  8 +-
 .../TestHiveHookEventProtoPartialBuilder.java  | 82 +
 .../hive/ql/hooks/TestHiveProtoLoggingHook.java|  1 +
 6 files changed, 216 insertions(+), 48 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 750abcb..f2ed01a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -244,9 +244,25 @@ public class ExplainTask extends Task 
implements Serializable {
 work.getCboPlan(), work.getOptimizedSQL());
   }
 
+  public JSONObject getJSONPlan(PrintStream out, ExplainWork work, String 
stageIdRearrange)
+  throws Exception {
+return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
+work.isFormatted(), work.getExtended(), work.isAppendTaskType(), 
work.getCboInfo(),
+work.getCboPlan(), work.getOptimizedSQL(), stageIdRearrange);
+  }
+
+  public JSONObject getJSONPlan(PrintStream out, List> tasks, Task 
fetchTask,
+boolean jsonOutput, boolean isExtended, 
boolean appendTaskType, String cboInfo,
+String cboPlan, String optimizedSQL) throws 
Exception {
+return getJSONPlan(
+out, tasks, fetchTask, jsonOutput, isExtended,
+appendTaskType, cboInfo, cboPlan, optimizedSQL,
+conf.getVar(ConfVars.HIVESTAGEIDREARRANGE));
+  }
+
   public JSONObject getJSONPlan(PrintStream out, List> tasks, Task 
fetchTask,
   boolean jsonOutput, boolean isExtended, boolean appendTaskType, String 
cboInfo,
-  String cboPlan, String optimizedSQL) throws Exception {
+  String cboPlan, String optimizedSQL, String stageIdRearrange) throws 
Exception {
 
 // If the user asked for a formatted output, dump the json output
 // in the output stream
@@ -274,7 +290,7 @@ public class ExplainTask extends Task 
implements Serializable {
   }
 }
 
-List ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
+List ordered = StageIDsRearranger.getExplainOrder(tasks, 
stageIdRearrange);
 
 if (fetchTask != null) {
   
fetchTask.setParentTasks((List)StageIDsRearranger.getFetchSources(tasks));
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java
new file mode 100644
index 000..9b9b4e1
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java
@@ -0,0 +1,86 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one
+ *  * or more contributor license agreements.  See the NOTICE file
+ *  * distributed with this work for additional information
+ *  * regarding copyright ownership.  The ASF licenses this file
+ *  * to you under the Apache License, Version 2.0 (the
+ *  * "License"); you may not use this file except in compliance
+ *  * with the License.  You may obtain a copy of the License at
+ *  *
+ *  * http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.Map;
+
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.hive.ql.exec.ExplainTask;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook.OtherInfoType;
+import org.apache.hadoop.hive.ql.hooks.proto.HiveHookEvents;
+import org.apache.hadoop.hive.ql.plan.ExplainWork;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.Logg
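
As a rough illustration of the approach named in the subject (all names below are hypothetical, not the hook's actual API): the caller only enqueues the event, and a dedicated single-thread executor performs the expensive JSON conversion and the write.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class AsyncJsonConversionSketch {
  // One worker thread owns the expensive conversion + write; callers never block on it.
  private final ExecutorService worker = Executors.newSingleThreadExecutor();

  public void log(Object event) {
    worker.execute(() -> {
      String json = toJson(event);   // expensive step, moved off the caller thread
      write(json);
    });
  }

  public void shutdown() {
    worker.shutdown();
  }

  private String toJson(Object event) { return "{\"event\":\"" + event + "\"}"; }

  private void write(String json) { System.out.println(json); }
}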

[hive] branch master updated: HIVE-23665 Rewrite last_value to first_value to enable streaming results (Ramesh Kumar via Jesus Camacho Rodriguez)

2020-07-07 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 5c1a3d6  HIVE-23665 Rewrite last_value to first_value to enable 
streaming results (Ramesh Kumar via Jesus Camacho Rodriguez)
5c1a3d6 is described below

commit 5c1a3d6dfe64d0daf2ea47047cfcb9dae62e2fa5
Author: Ramesh Thangarajan 
AuthorDate: Wed Jun 24 10:13:31 2020 -0700

HIVE-23665 Rewrite last_value to first_value to enable streaming results 
(Ramesh Kumar via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan 
---
 .../rules/HiveWindowingLastValueRewrite.java   | 119 +++
 .../hadoop/hive/ql/parse/CalcitePlanner.java   |   4 +-
 .../vector_ptf_last_value_streaming.q  |   9 +
 .../llap/vector_ptf_last_value_streaming.q.out | 192 +
 .../llap/vector_ptf_part_simple.q.out  | 238 ++---
 5 files changed, 478 insertions(+), 84 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java
new file mode 100644
index 000..5845553
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexFieldCollation;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexWindow;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.commons.collections.CollectionUtils;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+
+/**
+ * Rule to rewrite a window function containing a last value clause.
+ */
+public class HiveWindowingLastValueRewrite extends RelOptRule {
+
+  public static final HiveWindowingLastValueRewrite INSTANCE = new 
HiveWindowingLastValueRewrite();
+
+  private static final String FIRST_VALUE_FUNC = "first_value";
+  private static final String LAST_VALUE_FUNC = "last_value";
+
+
+  private HiveWindowingLastValueRewrite() {
+super(operand(Project.class, any()));
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+Project project = call.rel(0);
+
+List newExprs = new ArrayList<>();
+LastValueRewriteRexShuttle lastValueRewrite = new 
LastValueRewriteRexShuttle(
+project.getCluster().getRexBuilder());
+boolean modified = false;
+for (RexNode expr : project.getChildExps()) {
+  RexNode newExpr = lastValueRewrite.apply(expr);
+  newExprs.add(newExpr);
+  modified |= (newExpr != expr);
+}
+if (modified) {
+  RelNode newProject = project.copy(
+  project.getTraitSet(), project.getInput(), newExprs, 
project.getRowType());
+  call.transformTo(newProject);
+}
+  }
+
+  private static class LastValueRewriteRexShuttle extends RexShuttle {
+
+private final RexBuilder rexBuilder;
+
+private LastValueRewriteRexShuttle(RexBuilder rexBuilder) {
+  this.rexBuilder = rexBuilder;
+}
+
+public RexNode visitOver(RexOver over) {
+  if (over.op.getName().equals(LAST_VALUE_FUNC) && 
over.getWindow().getLowerBound().isUnbounded()
+&& over.getWindow().getUpperBound().isUnbounded()) {
+ImmutableList orderKeys = 
over.getWindow().orderKeys;
+if

[hive] branch master updated: HIVE-23735 : Reducer misestimate for export command (Rajesh Balamohan via Ashutosh Chauhan)

2020-07-06 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 54a2c0c  HIVE-23735 : Reducer misestimate for export command (Rajesh 
Balamohan via Ashutosh Chauhan)
54a2c0c is described below

commit 54a2c0c6ec7b47ae63e25c83a405c7e349f739ca
Author: Rajesh Balamohan 
AuthorDate: Mon Jul 6 08:39:56 2020 -0700

HIVE-23735 : Reducer misestimate for export command (Rajesh Balamohan via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index de746a8..72794e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6866,7 +6866,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 int numFiles = 1;
 int totalFiles = 1;
 
-if (dest_tab.getNumBuckets() > 0) {
+if (dest_tab.getNumBuckets() > 0 && !dest_tab.getBucketCols().isEmpty()) {
   enforceBucketing = true;
   if (updating(dest) || deleting(dest)) {
 partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true);



[hive] branch master updated: HIVE-17879 : Upgrade Datanucleus Maven Plugin

2020-06-14 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 321b46f  HIVE-17879 : Upgrade Datanucleus Maven Plugin
321b46f is described below

commit 321b46fe1baf179719ec644c573a39abaf4646d4
Author: Ashutosh Chauhan 
AuthorDate: Sun Jun 14 18:45:04 2020 -0700

HIVE-17879 : Upgrade Datanucleus Maven Plugin
---
 pom.xml   | 13 -
 standalone-metastore/metastore-server/pom.xml |  2 +-
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/pom.xml b/pom.xml
index eaadad0..2a31dbd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,6 @@
 
 
 1.0b3
-
3.3.0-release
 -Xmx2048m
 2.17
 1.12
@@ -1170,18 +1169,6 @@
   ${maven.surefire.plugin.version}
 
 
-  org.datanucleus
-  datanucleus-maven-plugin
-  ${datanucleus.maven.plugin.version}
-  
-
-  org.datanucleus
-  datanucleus-core
-  ${datanucleus-core.version}
-
-  
-
-
   org.apache.felix
   maven-bundle-plugin
   ${felix.version}
diff --git a/standalone-metastore/metastore-server/pom.xml 
b/standalone-metastore/metastore-server/pom.xml
index df6be61..d1a3631 100644
--- a/standalone-metastore/metastore-server/pom.xml
+++ b/standalone-metastore/metastore-server/pom.xml
@@ -722,7 +722,7 @@
   
 org.datanucleus
 datanucleus-maven-plugin
-4.0.5
+5.2.1
 
   JDO
   false



[hive] branch master updated: HIVE-22687 : Query hangs indefinitely if LLAP daemon registers after the query is submitted (Himanshu Mishra, Attila Magyar via Ashutosh Chauhan, Prasanth J)

2020-06-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e85c9e6  HIVE-22687 : Query hangs indefinitely if LLAP daemon 
registers after the query is submitted (Himanshu Mishra, Attila Magyar via 
Ashutosh Chauhan, Prasanth J)
e85c9e6 is described below

commit e85c9e6a2635e773c691e196bece759dfbe53878
Author: Ashutosh Chauhan 
AuthorDate: Sat Jun 13 14:21:06 2020 -0700

HIVE-22687 : Query hangs indefinitely if LLAP daemon registers after the 
query is submitted (Himanshu Mishra, Attila Magyar via Ashutosh Chauhan, 
Prasanth J)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java | 4 +++-
 .../src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
 
b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
index 92c4771..cf4e7b8 100644
--- 
a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
+++ 
b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
@@ -153,9 +153,10 @@ public class LlapZookeeperRegistryImpl
 HiveConf.getVarWithoutType(conf, 
ConfVars.LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE));
 populateConfigValues(capacityValues.entrySet());
 
-String uniqueId = registerServiceRecord(daemonZkRecord);
+String uniqueId = UNIQUE_ID.toString();
 long znodeCreationTimeout = 120;
 
+initializeWithoutRegisteringInternal();
 // Create a znode under the rootNamespace parent for this instance of the 
server
 try {
   slotZnode = new SlotZnode(
@@ -171,6 +172,7 @@ public class LlapZookeeperRegistryImpl
   throw (e instanceof IOException) ? (IOException)e : new IOException(e);
 }
 
+registerServiceRecord(daemonZkRecord, uniqueId);
 LOG.info("Registered node. Created a znode on ZooKeeper for LLAP instance: 
rpc: {}, " +
 "shuffle: {}, webui: {}, mgmt: {}, znodePath: {}", rpcEndpoint, 
getShuffleEndpoint(),
 getServicesEndpoint(), getMngEndpoint(), 
getRegistrationZnodePath());
diff --git 
a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java 
b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java
index 2b21baa..249fa49 100644
--- 
a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java
+++ 
b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java
@@ -339,7 +339,7 @@ public abstract class ZkRegistryBase {
   }
 
 
-  final void initializeWithoutRegisteringInternal() throws IOException {
+  final protected void initializeWithoutRegisteringInternal() throws 
IOException {
 // Create a znode under the rootNamespace parent for this instance of the 
server
 try {
   try {



[hive] branch master updated: HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS init to NN (Rajesh Balamohan via Ashutosh Chauhan)

2020-06-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 03165e0  HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS 
init to NN (Rajesh Balamohan via Ashutosh Chauhan)
03165e0 is described below

commit 03165e00eb52d77062a741b576e48971827dfea3
Author: Rajesh Balamohan 
AuthorDate: Sat Jun 13 14:08:43 2020 -0700

HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS init to NN 
(Rajesh Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java   | 2 +-
 ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java| 7 +++
 .../hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java   | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index c90ff52..35d066a 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -537,7 +537,7 @@ public class OrcEncodedDataReader extends 
CallableWithNdc
 path = split.getPath();
 if (fileKey instanceof Long && HiveConf.getBoolVar(
 daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) {
-  path = HdfsUtils.getFileIdPath(fsSupplier.get(), path, (long)fileKey);
+  path = HdfsUtils.getFileIdPath(path, (long)fileKey);
 }
 LlapIoImpl.ORC_LOGGER.trace("Creating reader for {} ({})", path, 
split.getPath());
 long startTime = counters.startTimeCounter();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index 4de5c8c..f590eb6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -102,10 +102,9 @@ public class HdfsUtils {
   //   as public utility method in HDFS to obtain the inode-based path.
   private static String HDFS_ID_PATH_PREFIX = "/.reserved/.inodes/";
 
-  public static Path getFileIdPath(
-  FileSystem fileSystem, Path path, long fileId) {
-return ((fileSystem instanceof DistributedFileSystem))
-? new Path(HDFS_ID_PATH_PREFIX + fileId) : path;
+  public static Path getFileIdPath(Path path, long fileId) {
+// BI/ETL split strategies set fileId correctly when HDFS is used.
+return (fileId > 0) ? new Path(HDFS_ID_PATH_PREFIX + fileId) : path;
   }
 
   public static boolean isDefaultFs(DistributedFileSystem fs) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 3fd93ca..a3bbb7b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -221,7 +221,7 @@ public class VectorizedParquetRecordReader extends 
ParquetRecordReaderBase
   FileSystem fs = file.getFileSystem(configuration);
   if (cacheKey instanceof Long && HiveConf.getBoolVar(
   cacheConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) {
-file = HdfsUtils.getFileIdPath(fs, file, (long)cacheKey);
+file = HdfsUtils.getFileIdPath(file, (long)cacheKey);
   }
 }
 



[hive] branch master updated: HIVE-23554 : Adding FilterContext as part of LLAP ColumnVectorBatch (propagated in ReadPipeline). Also moving the code that prepares VectorBatches in EncodedDataConsumer

2020-06-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 3065904  HIVE-23554 : Adding FilterContext as part of LLAP 
ColumnVectorBatch (propagated in ReadPipeline). Also moving the code that 
prepares VectorBatches in EncodedDataConsumer in a separate method -- this will 
be handy when dealing with row-filters later on. (Panos G via Ashutosh Chauhan)
3065904 is described below

commit 30659041e470702e4a52f024eabe28d2d848d478
Author: Panos Garefalakis 
AuthorDate: Wed May 27 11:51:48 2020 +0100

HIVE-23554 : Adding FilterContext as part of LLAP ColumnVectorBatch 
(propagated in ReadPipeline). Also moving the code that prepares VectorBatches 
in EncodedDataConsumer in a separate method -- this will be handy when dealing 
with row-filters later on. (Panos G via Ashutosh Chauhan)

Change-Id: I0177756e842e60f6850c966cfa44fe0d53df4a28
Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/api/impl/ColumnVectorBatch.java   | 15 
 .../llap/io/decode/OrcEncodedDataConsumer.java | 27 +-
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java
index 19b0b55..52dc072 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java
@@ -20,12 +20,17 @@ package org.apache.hadoop.hive.llap.io.api.impl;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.filter.MutableFilterContext;
+
+import java.util.Arrays;
 
 /**
  * Unlike VRB, doesn't have some fields, and doesn't have all columns
  * (non-selected, partition cols, cols for downstream ops, etc.)
+ * It does, however, hold the FilterContext of the VRB.
  */
 public class ColumnVectorBatch {
+  public MutableFilterContext filterContext;
   public ColumnVector[] cols;
   public int size;
 
@@ -34,6 +39,7 @@ public class ColumnVectorBatch {
   }
 
   public ColumnVectorBatch(int columnCount, int batchSize) {
+this.filterContext = new VectorizedRowBatch(0);
 this.cols = new ColumnVector[columnCount];
 this.size = batchSize;
   }
@@ -51,6 +57,15 @@ public class ColumnVectorBatch {
   return "";
 }
 StringBuilder b = new StringBuilder();
+b.append("FilterContext used: ");
+b.append(filterContext.isSelectedInUse());
+b.append(", size: ");
+b.append(filterContext.getSelectedSize());
+b.append('\n');
+b.append("Selected: ");
+b.append(filterContext.isSelectedInUse() ? 
Arrays.toString(filterContext.getSelected()) : "[]");
+b.append('\n');
+
 b.append("Column vector types: ");
 for (int k = 0; k < cols.length; k++) {
   ColumnVector cv = cols[k];
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index 79dba42..9459a4f 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -152,17 +152,10 @@ public class OrcEncodedDataConsumer
 }
 
 ColumnVectorBatch cvb = cvbPool.take();
+cvb.filterContext.reset();
 // assert cvb.cols.length == batch.getColumnIxs().length; // Must be 
constant per split.
 cvb.size = batchSize;
 for (int idx = 0; idx < columnReaders.length; ++idx) {
-  TreeReader reader = columnReaders[idx];
-  if (cvb.cols[idx] == null) {
-// Orc store rows inside a root struct (hive writes it this way).
-// When we populate column vectors we skip over the root struct.
-cvb.cols[idx] = createColumn(batchSchemas[idx], 
VectorizedRowBatch.DEFAULT_SIZE, useDecimal64ColumnVectors);
-  }
-  trace.logTreeReaderNextVector(idx);
-
   /*
* Currently, ORC's TreeReaderFactory class does this:
*
@@ -198,9 +191,8 @@ public class OrcEncodedDataConsumer
* it doesn't get confused.
*
*/
-  ColumnVector cv = cvb.cols[idx];
-  cv.reset();
-  cv.ensureSize(batchSize, false);
+  TreeReader reader = columnReaders[idx];
+  ColumnVector cv = prepareColumnVector(cvb, idx, batchSize);
   reader.nextVector(cv, null, batchSize);
 }
 
@@ -218,6 +210,19 @@ public class OrcEncodedDataConsumer
 }
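
As a loose illustration of what carrying a filter context in a batch means (field names mirror VectorizedRowBatch semantics, but the class is purely hypothetical): a batch optionally records which of its rows survived a filter via a selected-index array, and downstream readers consult that array instead of assuming every row is live.

public class FilteredBatchSketch {
  int size;               // rows physically present in the batch
  boolean selectedInUse;  // when true, only selected[0..selectedSize) are live rows
  int[] selected;
  int selectedSize;

  void applyFilter(int[] passingRows) {
    selected = passingRows;
    selectedSize = passingRows.length;
    selectedInUse = true;
  }

  void reset() {
    // Equivalent in spirit to cvb.filterContext.reset() in the diff above.
    selectedInUse = false;
    selectedSize = 0;
  }
}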
  

[hive] branch master updated: HIVE-23580 : deleteOnExit set is not cleaned up, causing memory pressure (Attila Magyar via Ashutosh Chauhan)

2020-06-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new fecad5b  HIVE-23580 : deleteOnExit set is not cleaned up, causing 
memory pressure (Attila Magyar via Ashutosh Chauhan)
fecad5b is described below

commit fecad5b0f72c535ed1c53f2cc62b0d6649b651ae
Author: Attila Magyar 
AuthorDate: Sat Jun 13 13:47:38 2020 -0700

HIVE-23580 : deleteOnExit set is not cleaned up, causing memory pressure 
(Attila Magyar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/Context.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 318c207..a41c5c8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -678,6 +678,8 @@ public class Context {
 Path p = entry.getValue();
 if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, 
fsScratchDirs.values())  ) {
   LOG.debug("Skip deleting stagingDir: " + p);
+  FileSystem fs = p.getFileSystem(conf);
+  fs.cancelDeleteOnExit(p);
   continue; // staging dir is deleted when deleting the scratch dir
 }
 if(resultCacheDir == null || 
!p.toUri().getPath().contains(resultCacheDir)) {
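
For readers unfamiliar with the API involved, a small hedged sketch (paths and flow are illustrative): FileSystem.deleteOnExit(Path) adds the path to a per-FileSystem set that is only drained at JVM shutdown, so code that handles a path earlier should call cancelDeleteOnExit(Path) to drop the entry and keep the set from growing.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteOnExitSketch {
  // Illustrative: register a temp dir for shutdown-time deletion, then release the
  // entry once the directory has been dealt with some other way.
  public static void handleTempDir(Configuration conf, Path tempDir) throws Exception {
    FileSystem fs = tempDir.getFileSystem(conf);
    fs.deleteOnExit(tempDir);
    // ... use tempDir; it ends up deleted (or intentionally kept) by other code ...
    fs.cancelDeleteOnExit(tempDir); // the shutdown set no longer references tempDir
  }
}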



[hive] branch master updated: HIVE-23666 : checkHashModeEfficiency is skipped when a groupby operator doesn't have a grouping set (Ramesh Kumar via Jesus Camacho Rodriguez)

2020-06-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 45f6efd  HIVE-23666 : checkHashModeEfficiency is skipped when a 
groupby operator doesn't have a grouping set (Ramesh Kumar via Jesus Camacho 
Rodriguez)
45f6efd is described below

commit 45f6efd994d6c1e34425821ac1878bdff0249500
Author: RAMESH KUMAR THANGARAJAN 
AuthorDate: Sat Jun 13 10:36:59 2020 -0700

HIVE-23666 : checkHashModeEfficiency is skipped when a groupby operator 
doesn't have a grouping set (Ramesh Kumar via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/exec/vector/VectorGroupByOperator.java | 23 +++---
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index d4d18ef..9f81e8e 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -180,21 +180,20 @@ public class VectorGroupByOperator extends 
Operator
 
   if (!groupingSetsPresent) {
 doProcessBatch(batch, false, null);
-return;
-  }
-
-  // We drive the doProcessBatch logic with the same batch but different
-  // grouping set id and null variation.
-  // PERFORMANCE NOTE: We do not try to reuse columns and generate the 
KeyWrappers anew...
+  } else {
+// We drive the doProcessBatch logic with the same batch but different
+// grouping set id and null variation.
+// PERFORMANCE NOTE: We do not try to reuse columns and generate the 
KeyWrappers anew...
 
-  final int size = groupingSets.length;
-  for (int i = 0; i < size; i++) {
+final int size = groupingSets.length;
+for (int i = 0; i < size; i++) {
 
-// NOTE: We are overwriting the constant vector value...
-groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]);
-groupingSetsDummyVectorExpression.evaluate(batch);
+  // NOTE: We are overwriting the constant vector value...
+  groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]);
+  groupingSetsDummyVectorExpression.evaluate(batch);
 
-doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]);
+  doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]);
+}
   }
 
   if (this instanceof ProcessingModeHashAggregate) {



[hive] branch master updated: HIVE-23561: Fixing arrow serializer for Decimals with selected (Panos G via Mahesh Kumar)

2020-06-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 739cc43  HIVE-23561: Fixing arrow serializer for Decimals with 
selected (Panos G via Mahesh Kumar)
739cc43 is described below

commit 739cc434505e4ee79e677cf39ff7fcee79527b2c
Author: Panos Garefalakis 
AuthorDate: Thu May 28 14:22:49 2020 +0100

HIVE-23561: Fixing arrow serializer for Decimals with selected (Panos G via 
Mahesh Kumar)

Change-Id: Ie92fe13f134c71d2510dd82a9cbee39fe90a2273
Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/io/arrow/ArrowColumnarBatchSerDe.java  |   4 +-
 .../apache/hadoop/hive/ql/io/arrow/Serializer.java |   8 +-
 .../ql/io/arrow/TestArrowColumnarBatchSerDe.java   | 160 -
 3 files changed, 161 insertions(+), 11 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
index 0408707..4896bc4 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.io.arrow;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.impl.UnionListWriter;
@@ -97,7 +98,8 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
   StructObjectInspector rowObjectInspector;
   Configuration conf;
 
-  private Serializer serializer;
+  @VisibleForTesting
+  Serializer serializer;
   private Deserializer deserializer;
 
   @Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index d5a9b2c..5a79641 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.io.arrow;
 
+import com.google.common.annotations.VisibleForTesting;
 import io.netty.buffer.ArrowBuf;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.BitVector;
@@ -44,8 +45,6 @@ import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.arrow.vector.util.DecimalUtility;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -105,7 +104,8 @@ public class Serializer {
   private final static byte[] EMPTY_BYTES = new byte[0];
 
   // Hive columns
-  private final VectorizedRowBatch vectorizedRowBatch;
+  @VisibleForTesting
+  final VectorizedRowBatch vectorizedRowBatch;
   private final VectorAssignRow vectorAssignRow;
   private int batchSize;
   private BufferAllocator allocator;
@@ -923,7 +923,7 @@ public class Serializer {
 final int scale = decimalVector.getScale();
 decimalVector.set(i, ((DecimalColumnVector) 
hiveVector).vector[j].getHiveDecimal().bigDecimalValue().setScale(scale));
 
-final HiveDecimalWritable writable = ((DecimalColumnVector) 
hiveVector).vector[i];
+final HiveDecimalWritable writable = ((DecimalColumnVector) 
hiveVector).vector[j];
 decimalHolder.precision = writable.precision();
 decimalHolder.scale = scale;
 try (ArrowBuf arrowBuf = allocator.buffer(DecimalHolder.WIDTH)) {
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
index be15197..d5aaa9e 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java
@@ -203,7 +203,14 @@ public class TestArrowColumnarBatchSerDe {
   private void initAndSerializeAndDeserialize(String[][] schema, Object[][] 
rows) throws SerDeException {
 ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe();
 StructObjectInspector rowOI = initSerDe(serDe, schema);
-serializeAndDeserialize(serDe, rows, rowOI);
+serializeAndDeserialize(serDe, rows, rowOI, null);
+  }
+
+  private void initAndSerializeAndDeserializeSelected(String[][] schema, 
Object[][] rows, int[] selected)
+  throws SerDeException {
+ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe();
+StructObjectInspector rowOI = initSerDe(serDe, schema);
+serializeAndDeserialize(serDe, rows, rowOI, selected
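
The one-character fix above (vector[j] instead of vector[i]) is easier to see with a small standalone sketch of selected-row indexing (purely illustrative, not the serializer's code): when a batch is filtered, the source row for output position i is selected[i], so column values must be read through that indirection.

public class SelectedRowIndexingSketch {
  static long[] projectColumn(long[] vector, boolean selectedInUse, int[] selected, int size) {
    long[] out = new long[size];
    for (int i = 0; i < size; i++) {
      int j = selectedInUse ? selected[i] : i; // physical position of logical row i
      out[i] = vector[j];                      // reading vector[i] here would mix up rows
    }
    return out;
  }
}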

[hive] branch master updated: HIVE-23435 : Full outer join result is missing rows (Mustafa Iman via Ashutosh Chauhan)

2020-06-01 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 779c42f  HIVE-23435 : Full outer join result is missing rows (Mustafa 
Iman via Ashutosh Chauhan)
779c42f is described below

commit 779c42fa3c73f41785eb3266f3438ab9e2a75032
Author: Mustafa Iman 
AuthorDate: Mon Jun 1 10:26:14 2020 -0700

HIVE-23435 : Full outer join result is missing rows (Mustafa Iman via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/exec/CommonJoinOperator.java|  18 +++-
 .../hive/ql/exec/CommonMergeJoinOperator.java  |   2 +-
 .../apache/hadoop/hive/ql/exec/JoinOperator.java   |   1 +
 .../hadoop/hive/ql/exec/MapJoinOperator.java   |   1 +
 .../hadoop/hive/ql/exec/SMBMapJoinOperator.java|   1 +
 .../ql/exec/vector/VectorMapJoinBaseOperator.java  |   2 -
 .../ql/exec/vector/VectorSMBMapJoinOperator.java   |   1 -
 .../clientpositive/vector_full_outer_join2.q   |  28 +
 .../results/clientpositive/llap/join_1to1.q.out|  84 ---
 .../llap/vector_full_outer_join2.q.out | 113 +
 10 files changed, 233 insertions(+), 18 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 2d76848..7a7c8a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -341,6 +341,8 @@ public abstract class CommonJoinOperator extends
 forwardCache = new Object[totalSz];
 aliasFilterTags = new short[numAliases];
 Arrays.fill(aliasFilterTags, (byte)0xff);
+aliasFilterTagsNext = new short[numAliases];
+Arrays.fill(aliasFilterTagsNext, (byte) 0xff);
 
 filterTags = new short[numAliases];
 skipVectors = new boolean[numAliases][];
@@ -478,6 +480,7 @@ public abstract class CommonJoinOperator extends
*   100, 30 :   N,  N
*/
   protected transient short[] aliasFilterTags;
+  protected transient short[] aliasFilterTagsNext;
 
   // all evaluation should be processed here for valid aliasFilterTags
   //
@@ -491,11 +494,21 @@ public abstract class CommonJoinOperator extends
   short filterTag = JoinUtil.isFiltered(row, joinFilters[alias],
   joinFilterObjectInspectors[alias], filterMaps[alias]);
   nr.add(new ShortWritable(filterTag));
-  aliasFilterTags[alias] &= filterTag;
 }
 return nr;
   }
 
+  protected void addToAliasFilterTags(byte alias, List object, boolean 
isNextGroup) {
+boolean hasFilter = hasFilter(alias);
+if (hasFilter) {
+  if (isNextGroup) {
+aliasFilterTagsNext[alias] &= ((ShortWritable) 
(object.get(object.size() - 1))).get();
+  } else {
+aliasFilterTags[alias] &= ((ShortWritable) (object.get(object.size() - 
1))).get();
+  }
+}
+  }
+
   // fill forwardCache with skipvector
   // returns whether a record was forwarded
   private boolean createForwardJoinObject(boolean[] skip) throws HiveException 
{
@@ -961,7 +974,8 @@ public abstract class CommonJoinOperator extends
 genJoinObject();
   }
 }
-Arrays.fill(aliasFilterTags, (byte)0xff);
+System.arraycopy(aliasFilterTagsNext, 0, aliasFilterTags, 0, 
aliasFilterTagsNext.length);
+Arrays.fill(aliasFilterTagsNext, (byte) 0xff);
   }
 
   protected void reportProgress() {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index 581577e..0d9dc46 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
@@ -242,6 +241,7 @@ public class CommonMergeJoinOperator extends 
AbstractMapJoinOperator implements Serial
   alias = (byte) tag;
 
   List nr = getFilteredValue(alias, row);
+  addToAliasFilterTags(alias, nr, false);
 
   if (handleSkewJoin) {
 skewJoinKeyContext.handleSkew(tag);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index 07b1fba..489d09f 100644
--- a/ql/src/java/org/apache

[hive] branch master updated: HIVE-23488 : Optimise PartitionManagementTask::Msck::repair (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-27 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new a3a25eb  HIVE-23488 : Optimise PartitionManagementTask::Msck::repair 
(Rajesh Balamohan via Ashutosh Chauhan)
a3a25eb is described below

commit a3a25ebb7e65d3348213a2e79f629eaffd9605c6
Author: Rajesh Balamohan 
AuthorDate: Wed May 27 11:14:52 2020 -0700

HIVE-23488 : Optimise PartitionManagementTask::Msck::repair (Rajesh 
Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/metadata/TestHiveMetaStoreChecker.java | 48 +++---
 .../hive/metastore/HiveMetaStoreChecker.java   | 26 ++--
 .../org/apache/hadoop/hive/metastore/Msck.java |  2 +-
 3 files changed, 39 insertions(+), 37 deletions(-)

diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java 
b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
index 520eb1b..198fb81 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
@@ -116,7 +116,7 @@ public class TestHiveMetaStoreChecker {
   @Test
   public void testTableCheck() throws HiveException, IOException, TException, 
MetastoreException {
 CheckResult result = new CheckResult();
-checker.checkMetastore(catName, dbName, null, null, result);
+checker.checkMetastore(catName, dbName, null, null, null, result);
 // we haven't added anything so should return an all ok
 assertEquals(Collections.emptySet(), result.getTablesNotInMs());
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
@@ -125,7 +125,7 @@ public class TestHiveMetaStoreChecker {
 
 // check table only, should not exist in ms
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, tableName, null, result);
+checker.checkMetastore(catName, dbName, tableName, null, null, result);
 assertEquals(1, result.getTablesNotInMs().size());
 assertEquals(tableName, result.getTablesNotInMs().iterator().next());
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
@@ -148,7 +148,7 @@ public class TestHiveMetaStoreChecker {
 // now we've got a table, check that it works
 // first check all (1) tables
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, null, null, result);
+checker.checkMetastore(catName, dbName, null, null, null, result);
 assertEquals(Collections.emptySet(), result.getTablesNotInMs());
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
 assertEquals(Collections.emptySet(), 
result.getPartitionsNotOnFs());
@@ -156,7 +156,7 @@ public class TestHiveMetaStoreChecker {
 
 // then let's check the one we know about
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, tableName, null, result);
+checker.checkMetastore(catName, dbName, tableName, null, null, result);
 assertEquals(Collections.emptySet(), result.getTablesNotInMs());
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
 assertEquals(Collections.emptySet(), 
result.getPartitionsNotOnFs());
@@ -168,7 +168,7 @@ public class TestHiveMetaStoreChecker {
 
 // now this shouldn't find the path on the fs
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, tableName, null, result);
+checker.checkMetastore(catName, dbName, tableName, null, null, result);
 assertEquals(Collections.emptySet(), result.getTablesNotInMs());
 assertEquals(1, result.getTablesNotOnFs().size());
 assertEquals(tableName, result.getTablesNotOnFs().iterator().next());
@@ -184,7 +184,7 @@ public class TestHiveMetaStoreChecker {
 
 // find the extra table
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, null, null, result);
+checker.checkMetastore(catName, dbName, null, null, null, result);
 assertEquals(1, result.getTablesNotInMs().size());
 assertEquals(fakeTable.getName(), 
Lists.newArrayList(result.getTablesNotInMs()).get(0));
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
@@ -198,7 +198,7 @@ public class TestHiveMetaStoreChecker {
 
 // should return all ok
 result = new CheckResult();
-checker.checkMetastore(catName, dbName, null, null, result);
+checker.checkMetastore(catName, dbName, null, null, null, result);
 assertEquals(Collections.emptySet(), result.getTablesNotInMs());
 assertEquals(Collections.emptySet(), result.getTablesNotOnFs());
 assertEquals(Collections.emptySet(), 
result.getPartitionsNotOnFs());
@@ -222,7 +222,7 @@ public class TestHiveMetaStoreChecker {
 fs.mkdirs(fakePart);
 fs.deleteOnExit(fakePart);
 CheckResult result = new CheckResult

[hive] branch master updated: HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 65e947e  HIVE-23487 : Optimise PartitionManagementTask (Rajesh 
Balamohan via Ashutosh Chauhan)
65e947e is described below

commit 65e947eec05304ed5b9441a374c240cd9a53d30a
Author: Rajesh Balamohan 
AuthorDate: Tue May 26 22:33:33 2020 -0700

HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/ddl/misc/msck/MsckOperation.java  |  2 +-
 .../hive/ql/exec/TestMsckCreatePartitionsInBatches.java  |  2 +-
 .../hive/ql/exec/TestMsckDropPartitionsInBatches.java|  2 +-
 .../main/java/org/apache/hadoop/hive/metastore/Msck.java | 16 ++--
 .../hadoop/hive/metastore/PartitionManagementTask.java   |  7 ---
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
index c05d699..3ffc58f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
@@ -52,7 +52,7 @@ public class MsckOperation extends DDLOperation {
   public int execute() throws HiveException, IOException, TException {
 try {
   Msck msck = new Msck(false, false);
-  msck.init(context.getDb().getConf());
+  msck.init(Msck.getMsckConf(context.getDb().getConf()));
 
   String[] names = Utilities.getDbTableName(desc.getTableName());
 
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
index 7821f40..869249a 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
@@ -83,7 +83,7 @@ public class TestMsckCreatePartitionsInBatches {
   throw new HiveException(e);
 }
 msck = new Msck( false, false);
-msck.init(hiveConf);
+msck.init(Msck.getMsckConf(hiveConf));
   }
 
   @Before
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
index 8be3112..e7318bf 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
@@ -80,7 +80,7 @@ public class TestMsckDropPartitionsInBatches {
 SessionState.start(hiveConf);
 db = new HiveMetaStoreClient(hiveConf);
 msck = new Msck( false, false);
-msck.init(hiveConf);
+msck.init(Msck.getMsckConf(hiveConf));
   }
 
   @Before
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
index f4e109d..59a96e8 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
@@ -84,15 +84,19 @@ public class Msck {
 
   public void init(Configuration conf) throws MetaException {
 if (msc == null) {
-  // the only reason we are using new conf here is to override 
EXPRESSION_PROXY_CLASS
-  Configuration metastoreConf = MetastoreConf.newMetastoreConf(new 
Configuration(conf));
-  
metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
-MsckPartitionExpressionProxy.class.getCanonicalName());
-  setConf(metastoreConf);
-  this.msc = new HiveMetaStoreClient(metastoreConf);
+  setConf(conf);
+  this.msc = new HiveMetaStoreClient(conf);
 }
   }
 
+  public static Configuration getMsckConf(Configuration conf) {
+// the only reason we are using new conf here is to override 
EXPRESSION_PROXY_CLASS
+Configuration metastoreConf = MetastoreConf.newMetastoreConf(new 
Configuration(conf));
+
metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
+MsckPartitionExpressionProxy.class.getCanonicalName());
+return metastoreConf;
+  }
+
   /**
* MetastoreCheck, see if the data in the metastore matches what is on the
* dfs. Current version checks for tables and partitions that are either
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
index e4488f4..612ac87 100644
--- 
a/standalone
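
After this change the EXPRESSION_PROXY_CLASS override lives in the static Msck.getMsckConf helper rather than inside Msck.init, so callers wrap their configuration explicitly. A minimal sketch of the new call pattern, assuming only that a Hadoop Configuration is at hand (exception handling elided):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.Msck;

    public class MsckInitExample {
      // Mirrors the call sites updated in the diff above.
      static Msck newMsck(Configuration conf) throws Exception {
        Msck msck = new Msck(false, false);
        // getMsckConf wraps the conf and overrides EXPRESSION_PROXY_CLASS.
        msck.init(Msck.getMsckConf(conf));
        return msck;
      }
    }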

[hive] branch master updated: HIVE-23281 : ObjectStore::convertToStorageDescriptor can be optimised to reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan via Ashutosh Chauhan)

2020-05-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 6aeb63a  HIVE-23281 : ObjectStore::convertToStorageDescriptor can be 
optimised to reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan 
via Ashutosh Chauhan)
6aeb63a is described below

commit 6aeb63a2becec1d2eeae8e144da1cbaf84546d72
Author: Ashutosh Chauhan 
AuthorDate: Sun May 24 23:14:55 2020 -0700

HIVE-23281 : ObjectStore::convertToStorageDescriptor can be optimised to 
reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hadoop/hive/ql/metadata/Table.java  |  2 +
 .../hive/ql/parse/ImportSemanticAnalyzer.java  |  9 ++-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 19 ++---
 .../exim_09_nonpart_noncompat_serdeparam.q.out |  2 +-
 .../clientpositive/llap/acid_bucket_pruning.q.out  |  2 -
 .../clientpositive/llap/acid_no_buckets.q.out  |  4 -
 .../clientpositive/llap/acid_nullscan.q.out|  2 -
 .../results/clientpositive/llap/acid_stats2.q.out  | 18 -
 .../results/clientpositive/llap/acid_stats5.q.out  | 12 ---
 .../clientpositive/llap/acid_table_stats.q.out | 12 ---
 .../clientpositive/llap/autoColumnStats_4.q.out|  6 --
 .../clientpositive/llap/check_constraint.q.out | 18 -
 .../llap/create_transactional_full_acid.q.out  |  2 -
 .../llap/create_transactional_insert_only.q.out|  2 -
 .../clientpositive/llap/default_constraint.q.out   | 24 --
 .../insert_values_orig_table_use_metadata.q.out| 10 ---
 .../test/results/clientpositive/llap/mm_all.q.out  |  6 --
 .../test/results/clientpositive/llap/mm_bhif.q.out | 44 +++
 .../results/clientpositive/llap/mm_default.q.out   | 12 ---
 .../test/results/clientpositive/llap/mm_exim.q.out |  4 -
 .../llap/murmur_hash_migration2.q.out  |  2 -
 .../clientpositive/llap/sqlmerge_stats.q.out   | 10 ---
 .../clientpositive/llap/stats_nonpart.q.out|  4 -
 .../results/clientpositive/llap/stats_part.q.out   | 10 ---
 .../results/clientpositive/llap/stats_part2.q.out  | 30 
 .../clientpositive/llap/stats_sizebug.q.out|  4 -
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  | 43 +++
 .../apache/hadoop/hive/metastore/ObjectStore.java  | 87 --
 28 files changed, 127 insertions(+), 273 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 8805eee..61b9fb8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -209,6 +209,8 @@ public class Table implements Serializable {
   // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe 
does
   // not support a table with no columns.
   
sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName());
+  //TODO setting serializaton format here is hacky. Only lazy simple serde 
needs it
+  // so should be set by serde only. Setting it here sets it 
unconditionally.
   
sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
   sd.setInputFormat(SequenceFileInputFormat.class.getName());
   sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName());
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index c21c6f1..cd92247 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -934,15 +934,18 @@ public class ImportSemanticAnalyzer extends 
BaseSemanticAnalyzer {
   .getSerdeParam(serdeConstants.SERIALIZATION_FORMAT);
   String importedSerdeFormat = tableDesc.getSerdeProps().get(
   serdeConstants.SERIALIZATION_FORMAT);
-  /*
+
+  /* TODO : Remove this weirdity. See notes in Table.getEmptyTable()
* If Imported SerdeFormat is null, then set it to "1" just as
* metadata.Table.getEmptyTable
*/
   importedSerdeFormat = importedSerdeFormat == null ? "1" : 
importedSerdeFormat;
-  if (!ObjectUtils.equals(existingSerdeFormat, importedSerdeFormat)) {
+  if (!TxnUtils.isTransactionalTable(table.getParameters()) &&
+  !ObjectUtils.equals(existingSerdeFormat, importedSerdeFormat)) {
 throw new SemanticException(
 ErrorMsg.INCOMPATIBLE_SCHEMA
-.getMsg(" Table Serde format does not match"));
+.getMsg(" Table Serde format does not match. Imported :"
+ "

[hive] branch master updated: HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)

2020-05-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f712def  HIVE-23214 Get rid of skipCorrupt as part of ORC read 
pipeline (Panos Garefalakis via Ashutosh Chauhan)
f712def is described below

commit f712def65b716ba6646828ed8f8be4464abbedc8
Author: Panos Garefalakis 
AuthorDate: Mon May 18 16:51:15 2020 +0100

HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos 
Garefalakis via Ashutosh Chauhan)

Change-Id: Ic1efd6dcffc71adfa1ac3059ceacbd3f30e6ef7e
Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/decode/GenericColumnVectorProducer.java   |  3 +--
 .../hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java|  5 +
 .../hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java | 10 +++---
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
index 1617692..1c7e537 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
@@ -85,8 +85,7 @@ public class GenericColumnVectorProducer implements 
ColumnVectorProducer {
   SchemaEvolutionFactory sef, InputFormat sourceInputFormat, 
Deserializer sourceSerDe,
   Reporter reporter, JobConf job, Map parts) throws 
IOException {
 cacheMetrics.incrCacheReadRequests();
-OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-consumer, includes, false, counters, ioMetrics);
+OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, 
includes, counters, ioMetrics);
 SerDeFileMetadata fm;
 try {
   fm = new SerDeFileMetadata(sourceSerDe);
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
index 17c4821..50abdfd 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
@@ -56,7 +56,6 @@ public class OrcColumnVectorProducer implements 
ColumnVectorProducer {
   private final LowLevelCache lowLevelCache;
   private final BufferUsageManager bufferManager;
   private final Configuration conf;
-  private boolean _skipCorrupt; // TODO: get rid of this
   private LlapDaemonCacheMetrics cacheMetrics;
   private LlapDaemonIOMetrics ioMetrics;
   // TODO: if using in multiple places, e.g. SerDe cache, pass this in.
@@ -73,7 +72,6 @@ public class OrcColumnVectorProducer implements 
ColumnVectorProducer {
 this.lowLevelCache = lowLevelCache;
 this.bufferManager = bufferManager;
 this.conf = conf;
-this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
 this.cacheMetrics = cacheMetrics;
 this.ioMetrics = ioMetrics;
 this.tracePool = tracePool;
@@ -90,8 +88,7 @@ public class OrcColumnVectorProducer implements 
ColumnVectorProducer {
   InputFormat unused0, Deserializer unused1, Reporter reporter, 
JobConf job,
   Map parts) throws IOException {
 cacheMetrics.incrCacheReadRequests();
-OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-consumer, includes, _skipCorrupt, counters, ioMetrics);
+OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, 
includes, counters, ioMetrics);
 OrcEncodedDataReader reader = new OrcEncodedDataReader(lowLevelCache, 
bufferManager,
 metadataCache, conf, job, split, includes, sarg, edc, counters, sef, 
tracePool, parts);
 edc.init(reader, reader, reader.getTrace());
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index b697a0d..79dba42 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -70,20 +70,16 @@ public class OrcEncodedDataConsumer
   private ConsumerFileMetadata fileMetadata; // We assume one request is only 
for one file.
   private CompressionCodec codec;
   private List stripes;
-  private final boolean skipCorrupt; // TODO: get rid of this
   private SchemaEvolution evolution;
   private IoTrace trace;
   private final Includes includes;
   private TypeDescription[] batchSchemas;
   private boolean useDecimal64ColumnVectors;
 
-  public OrcEncodedDataConsumer(
-Consumer consumer, Includes includes, boolean 
skipCorrupt

[hive] branch master updated: HIVE-21971 : HS2 leaks classloader due to `ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new bd84b5c  HIVE-21971 : HS2 leaks classloader due to 
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF 
(Rajesh Balamohan via Ashutosh Chauhan)
bd84b5c is described below

commit bd84b5cfdc0423463331935f85cbbed50b364e4b
Author: Rajesh Balamohan 
AuthorDate: Tue May 26 16:38:59 2020 -0700

HIVE-21971 : HS2 leaks classloader due to 
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF 
(Rajesh Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/session/SessionState.java   | 22 ++
 .../hadoop/hive/ql/session/TestSessionState.java   | 51 ++
 2 files changed, 73 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 55bd27e..20f352e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
 import java.lang.management.ManagementFactory;
+import java.lang.reflect.Method;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.security.AccessController;
@@ -106,6 +107,7 @@ import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -1808,6 +1810,26 @@ public class SessionState implements ISessionAuthState{
   Hive.closeCurrent();
 }
 progressMonitor = null;
+// Hadoop's ReflectionUtils caches constructors for the classes it 
instantiated.
+// In UDFs, this can result in classloaders not getting GCed for a 
temporary function,
+// resulting in a PermGen leak when used extensively from HiveServer2
+// There are lots of places where hadoop's ReflectionUtils is still used. 
Until all of them are
+// cleared up, we would have to retain this to avoid mem leak.
+clearReflectionUtilsCache();
+  }
+
+  private void clearReflectionUtilsCache() {
+Method clearCacheMethod;
+try {
+  clearCacheMethod = ReflectionUtils.class.getDeclaredMethod("clearCache");
+  if (clearCacheMethod != null) {
+clearCacheMethod.setAccessible(true);
+clearCacheMethod.invoke(null);
+LOG.debug("Cleared Hadoop ReflectionUtils CONSTRUCTOR_CACHE");
+  }
+} catch (Exception e) {
+  LOG.info("Failed to clear up Hadoop ReflectionUtils CONSTRUCTOR_CACHE", 
e);
+}
   }
 
   private void unCacheDataNucleusClassLoaders() {
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java 
b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
index 0fa1c81..4c374e8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
@@ -24,9 +24,12 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
 import java.lang.reflect.Method;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Map;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +37,12 @@ import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -221,6 +230,48 @@ public class TestSessionState {
 }
   }
 
+  static class DummyUDF extends GenericUDF {
+
+@Override public ObjectInspector initialize(ObjectInspector[] arguments)
+throws UDFArgumentException {
+  return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+}
+
+@Override public Object evaluate(DeferredObject[] arguments) throws 
HiveException {
+  return "dummy";
+}
+
+@Override public String getDisplayString(S
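
The session-close hook added above drops Hadoop's ReflectionUtils constructor cache so that a temporary function's classloader can be collected; since clearCache() is not part of the public API, it is invoked reflectively and failures are swallowed. A standalone sketch of the same best-effort call (the method name comes from the diff and may differ across Hadoop versions):

    import java.lang.reflect.Method;
    import org.apache.hadoop.util.ReflectionUtils;

    public final class ReflectionUtilsCacheCleaner {
      // Best-effort: invoke the non-public static ReflectionUtils.clearCache() reflectively.
      public static void clearConstructorCache() {
        try {
          Method clearCache = ReflectionUtils.class.getDeclaredMethod("clearCache");
          clearCache.setAccessible(true);
          clearCache.invoke(null); // static method, no receiver needed
        } catch (ReflectiveOperationException e) {
          // Method missing or inaccessible on this Hadoop version; nothing to do.
        }
      }
    }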

[hive] branch master updated: HIVE-23447 : Avoid sending configs to tasks and AM which are only relevant for HS2

2020-05-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4e70a13  HIVE-23447 : Avoid sending configs to tasks and AM which are 
only relevant for HS2
4e70a13 is described below

commit 4e70a1397504929df8fee227b57e4171e5688edc
Author: Ashutosh Chauhan 
AuthorDate: Mon May 11 22:05:13 2020 -0700

HIVE-23447 : Avoid sending configs to tasks and AM which are only relevant 
for HS2
---
 ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 97220c0..d21aa45 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -1396,6 +1396,9 @@ public class DagUtils {
 
 // TODO: convert this to a predicate too
 hiveConf.stripHiddenConfigurations(conf);
+
+// Remove hive configs which are used only in HS2 and not needed for 
execution
+
conf.unset(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST.varname); 
 return conf;
   }
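
The one-line change above unsets an HS2-only property from the configuration that DagUtils ships to Tez tasks and the AM. A minimal sketch of the same trimming step, assuming the Configuration passed in is already a copy destined for the task payload (the helper class itself is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class TaskConfTrimmer {
      // Remove settings that only matter inside HiveServer2 before shipping conf to tasks/AM.
      public static Configuration trim(Configuration taskConf) {
        taskConf.unset(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST.varname);
        // Further HS2-only keys would be unset the same way.
        return taskConf;
      }
    }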
 



[hive] branch master updated: HIVE-23536 : Provide an option to skip stats generation for major compaction (Peter Vary via Ashutosh Chauhan)

2020-05-25 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 947b7a4  HIVE-23536 : Provide an option to skip stats generation for 
major compaction (Peter Vary via Ashutosh Chauhan)
947b7a4 is described below

commit 947b7a44896fa57bc4e2ddaa6014cc4cb2c7002e
Author: Peter Vary 
AuthorDate: Mon May 25 16:13:32 2020 -0700

HIVE-23536 : Provide an option to skip stats generation for major 
compaction (Peter Vary via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  6 +++
 .../hive/ql/txn/compactor/TestCompactor.java   | 52 ++
 .../hadoop/hive/ql/txn/compactor/CompactorMR.java  |  5 ++-
 .../hadoop/hive/ql/txn/compactor/Worker.java   |  8 ++--
 4 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a00d907..8094d28 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2908,6 +2908,12 @@ public class HiveConf extends Configuration {
 
 HIVE_COMPACTOR_WAIT_TIMEOUT("hive.compactor.wait.timeout", 30L, "Time 
out in "
 + "milliseconds for blocking compaction. It's value has to be higher 
than 2000 milliseconds. "),
+
+HIVE_MR_COMPACTOR_GATHER_STATS("hive.mr.compactor.gather.stats", true, "If 
set to true MAJOR compaction " +
+"will gather stats if there are stats already associated with the 
table/partition.\n" +
+"Turn this off to save some resources and the stats are not used 
anyway.\n" +
+"Works only for MR based compaction, CRUD based compaction uses 
hive.stats.autogather."),
+
 /**
  * @deprecated Use MetastoreConf.COMPACTOR_INITIATOR_FAILED_THRESHOLD
  */
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index c687f14..32fe535 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -129,6 +129,7 @@ public class TestCompactor {
 hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "");
 hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR);
 hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, 
HiveInputFormat.class.getName());
+hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
 
 TxnDbUtil.setConfValues(hiveConf);
 TxnDbUtil.cleanDb(hiveConf);
@@ -1468,6 +1469,57 @@ public class TestCompactor {
 }
   }
 
+  @Test
+  public void testCompactorGatherStats() throws Exception {
+String dbName = "default";
+String tableName = "stats_comp_test";
+List colNames = Arrays.asList("a");
+executeStatementOnDriver("drop table if exists " + dbName + "." + 
tableName, driver);
+executeStatementOnDriver("create table " + dbName + "." + tableName +
+" (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", 
driver);
+executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(1)", driver);
+executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(1)", driver);
+
+TxnStore txnHandler = TxnUtils.getTxnStore(conf);
+txnHandler.compact(new CompactionRequest(dbName, tableName, 
CompactionType.MAJOR));
+runWorker(conf);
+
+// Make sure we do not have statistics for this table yet
+// Compaction generates stats only if there is any
+List colStats = 
msClient.getTableColumnStatistics(dbName,
+tableName, colNames, Constants.HIVE_ENGINE);
+assertEquals("No stats should be there for the table", 0, colStats.size());
+
+executeStatementOnDriver("analyze table " + dbName + "." + tableName + " 
compute statistics for columns", driver);
+executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(2)", driver);
+
+// Make sure we have old statistics for the table
+colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, 
Constants.HIVE_ENGINE);
+assertEquals("Stats should be there", 1, colStats.size());
+assertEquals("Value should contain old data", 1, 
colStats.get(0).getStatsData().getLongStats().getHighValue());
+assertEquals("V
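
The new flag only affects the MR-based compactor; query-based (CRUD) compaction keeps honoring hive.stats.autogather, as the property description notes. A minimal sketch of turning the flag off programmatically before requesting a MAJOR compaction, assuming a HiveConf instance is available:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class CompactorStatsToggle {
      public static void disableCompactorStats(HiveConf conf) {
        // Skip the stats-gathering step of MR MAJOR compaction.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_MR_COMPACTOR_GATHER_STATS, false);
      }
    }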

[hive] branch master updated: HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 (David Mollitor via Ashutosh Chauhan)

2020-05-25 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f76df73  HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 
(David Mollitor via Ashutosh Chauhan)
f76df73 is described below

commit f76df736d5461c0b6466432522498ca85bd4b240
Author: David Mollitor 
AuthorDate: Mon May 25 16:04:23 2020 -0700

HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 (David 
Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 pom.xml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 60f5f13..b4b41ea 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,9 +26,6 @@
 
   Hive
   https://hive.apache.org
-  
-2.2.1
-  
 
   
 storage-api



[hive] branch master updated: HIVE-23480: use the JsonPropertyOrder annotation to ensure the ordering of the serialized properties. (Panos G via Ashutosh Chauhan)

2020-05-25 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ef7a9de  HIVE-23480: use the JsonPropertyOrder annotation to ensure 
the ordering of the serialized properties. (Panos G via Ashutosh Chauhan)
ef7a9de is described below

commit ef7a9de93523051bfa16d753a3528a9a8457ac57
Author: Panos Garefalakis 
AuthorDate: Mon May 18 16:32:19 2020 +0100

HIVE-23480: use the JsonPropertyOrder annotation to ensure the ordering of 
the serialized properties. (Panos G via Ashutosh Chauhan)

Change-Id: I2f2b3f1d9eec1e26b5b6e445efe6f0106f4ea15d
Signed-off-by: Ashutosh Chauhan 
---
 .../src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java| 2 ++
 1 file changed, 2 insertions(+)

diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index eed1428..cf88474 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -32,6 +32,7 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
 import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.core.JsonProcessingException;
@@ -167,6 +168,7 @@ public class StatsSetupConst {
   COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, 
NUM_PARTITIONS,
   NUM_ERASURE_CODED_FILES);
 
+  @JsonPropertyOrder({"basicStats", "columnStats"})
   private static class ColumnStatsAccurate {
 private static ObjectReader objectReader;
 private static ObjectWriter objectWriter;
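
The annotation pins the order in which Jackson writes the two stats fields, so the serialized COLUMN_STATS_ACCURATE value stays byte-identical across JVMs and runs. A self-contained sketch of the same technique on a hypothetical POJO (the field names mirror the annotation above; the class itself is illustrative):

    import com.fasterxml.jackson.annotation.JsonPropertyOrder;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class OrderedStatsExample {
      // Without the annotation, property order follows reflection order and may vary.
      @JsonPropertyOrder({"basicStats", "columnStats"})
      static class Stats {
        public boolean basicStats = true;
        public String columnStats = "{}";
      }

      public static void main(String[] args) throws Exception {
        // Always prints {"basicStats":true,"columnStats":"{}"} with the keys in this order.
        System.out.println(new ObjectMapper().writeValueAsString(new Stats()));
      }
    }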



[hive] branch master updated: HIVE-23529 : CTAS is broken for uniontype when row_deserialize (Mustafa Iman via Ashutosh Chauhan)

2020-05-24 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 3015ff2  HIVE-23529 : CTAS is broken for uniontype when 
row_deserialize (Mustafa Iman via Ashutosh Chauhan)
3015ff2 is described below

commit 3015ff26ad0761c6332c6935ec83436d46bde561
Author: Mustafa Iman 
AuthorDate: Sun May 24 20:54:24 2020 -0700

HIVE-23529 : CTAS is broken for uniontype when row_deserialize (Mustafa 
Iman via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 data/files/data_with_union.txt |  1 +
 .../test/resources/testconfiguration.properties|  1 +
 .../clientpositive/row_deserialize_with_union.q| 17 
 .../llap/row_deserialize_with_union.q.out  | 45 ++
 .../objectinspector/ObjectInspectorUtils.java  |  5 ++-
 .../TestStandardObjectInspectors.java  | 12 +++---
 6 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/data/files/data_with_union.txt b/data/files/data_with_union.txt
new file mode 100644
index 000..dd1e7f9
--- /dev/null
+++ b/data/files/data_with_union.txt
@@ -0,0 +1 @@
+0102111foo
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 7a91eef..e1ba435 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -745,6 +745,7 @@ spark.query.files=\
   rcfile_bigdata.q,\
   reduce_deduplicate_exclude_join.q,\
   router_join_ppr.q,\
+  row_deserialize_with_union.q,\
   runtime_skewjoin_mapjoin_spark.q,\
   sample1.q,\
   sample10.q,\
diff --git a/ql/src/test/queries/clientpositive/row_deserialize_with_union.q 
b/ql/src/test/queries/clientpositive/row_deserialize_with_union.q
new file mode 100644
index 000..cb7d2aa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/row_deserialize_with_union.q
@@ -0,0 +1,17 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.vectorized.use.row.serde.deserialize=true;
+set hive.vectorized.use.vector.serde.deserialize=false;
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/data_with_union/;
+dfs -copyFromLocal ../../data/files/data_with_union.txt 
${system:test.tmp.dir}/data_with_union/data_with_union.txt;
+
+CREATE EXTERNAL TABLE data_with_union(
+  unionfield uniontype,
+  arrayfield array,
+  mapfield map,
+  structfield struct<`sf1`:int, `sf2`:string>)
+stored as textfile
+location '${system:test.tmp.dir}/data_with_union';
+
+create table data_with_union_2 as select * from data_with_union;
+
+select * from data_with_union_2;
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out 
b/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out
new file mode 100644
index 000..cc0ba5c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: CREATE EXTERNAL TABLE data_with_union(
+  unionfield uniontype,
+  arrayfield array,
+  mapfield map,
+  structfield struct<`sf1`:int, `sf2`:string>)
+stored as textfile
+ A masked pattern was here 
+PREHOOK: type: CREATETABLE
+ A masked pattern was here 
+PREHOOK: Output: database:default
+PREHOOK: Output: default@data_with_union
+POSTHOOK: query: CREATE EXTERNAL TABLE data_with_union(
+  unionfield uniontype,
+  arrayfield array,
+  mapfield map,
+  structfield struct<`sf1`:int, `sf2`:string>)
+stored as textfile
+ A masked pattern was here 
+POSTHOOK: type: CREATETABLE
+ A masked pattern was here 
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@data_with_union
+PREHOOK: query: create table data_with_union_2 as select * from data_with_union
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@data_with_union
+PREHOOK: Output: database:default
+PREHOOK: Output: default@data_with_union_2
+POSTHOOK: query: create table data_with_union_2 as select * from 
data_with_union
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@data_with_union
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@data_with_union_2
+POSTHOOK: Lineage: data_with_union_2.arrayfield SIMPLE 
[(data_with_union)data_with_union.FieldSchema(name:arrayfield, type:array, 
comment:null), ]
+POSTHOOK: Lineage: data_with_union_2.mapfield SIMPLE 
[(data_with_union)data_with_union.FieldSchema(name:mapfield, type:map, 
comment:null), ]
+POSTHOOK: Lineage: data_with_union_2.structfield SIMPLE 
[(data_with_union)data_with_union.FieldSchema(name:structfield, 
type:struct, comment:null), ]
+POSTHOOK: Lineage: data_with_union_2.unionfield SIMPLE 
[(data_with_union)data_with_union.FieldSchema(name:unionfield, 
type:uniontype, comment:null), ]
+PREHOOK: query: select * from data_with_unio

[hive] branch master updated: HIVE-23494 : Upgrade Apache parent POM to version 23 (David Mollitor via Ashutosh Chauhan)

2020-05-24 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 270ca80  HIVE-23494 : Upgrade Apache parent POM to version 23 (David 
Mollitor via Ashutosh Chauhan)
270ca80 is described below

commit 270ca800353458ebce6eb262781bd39b15f5e349
Author: David Mollitor 
AuthorDate: Sun May 24 20:38:48 2020 -0700

HIVE-23494 : Upgrade Apache parent POM to version 23 (David Mollitor via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 pom.xml |  2 +-
 standalone-metastore/metastore-common/pom.xml   |  1 -
 standalone-metastore/metastore-server/pom.xml   |  3 ---
 .../metastore-tools/metastore-benchmarks/pom.xml|  2 --
 standalone-metastore/metastore-tools/pom.xml| 17 ++---
 .../metastore-tools/tools-common/pom.xml|  2 --
 standalone-metastore/pom.xml| 18 --
 storage-api/pom.xml | 21 +++--
 testutils/pom.xml   |  2 --
 upgrade-acid/pom.xml|  8 ++--
 10 files changed, 16 insertions(+), 60 deletions(-)

diff --git a/pom.xml b/pom.xml
index 7515b4c..60f5f13 100644
--- a/pom.xml
+++ b/pom.xml
@@ -17,7 +17,7 @@
   
 org.apache
 apache
-21
+23
   
   org.apache.hive
   hive
diff --git a/standalone-metastore/metastore-common/pom.xml 
b/standalone-metastore/metastore-common/pom.xml
index e58c748..1938dce 100644
--- a/standalone-metastore/metastore-common/pom.xml
+++ b/standalone-metastore/metastore-common/pom.xml
@@ -415,7 +415,6 @@
 
   org.apache.maven.plugins
   maven-antrun-plugin
-  ${maven.antrun.plugin.version}
   
 
   ant-contrib
diff --git a/standalone-metastore/metastore-server/pom.xml 
b/standalone-metastore/metastore-server/pom.xml
index ea5f905..adc98d4 100644
--- a/standalone-metastore/metastore-server/pom.xml
+++ b/standalone-metastore/metastore-server/pom.xml
@@ -411,7 +411,6 @@
 
   org.apache.maven.plugins
   maven-antrun-plugin
-  ${maven.antrun.plugin.version}
   
 
   ant-contrib
@@ -574,7 +573,6 @@
   
 org.apache.maven.plugins
 maven-failsafe-plugin
-2.20.1
 
   
 
@@ -634,7 +632,6 @@
   
 org.apache.rat
 apache-rat-plugin
-0.10
 
   
 binary-package-licenses/**
diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml 
b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
index 06a4d8d..612ff6f 100644
--- a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
+++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
@@ -176,8 +176,6 @@
 org.apache.maven.plugins
 maven-compiler-plugin
 
-  1.8
-  1.8
   javac-with-errorprone
   true
 
diff --git a/standalone-metastore/metastore-tools/pom.xml 
b/standalone-metastore/metastore-tools/pom.xml
index f831a2e..b482c15 100644
--- a/standalone-metastore/metastore-tools/pom.xml
+++ b/standalone-metastore/metastore-tools/pom.xml
@@ -15,16 +15,15 @@
 http://maven.apache.org/POM/4.0.0;
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance;
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd;>
+  4.0.0
   
 hive-standalone-metastore
-org.apache.hive
 4.0.0-SNAPSHOT
+org.apache.hive
   
-  4.0.0
 
   hive-metastore-tools
   Hive Metastore Tools
-
   pom
 
   
@@ -34,9 +33,6 @@
 
   
 4.0.0-SNAPSHOT
-2.20.1
-UTF-8
-UTF-8
 2.8
 2.3.1
 3.1.0
@@ -149,15 +145,6 @@
   
 
   
-
-  
-
-  org.apache.maven.plugins
-  maven-compiler-plugin
-  3.7.0
-
-  
-
 
   
   
diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml 
b/standalone-metastore/metastore-tools/tools-common/pom.xml
index 44f02b7..7403b13 100644
--- a/standalone-metastore/metastore-tools/tools-common/pom.xml
+++ b/standalone-metastore/metastore-tools/tools-common/pom.xml
@@ -74,8 +74,6 @@
 org.apache.maven.plugins
 maven-compiler-plugin
 
-  1.8
-  1.8
   javac-with-errorprone
   true
 
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index ee2701e..577497b 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -16,30 +16,30 @@
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance;
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://mav

[hive] branch master updated: HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan)

2020-05-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 04b311d  HIVE-23501 : AOOB in VectorDeserializeRow when complex types 
are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan)
04b311d is described below

commit 04b311d2ce52fb6ab6cf6fe4edb91cd0de970946
Author: RAMESH KUMAR THANGARAJAN 
AuthorDate: Wed May 20 22:21:07 2020 -0700

HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted 
to primitive types (Ramesh Kumar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 data/files/arrayofIntdata/base_001/54_0|   1 +
 .../test/resources/testconfiguration.properties|   1 +
 .../hive/ql/exec/vector/VectorDeserializeRow.java  |   6 +-
 .../clientpositive/vector_deserialize_row.q|  33 +
 .../llap/vector_deserialize_row.q.out  | 145 +
 5 files changed, 182 insertions(+), 4 deletions(-)

diff --git a/data/files/arrayofIntdata/base_001/54_0 
b/data/files/arrayofIntdata/base_001/54_0
new file mode 100644
index 000..a7a5893
--- /dev/null
+++ b/data/files/arrayofIntdata/base_001/54_0
@@ -0,0 +1 @@
+test   1   
5208187416695208186759785208123282775208126369095208128960765208133938905208135089945208135646875208135960535208135314935208136316075208136109305208136556885208136690205208137412015208137269385208137339875208137443045208137204395208137666885208137720445208137787775208137264825208137990955208138331935208138811555208139113415208139319435208137474495208138853255208139267335208139179105208137428265208139428805208139564045208139611825208139439395208
 [...]
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index f48e433..e7c3e43 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -946,6 +946,7 @@ minillaplocal.query.files=\
   vector_decimal64_case_when_nvl.q,\
   vector_decimal64_case_when_nvl_cbo.q,\
   vector_decimal64_multi_vertex.q,\
+  vector_deserialize_row.q,\
   vector_full_outer_join.q,\
   vector_fullouter_mapjoin_1_fast.q,\
   vector_fullouter_mapjoin_1_optimized.q,\
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 97166ec..6453069 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -846,8 +846,7 @@ public final class VectorDeserializeRow {
 
   // Ensure child size.
   final int childCapacity = listColVector.child.isNull.length;
-  final int childCount = listColVector.childCount;
-  if (childCapacity < childCount / 0.75) {
+  if (childCapacity < offset / 0.75) {
 listColVector.child.ensureSize(childCapacity * 2, true);
   }
 
@@ -877,8 +876,7 @@ public final class VectorDeserializeRow {
 
   // Ensure child size.
   final int childCapacity = mapColVector.keys.isNull.length;
-  final int childCount = mapColVector.childCount;
-  if (childCapacity < childCount / 0.75) {
+  if (childCapacity < offset / 0.75) {
 mapColVector.keys.ensureSize(childCapacity * 2, true);
 mapColVector.values.ensureSize(childCapacity * 2, true);
   }
diff --git a/ql/src/test/queries/clientpositive/vector_deserialize_row.q 
b/ql/src/test/queries/clientpositive/vector_deserialize_row.q
new file mode 100644
index 000..38c8454
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_deserialize_row.q
@@ -0,0 +1,33 @@
+CREATE external TABLE IF NOT EXISTS sessions
+(
+session_id string,
+uid bigint,
+uids array,
+search_ids array,
+total_views int,
+datestamp date
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+LOCATION '../../data/files/arrayofIntdata';
+
+CREATE TABLE IF NOT EXISTS sessions_orc
+(
+session_id string,
+uid bigint,
+uids array,
+search_ids array,
+total_views int,
+datestamp date
+);
+
+describe formatted sessions_orc;
+
+INSERT OVERWRITE TABLE sessions_orc
+SELECT * FROM sessions;
+
+select count(1) from sessions_orc;
+select count(1) from sessions;
+drop table sessions;
+drop table sessions_orc;
+
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out 
b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out
new file mode 100644
index 000..6b0e010
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: CREATE external TABLE IF NOT EXISTS sessions
+(
+session_id string,
+uid bigint,
+uids array,
+search_ids array,
+tot
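
The two hunks size the child vectors against the current write offset instead of the stale childCount, which is what stops the ArrayIndexOutOfBounds once complex columns are converted to primitives. A generic sketch of that grow-before-write check, assuming a plain int[] buffer stands in for the ColumnVector children (the 0.75 threshold and doubling mirror the hunks; the Math.max guard is an extra safety margin, not part of the patch):

    import java.util.Arrays;

    public class GrowBeforeWrite {
      // Ensure the child buffer can hold `neededOffset` entries before writing to it.
      static int[] ensureChildCapacity(int[] child, int neededOffset) {
        int capacity = child.length;
        if (capacity < neededOffset / 0.75) {
          // Grow ahead of demand so repeated small appends do not keep reallocating.
          child = Arrays.copyOf(child, Math.max(capacity * 2, neededOffset));
        }
        return child;
      }
    }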

[hive] branch master updated: HIVE-23476 : LLAP: Preallocate arenas for mmap case as well (Prasanth J via Gopal V)

2020-05-17 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 28bdeeb  HIVE-23476 :  LLAP: Preallocate arenas for mmap case as well 
(Prasanth J via Gopal V)
28bdeeb is described below

commit 28bdeeb12e786c4830fdc1b138c7113dc1baa095
Author: Prasanth Jayachandran 
AuthorDate: Sun May 17 15:38:59 2020 -0700

HIVE-23476 :  LLAP: Preallocate arenas for mmap case as well (Prasanth J 
via Gopal V)

Signed-off-by: Ashutosh Chauhan 
---
 .../src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java 
b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java
index 341da25..6934dba 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java
@@ -215,7 +215,7 @@ public final class BuddyAllocator
 for (int i = 0; i < maxArenas; ++i) {
   arenas[i] = new Arena();
 }
-int initCount = doPreallocate && !isMapped ? maxArenas : 1;
+int initCount = doPreallocate ? maxArenas : 1;
 for (int i = 0; i < initCount; ++i) {
   arenas[i].init(i);
   metrics.incrAllocatedArena();



[hive] branch master updated: HIVE-23443 : LLAP speculative task pre-emption seems to be not working (Prasanth J via Gopal V, Panos G)

2020-05-17 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e85731c  HIVE-23443 : LLAP speculative task pre-emption seems to be 
not working (Prasanth J via Gopal V, Panos G)
e85731c is described below

commit e85731c42b6485412deefccf85f17e3ae9e0f403
Author: Prasanth Jayachandran 
AuthorDate: Sun May 17 09:01:21 2020 -0700

HIVE-23443 : LLAP speculative task pre-emption seems to be not working 
(Prasanth J via Gopal V, Panos G)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/daemon/impl/TaskExecutorService.java |  22 +++-
 .../llap/daemon/impl/TestTaskExecutorService.java  | 134 +
 2 files changed, 152 insertions(+), 4 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java
index d8b517d..1d6e852 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java
@@ -867,7 +867,8 @@ public class TaskExecutorService extends AbstractService
 return sc;
   }
 
-  private void finishableStateUpdated(TaskWrapper taskWrapper, boolean 
newFinishableState) {
+  @VisibleForTesting
+  void finishableStateUpdated(TaskWrapper taskWrapper, boolean 
newFinishableState) {
 synchronized (lock) {
   LOG.debug("Fragment {} guaranteed state changed to {}; finishable {}, in 
wait queue {}, "
   + "in preemption queue {}", taskWrapper.getRequestId(), 
taskWrapper.isGuaranteed(),
@@ -884,10 +885,20 @@ public class TaskExecutorService extends AbstractService
 taskWrapper.updateCanFinishForPriority(newFinishableState);
 forceReinsertIntoQueue(taskWrapper, isRemoved);
   } else {
-taskWrapper.updateCanFinishForPriority(newFinishableState);
-if (!newFinishableState && !taskWrapper.isInPreemptionQueue()) {
-  // No need to check guaranteed here; if it was false we would 
already be in the queue.
+// if speculative task, any finishable state change should re-order 
the queue as speculative tasks are always
+// not-guaranteed (re-order helps put non-finishable's ahead of 
finishable)
+if (!taskWrapper.isGuaranteed()) {
+  removeFromPreemptionQueue(taskWrapper);
+  taskWrapper.updateCanFinishForPriority(newFinishableState);
   addToPreemptionQueue(taskWrapper);
+} else {
+  // if guaranteed task, if the finishable state changed to 
non-finishable and if the task doesn't exist
+  // pre-emption queue, then add it so that it becomes candidate to 
kill
+  taskWrapper.updateCanFinishForPriority(newFinishableState);
+  if (!newFinishableState && !taskWrapper.isInPreemptionQueue()) {
+// No need to check guaranteed here; if it was false we would 
already be in the queue.
+addToPreemptionQueue(taskWrapper);
+  }
 }
   }
 
@@ -896,6 +907,9 @@ public class TaskExecutorService extends AbstractService
   }
 
   private void addToPreemptionQueue(TaskWrapper taskWrapper) {
+if (taskWrapper.isInPreemptionQueue()) {
+  return;
+}
 synchronized (lock) {
   insertIntoPreemptionQueueOrFailUnlocked(taskWrapper);
   taskWrapper.setIsInPreemptableQueue(true);
diff --git 
a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java
 
b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java
index ce9fce9..ff61fdd 100644
--- 
a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java
+++ 
b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java
@@ -236,6 +236,140 @@ public class TestTaskExecutorService {
 }
   }
 
+  @Test(timeout = 1)
+  public void testPreemptionQueueOnFinishableStateUpdates() throws 
InterruptedException {
+
+long r1WorkTime = 1000L;
+long r2WorkTime = 2000L;
+long r3WorkTime = 2000L;
+// all tasks start with non-finishable state
+MockRequest r1 = createMockRequest(1, 2, 100, 200, false, r1WorkTime, 
false);
+MockRequest r2 = createMockRequest(2, 1, 100, 200, false, r2WorkTime, 
false);
+MockRequest r3 = createMockRequest(3, 3, 50, 200, false, r3WorkTime, 
false);
+
+
+TaskExecutorServiceForTest taskExecutorService =
+  new TaskExecutorServiceForTest(4, 2, 
ShortestJobFirstComparator.class.getName(), true, mockMetrics);
+taskExecutorService.init(new Configuration());
+taskExecutorService.start();
+
+try {
+  String fragmentId1 = r1.getRequestId();
+  Scheduler.SubmissionState submissionState
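
The heart of the fix is that a non-guaranteed (speculative) task whose finishable state flips must be removed from the preemption queue, updated, and re-inserted, because a heap does not re-sort an element whose ordering key changes in place. A generic sketch of that remove/update/re-add pattern with java.util.PriorityQueue (the types and comparator below are illustrative, not the LLAP classes):

    import java.util.Comparator;
    import java.util.PriorityQueue;

    public class ReorderOnStateChange {
      static final class Task {
        final String id;
        boolean finishable;
        Task(String id, boolean finishable) { this.id = id; this.finishable = finishable; }
      }

      // Non-finishable tasks sort first, so they are the preferred preemption victims.
      private final PriorityQueue<Task> preemptionQueue =
          new PriorityQueue<>(Comparator.comparing((Task t) -> t.finishable));

      void add(Task t) { preemptionQueue.add(t); }

      // PriorityQueue does not re-sort entries whose key mutates in place,
      // so remove first, mutate, then re-insert.
      void onFinishableStateChange(Task t, boolean newState) {
        preemptionQueue.remove(t);
        t.finishable = newState;
        preemptionQueue.add(t);
      }
    }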

[hive] branch master updated: HIVE-23376 : Avoid repeated SHA computation in GenericUDTFGetSplits for hive-exec jar (Ramesh Kumar via Rajesh Balamohan)

2020-05-16 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b53a62f  HIVE-23376 : Avoid repeated SHA computation in 
GenericUDTFGetSplits for hive-exec jar (Ramesh Kumar via Rajesh Balamohan)
b53a62f is described below

commit b53a62ff3b94efd2c9da7b0d805f076e6b1da21e
Author: RAMESH KUMAR THANGARAJAN 
AuthorDate: Sat May 16 20:20:35 2020 -0700

HIVE-23376 : Avoid repeated SHA computation in GenericUDTFGetSplits for 
hive-exec jar (Ramesh Kumar via Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java  | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 7682e1f..9568096 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -128,6 +128,7 @@ import com.google.common.base.Preconditions;
 @UDFType(deterministic = false)
 public class GenericUDTFGetSplits extends GenericUDTF {
   private static final Logger LOG = 
LoggerFactory.getLogger(GenericUDTFGetSplits.class);
+  private static String sha = null;
 
   protected transient StringObjectInspector stringOI;
   protected transient IntObjectInspector intOI;
@@ -703,7 +704,9 @@ public class GenericUDTFGetSplits extends GenericUDTF {
 Path destDirPath = destDirStatus.getPath();
 
 Path localFile = new Path(localJarPath);
-String sha = getSha(localFile, conf);
+if (sha == null || !destDirPath.toString().contains(sha)) {
+  sha = getSha(localFile, conf);
+}
 
 String destFileName = localFile.getName();
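
The patch caches the hive-exec jar's SHA in a static field and recomputes it only when the destination directory path no longer embeds the cached value, rather than re-hashing the jar on every get_splits call. A small sketch of that guard, assuming a hypothetical computeSha helper in place of the real getSha(localFile, conf):

    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.security.MessageDigest;

    public class CachedJarSha {
      private static volatile String sha; // cached across calls, as in the patch

      static String shaFor(Path jar, String destDirPath) throws Exception {
        // Recompute only if there is no cached value or the destination no longer embeds it.
        if (sha == null || !destDirPath.contains(sha)) {
          sha = computeSha(jar);
        }
        return sha;
      }

      // Hypothetical stand-in for GenericUDTFGetSplits.getSha(localFile, conf).
      private static String computeSha(Path jar) throws Exception {
        byte[] digest = MessageDigest.getInstance("SHA-256").digest(Files.readAllBytes(jar));
        StringBuilder hex = new StringBuilder();
        for (byte b : digest) {
          hex.append(String.format("%02x", b));
        }
        return hex.toString();
      }
    }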
 



[hive] branch master updated: HIVE-23446 : LLAP: Reduce IPC connection misses to AM for short queries (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-16 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new eeffb0e  HIVE-23446 : LLAP: Reduce IPC connection misses to AM for 
short queries (Rajesh Balamohan via Ashutosh Chauhan)
eeffb0e is described below

commit eeffb0e4e7feab7cea0dba9e7a2b63808b2023f7
Author: Rajesh Balamohan 
AuthorDate: Sat May 16 20:05:27 2020 -0700

HIVE-23446 : LLAP: Reduce IPC connection misses to AM for short queries 
(Rajesh Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/daemon/impl/ContainerRunnerImpl.java | 82 +-
 .../hadoop/hive/llap/daemon/impl/QueryInfo.java| 25 +++
 .../hadoop/hive/llap/daemon/impl/QueryTracker.java |  5 +-
 .../llap/daemon/impl/TaskExecutorTestHelpers.java  |  2 +-
 4 files changed, 92 insertions(+), 22 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
index 9c73747..a4de3d9 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
@@ -15,7 +15,6 @@
 package org.apache.hadoop.hive.llap.daemon.impl;
 
 import java.io.IOException;
-import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
@@ -23,10 +22,19 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Supplier;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.UgiFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -103,6 +111,7 @@ public class ContainerRunnerImpl extends CompositeService 
implements ContainerRu
   private static final Logger LOG = 
LoggerFactory.getLogger(ContainerRunnerImpl.class);
   public static final String THREAD_NAME_FORMAT_PREFIX = "ContainerExecutor ";
 
+  private UgiPool ugiPool;
   private final AMReporter amReporter;
   private final QueryTracker queryTracker;
   private final Scheduler executorService;
@@ -131,6 +140,7 @@ public class ContainerRunnerImpl extends CompositeService 
implements ContainerRu
 super("ContainerRunnerImpl");
 Preconditions.checkState(numExecutors > 0,
 "Invalid number of executors: " + numExecutors + ". Must be > 0");
+this.ugiPool = new UgiPool(numExecutors);
 this.localAddress = localAddress;
 this.localShufflePort = localShufflePort;
 this.amReporter = amReporter;
@@ -270,7 +280,7 @@ public class ContainerRunnerImpl extends CompositeService 
implements ContainerRu
   queryIdentifier, qIdProto.getApplicationIdString(), dagId,
   vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier,
   vertex.getVertexName(), request.getFragmentNumber(), 
request.getAttemptNumber(),
-  vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, 
amNodeId);
+  vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, 
amNodeId, ugiPool);
 
   // May need to setup localDir for re-localization, which is usually 
setup as Environment.PWD.
   // Used for re-localization, to add the user specified configuration 
(conf_pb_binary_stream)
@@ -593,4 +603,72 @@ public class ContainerRunnerImpl extends CompositeService 
implements ContainerRu
 return executorService.getNumActiveForReporting();
   }
 
+  static class UgiPool {
+// Pool of UGI for a given appTokenIdentifier (AM). Expires after 3 hours 
of last access
+private final Cache> ugiPool =
+CacheBuilder
+.newBuilder().removalListener(new RemovalListener>() {
+  @Override
+  public void onRemoval(
+  RemovalNotification> 
notification) {
+LOG.debug("Removing " + notification.getValue()  + " from 
pool.Pool size: " + ugiPool.size());
+  }
+}).expireAfterAccess(60 * 3, TimeUnit.MINUTES).build();
+
+private final int numExecutors;
+
+public UgiPool(int numExecutors) {
+  this.numExecutors = numExecutors;
+}
+
+/**
+ * Get UGI for a given AM and appToken. It is

[hive] branch master updated: HIVE-23449 : LLAP: Reduce mkdir and config creations in submitWork hotpath (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-16 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 781b7fc  HIVE-23449 : LLAP: Reduce mkdir and config creations in 
submitWork hotpath (Rajesh Balamohan via Ashutosh Chauhan)
781b7fc is described below

commit 781b7fc3e450f5a15e1afa2096189884b772b115
Author: Rajesh Balamohan 
AuthorDate: Sat May 16 09:12:49 2020 -0700

HIVE-23449 : LLAP: Reduce mkdir and config creations in submitWork hotpath 
(Rajesh Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java   | 13 +
 .../apache/hadoop/hive/llap/daemon/impl/QueryTracker.java   |  4 ++--
 .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java| 13 -
 .../hadoop/hive/llap/shufflehandler/ShuffleHandler.java |  4 
 .../hive/llap/daemon/impl/TaskExecutorTestHelpers.java  |  3 ++-
 .../hive/llap/daemon/impl/TestContainerRunnerImpl.java  |  6 --
 6 files changed, 25 insertions(+), 18 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
index 6a13b55..9c73747 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.UgiFactory;
@@ -271,23 +272,19 @@ public class ContainerRunnerImpl extends CompositeService 
implements ContainerRu
   vertex.getVertexName(), request.getFragmentNumber(), 
request.getAttemptNumber(),
   vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, 
amNodeId);
 
-  String[] localDirs = fragmentInfo.getLocalDirs();
-  Preconditions.checkNotNull(localDirs);
-  if (LOG.isDebugEnabled()) {
-LOG.debug("Dirs are: " + Arrays.toString(localDirs));
-  }
   // May need to setup localDir for re-localization, which is usually 
setup as Environment.PWD.
   // Used for re-localization, to add the user specified configuration 
(conf_pb_binary_stream)
 
-  Configuration callableConf = new Configuration(getConfig());
+  // Lazy create conf object, as it gets expensive in this codepath.
+  Supplier callableConf = () -> new 
Configuration(getConfig());
   UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : 
fsUgiFactory.createUgi();
   boolean isGuaranteed = request.hasIsGuaranteed() && 
request.getIsGuaranteed();
 
   // enable the printing of (per daemon) LLAP task queue/run times via 
LLAP_TASK_TIME_SUMMARY
   ConfVars tezSummary = ConfVars.TEZ_EXEC_SUMMARY;
   ConfVars llapTasks = ConfVars.LLAP_TASK_TIME_SUMMARY;
-  boolean addTaskTimes = callableConf.getBoolean(tezSummary.varname, 
tezSummary.defaultBoolVal)
- && callableConf.getBoolean(llapTasks.varname, 
llapTasks.defaultBoolVal);
+  boolean addTaskTimes = getConfig().getBoolean(tezSummary.varname, 
tezSummary.defaultBoolVal)
+ && getConfig().getBoolean(llapTasks.varname, 
llapTasks.defaultBoolVal);
 
   final String llapHost;
   if (UserGroupInformation.isSecurityEnabled()) {
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
index eae8e08..bf4eea0 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
@@ -211,9 +211,9 @@ public class QueryTracker extends AbstractService {
 LOG.debug("Registering request for {} with the ShuffleHandler", 
queryIdentifier);
   }
   if (!vertex.getIsExternalSubmission()) {
+String[] localDirs = (ShuffleHandler.get().isDirWatcherEnabled()) ? 
queryInfo.getLocalDirs() : null;
 ShuffleHandler.get()
-.registerDag(appIdString, dagIdentifier, appToken,
-user, queryInfo.getLocalDirs());
+.registerDag(appIdString, dagIdentifier, appToken, user, 
localDirs);
   }
 
   return queryInfo.registerFragment(
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index 3619252..bc26dc0 100644
--- 
a/llap-server/s

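The core trick above is replacing an eager new Configuration(getConfig()) in the submit hot path with a Supplier, so the copy is only made if a consumer actually asks for it. A minimal sketch of that deferral (illustrative; the real patch threads the supplier through to TaskRunnerCallable):

  import java.util.function.Supplier;
  import org.apache.hadoop.conf.Configuration;

  final class LazyConf {
    // Copying a Configuration is relatively expensive; defer it so callers that
    // never need a private copy never pay for one.
    static Supplier<Configuration> lazyCopyOf(Configuration base) {
      return () -> new Configuration(base);
    }
  }

Each get() still produces a fresh copy, matching the patch; wrapping the lambda in a memoizing supplier would additionally cap it at one copy, but that is not what the commit does.
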
[hive] branch master updated: HIVE-23375: Introduce TezCounter to track MJ HashTable Load time

2020-05-16 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 31f20c8  HIVE-23375: Introduce TezCounter to track MJ HashTable Load 
time
31f20c8 is described below

commit 31f20c8a29cd75bbf244ce9da5b5dfef37900280
Author: Panos Garefalakis 
AuthorDate: Wed May 6 12:00:35 2020 +0100

HIVE-23375: Introduce TezCounter to track MJ HashTable Load time

Change-Id: I277696aa46b8a6343f53c36f773ee53575f513ac
Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java  |  4 
 .../org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java   | 11 +++
 .../vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java | 11 +++
 .../hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java   |  6 +-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
index 02cdae0..6681522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
@@ -29,6 +29,10 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
  */
 public interface HashTableLoader {
 
+  enum HashTableLoaderCounters {
+HASHTABLE_LOAD_TIME_MS
+  };
+
   void init(ExecMapperContext context, MapredContext mrContext, Configuration 
hconf,
   MapJoinOperator joinOp);
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index 151d1b3..9cb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -26,7 +26,10 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.llap.LlapDaemonInfo;
 import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
+import org.apache.tez.common.counters.TezCounter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -66,6 +69,7 @@ public class HashTableLoader implements 
org.apache.hadoop.hive.ql.exec.HashTable
   private MapJoinDesc desc;
   private TezContext tezContext;
   private String cacheKey;
+  private TezCounter htLoadCounter;
 
   @Override
   public void init(ExecMapperContext context, MapredContext mrContext, 
Configuration hconf,
@@ -74,6 +78,10 @@ public class HashTableLoader implements 
org.apache.hadoop.hive.ql.exec.HashTable
 this.hconf = hconf;
 this.desc = joinOp.getConf();
 this.cacheKey = joinOp.getCacheKey();
+String counterGroup = HiveConf.getVar(hconf, 
HiveConf.ConfVars.HIVECOUNTERGROUP);
+String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, "");
+String counterName = 
Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(),
 vertexName);
+this.htLoadCounter = 
tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, 
counterName);
   }
 
   @Override
@@ -238,6 +246,7 @@ public class HashTableLoader implements 
org.apache.hadoop.hive.ql.exec.HashTable
   cacheKey, tableContainer.getClass().getSimpleName(), pos);
 
 tableContainer.setSerde(keyCtx, valCtx);
+long startTime = System.currentTimeMillis();
 while (kvReader.next()) {
   tableContainer.putRow((Writable) kvReader.getCurrentKey(), 
(Writable) kvReader.getCurrentValue());
   numEntries++;
@@ -258,6 +267,8 @@ public class HashTableLoader implements 
org.apache.hadoop.hive.ql.exec.HashTable
 }
   }
 }
+long delta = System.currentTimeMillis() - startTime;
+htLoadCounter.increment(delta);
 tableContainer.seal();
 mapJoinTables[pos] = tableContainer;
 if (doMemCheck) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
index 8d9c546..a011027 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
@@ -23,7 +23,10 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.llap.LlapDaemonInfo;
 import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhau

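The pattern here is simply to bracket the hash table load loop with a wall-clock measurement and add the elapsed milliseconds to a counter (the patch uses a TezCounter named HASHTABLE_LOAD_TIME_MS looked up from the processor context). A stand-alone sketch with an AtomicLong standing in for the TezCounter:

  import java.util.Map;
  import java.util.concurrent.atomic.AtomicLong;
  import java.util.function.Function;

  final class TimedHashTableLoad {
    // Stand-in for the TezCounter the patch increments.
    static final AtomicLong HASHTABLE_LOAD_TIME_MS = new AtomicLong();

    static <K, V> void load(Iterable<K> keys, Function<K, V> fetch, Map<K, V> table) {
      long start = System.currentTimeMillis();
      for (K key : keys) {
        table.put(key, fetch.apply(key));       // the work being measured
      }
      HASHTABLE_LOAD_TIME_MS.addAndGet(System.currentTimeMillis() - start);
    }
  }
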
[hive] branch master updated: HIVE-23423 : Check of disabling hash aggregation ignores grouping set ( Gopal V via Ashutosh Chauhan)

2020-05-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 9f40d7c  HIVE-23423 : Check of disabling hash aggregation ignores 
grouping set ( Gopal V via Ashutosh Chauhan)
9f40d7c is described below

commit 9f40d7cc1d889aa3079f3f494cf810fabe326e44
Author: Gopal V 
AuthorDate: Wed May 13 21:42:29 2020 -0700

HIVE-23423 : Check of disabling hash aggregation ignores grouping set ( 
Gopal V via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/exec/vector/VectorGroupByOperator.java |  21 ++-
 .../ql/exec/vector/TestVectorGroupByOperator.java  | 207 +++--
 2 files changed, 209 insertions(+), 19 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index f104c13..d4d18ef 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -645,9 +645,26 @@ public class VectorGroupByOperator extends 
Operator
   LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d 
MIN:%d",
   numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * 
minReductionHashAggr)));
 }
-if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
+/*
+ * The grouping sets expand the hash sizes by producing intermediate 
keys. 3 grouping sets
+ * of (),(col1),(col1,col2), will turn 10 rows into 30 rows. If the 
col1 has an nDV of 2 and
+ * col2 has nDV of 5, then this turns into a maximum of 1+3+(2*5) or 
14 keys into the
+ * hashtable.
+ * 
+ * So you get 10 rows in and 14 rows out, which is a reduction of ~2x 
vs Streaming mode,
+ * but it is an increase if the grouping-set is not accounted for.
+ * 
+ * For performance, it is definitely better to send 14 rows out to 
shuffle and not 30.
+ * 
+ * Particularly if the same nDVs are repeated for a thousand rows, 
this would send a
+ * thousand rows via streaming to a single reducer which owns the 
empty grouping set,
+ * instead of sending 1 from the hash.
+ * 
+ */
+final int groupingExpansion = (groupingSets != null) ? 
groupingSets.length : 1;
+final long intermediateKeyCount = sumBatchSize * groupingExpansion;
+if (numEntriesHashTable > intermediateKeyCount * minReductionHashAggr) 
{
   flush(true);
-
   changeToStreamingMode();
 }
   }
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index e8586fc..12df385 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -54,6 +54,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -111,9 +112,10 @@ public class TestVectorGroupByOperator {
   String column,
   TypeInfo typeInfo) {
 
-ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo);
 
-ArrayList params = new ArrayList();
+TypeInfo[] typeInfos = new TypeInfo[] {typeInfo};
+ArrayList params = new ArrayList(1);
+ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo);
 params.add(inputColumn);
 
 AggregationDesc agg = new AggregationDesc();
@@ -121,10 +123,7 @@ public class TestVectorGroupByOperator {
 agg.setMode(mode);
 agg.setParameters(params);
 
-TypeInfo[] typeInfos = new TypeInfo[] { typeInfo };
-
 final GenericUDAFEvaluator evaluator;
-PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) 
typeInfo).getPrimitiveCategory();
 try {
   switch (aggregate) {
   case "count":
@@ -232,14 +231,13 @@ public class TestVectorGroupByOperator {
 return new Pair(desc, vectorDesc);
   }
 
-
   private static Pair buildKeyGroupByDesc(
   VectorizationContext ctx,
   String aggregate,
   String column,
   TypeInfo dataTypeInfo,
-  String key,
-  TypeInfo keyTypeInfo) {
+  String[] keys,
+  TypeInfo[] keyTypeInfos) {
 
 Pair pair =
 buildGroupByDescType(ctx, aggregate, 
GenericU

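The numeric reasoning in the new comment can be condensed: grouping sets multiply the rows entering the hash table, so the reduction check has to compare the table size against rows times the number of grouping sets, not against the raw row count. A compact sketch of the adjusted check (the minReduction of 0.5 used in the worked example below is just an assumed value, not necessarily Hive's default):

  final class HashModeCheck {
    // Keep hash aggregation while the table is still compressing the
    // intermediate (grouping-set expanded) key stream enough.
    static boolean keepHashMode(long hashTableEntries, long rowsSeen,
                                int groupingSetCount, float minReduction) {
      int expansion = groupingSetCount > 0 ? groupingSetCount : 1;
      long intermediateKeys = rowsSeen * expansion;
      return hashTableEntries <= intermediateKeys * minReduction;
    }
  }

With the commit's own example (10 rows, 3 grouping sets, 14 hash table entries) and minReduction 0.5, the old check compares 14 against 10 * 0.5 = 5 and flips to streaming; the new one compares 14 against 30 * 0.5 = 15 and keeps hash mode, which sends 14 rows to the shuffle instead of 30.
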
[hive] branch master updated: HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) twice for the same file ( Rajesh Balamohan via Ashutosh Chauhan )

2020-05-13 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b63c35a  HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) 
twice for the same file ( Rajesh Balamohan via Ashutosh Chauhan )
b63c35a is described below

commit b63c35a10dedc390e7be832cc7ebcf1bac030f43
Author: Rajesh Balamohan 
AuthorDate: Wed May 13 15:44:58 2020 -0700

HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) twice for the 
same file ( Rajesh Balamohan via Ashutosh Chauhan )

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/exec/FileSinkOperator.java  |   5 -
 .../clientpositive/llap/orc_llap_counters.q.out|  84 -
 .../clientpositive/llap/orc_llap_counters1.q.out   |   4 +-
 .../clientpositive/llap/orc_ppd_basic.q.out| 102 ++---
 .../llap/orc_ppd_schema_evol_3a.q.out  | 100 ++--
 5 files changed, 145 insertions(+), 150 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 082f1cb..1b84ba2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -791,11 +791,6 @@ public class FileSinkOperator extends 
TerminalOperator implements
 filesIdx++;
   }
   assert filesIdx == numFiles;
-
-  // in recent hadoop versions, use deleteOnExit to clean tmp files.
-  if (isNativeTable() && fs != null && fsp != null && !conf.isMmTable() && 
!conf.isDirectInsert()) {
-autoDelete = fs.deleteOnExit(fsp.outPaths[0]);
-  }
 } catch (Exception e) {
   throw new HiveException(e);
 }
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out 
b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
index f8190af..a5fb0da 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -257,7 +257,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 16680
HDFS_BYTES_WRITTEN: 104
-   HDFS_READ_OPS: 6
+   HDFS_READ_OPS: 5
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -296,7 +296,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -319,7 +319,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 1047
HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 4
+   HDFS_READ_OPS: 3
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -360,7 +360,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -399,7 +399,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -438,7 +438,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -477,7 +477,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -516,7 +516,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -555,7 +555,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -594,7 +594,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_WRITTEN: 104
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 2
HDFS_LARGE_READ_OPS: 0
HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -633,7 +633,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
HDFS_BYTES_READ: 0
HDFS_BYTES_W

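The .q.out deltas above (each HDFS_READ_OPS dropping by one) come from removing the second deleteOnExit() call: in common Hadoop versions FileSystem.deleteOnExit() starts with an exists() probe, so every extra call is an extra NameNode getFileInfo RPC for a path that is already registered. A small register-once guard, purely illustrative (the patch itself just deletes the redundant call in FileSinkOperator):

  import java.io.IOException;
  import java.util.HashSet;
  import java.util.Set;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  final class DeleteOnExitOnce {
    private final Set<Path> registered = new HashSet<>();

    synchronized boolean register(FileSystem fs, Path tmp) throws IOException {
      if (!registered.add(tmp)) {
        return false;                 // already marked: skip the duplicate RPC
      }
      return fs.deleteOnExit(tmp);    // marks the file for cleanup when the FS is closed
    }
  }
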
[hive] branch master updated: HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh Chauhan via Gopal V)

2020-05-12 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 57c1593  HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh 
Chauhan via Gopal V)
57c1593 is described below

commit 57c15936d7a69e215c986d62aa959e70cb352da4
Author: Ashutosh Chauhan 
AuthorDate: Mon May 11 21:33:48 2020 -0700

HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh Chauhan via 
Gopal V)
---
 .../java/org/apache/hadoop/hive/ql/Compiler.java   |  3 ---
 .../java/org/apache/hadoop/hive/ql/Executor.java   |  2 --
 .../org/apache/hadoop/hive/ql/exec/DagUtils.java   |  4 +---
 .../org/apache/hadoop/hive/ql/exec/Utilities.java  | 22 --
 4 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java
index aa42fd5..b5e093d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java
@@ -339,9 +339,6 @@ public class Compiler {
 plan.setOptimizedCBOPlan(context.getCalcitePlan());
 plan.setOptimizedQueryString(context.getOptimizedSql());
 
-driverContext.getConf().set("mapreduce.workflow.id", "hive_" + 
driverContext.getQueryId());
-driverContext.getConf().set("mapreduce.workflow.name", 
driverContext.getQueryString());
-
 // initialize FetchTask right here
 if (plan.getFetchTask() != null) {
   plan.getFetchTask().initialize(driverContext.getQueryState(), plan, 
null, context);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
index e9909a9..7af3cfb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java
@@ -346,8 +346,6 @@ public class Executor {
   if (noName) {
 driverContext.getConf().set(MRJobConfig.JOB_NAME, jobName + " (" + 
task.getId() + ")");
   }
-  driverContext.getConf().set(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME, 
task.getId());
-  Utilities.setWorkflowAdjacencies(driverContext.getConf(), 
driverContext.getPlan());
   taskQueue.incCurJobNo(1);
   CONSOLE.printInfo("Launching Job " + taskQueue.getCurJobNo() + " out of 
" + jobCount);
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java
index 480d679..48d8dba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java
@@ -26,14 +26,12 @@ import com.google.common.base.Strings;
 
 public class DagUtils {
 
-  public static final String MAPREDUCE_WORKFLOW_NODE_NAME = 
"mapreduce.workflow.node.name";
-
   public static String getQueryName(Configuration conf) {
 String name = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYNAME);
 if (Strings.isNullOrEmpty(name)) {
   return conf.get(MRJobConfig.JOB_NAME);
 } else {
-  return name + " (" + conf.get(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME) + 
")";
+  return name;
 }
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index ace24be..0e4ce78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -543,28 +543,6 @@ public final class Utilities {
 }
   }
 
-  public static void setWorkflowAdjacencies(Configuration conf, QueryPlan 
plan) {
-try {
-  Graph stageGraph = plan.getQueryPlan().getStageGraph();
-  if (stageGraph == null) {
-return;
-  }
-  List adjList = stageGraph.getAdjacencyList();
-  if (adjList == null) {
-return;
-  }
-  for (Adjacency adj : adjList) {
-List children = adj.getChildren();
-if (CollectionUtils.isEmpty(children)) {
-  return;
-}
-conf.setStrings("mapreduce.workflow.adjacency." + adj.getNode(),
-children.toArray(new String[0]));
-  }
-} catch (IOException e) {
-}
-  }
-
   public static List getFieldSchemaString(List fl) {
 if (fl == null) {
   return null;



[hive] branch master updated: HIVE-23429 : LLAP: Optimize retrieving queryId details in LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-11 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 7e77f25  HIVE-23429 : LLAP: Optimize retrieving queryId details in 
LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan)
7e77f25 is described below

commit 7e77f25f1361d0fff8b97e6fc45052bc280e5f7b
Author: Rajesh Balamohan 
AuthorDate: Mon May 11 07:24:04 2020 -0700

HIVE-23429 : LLAP: Optimize retrieving queryId details in 
LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/tezplugins/LlapTaskCommunicator.java | 18 ++
 .../org/apache/hadoop/hive/ql/exec/tez/TezTask.java|  3 +++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
index b168f76..36a2d6b 100644
--- 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
+++ 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
@@ -94,12 +94,14 @@ import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.api.event.VertexStateUpdate;
 import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
+import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.TaskFailureType;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezHeartbeatRequest;
 import org.apache.tez.runtime.api.impl.TezHeartbeatResponse;
 import org.apache.tez.serviceplugins.api.ContainerEndReason;
+import org.apache.tez.serviceplugins.api.DagInfo;
 import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.serviceplugins.api.TaskCommunicatorContext;
@@ -398,11 +400,9 @@ public class LlapTaskCommunicator extends 
TezTaskCommunicatorImpl {
 credentialsChanged, priority);
 int dagId = 
taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId();
 if (currentQueryIdentifierProto == null || (dagId != 
currentQueryIdentifierProto.getDagIndex())) {
-  // TODO HiveQueryId extraction by parsing the Processor payload is ugly. 
This can be improved
-  // once TEZ-2672 is fixed.
-  String hiveQueryId;
+  String hiveQueryId = extractQueryIdFromContext();
   try {
-hiveQueryId = extractQueryId(taskSpec);
+hiveQueryId = (hiveQueryId == null) ? extractQueryId(taskSpec) : 
hiveQueryId;
   } catch (IOException e) {
 throw new RuntimeException("Failed to extract query id from task spec: 
" + taskSpec, e);
   }
@@ -820,12 +820,22 @@ public class LlapTaskCommunicator extends 
TezTaskCommunicatorImpl {
 // is likely already happening.
   }
 
+  // Needed for GenericUDTFGetSplits, where TaskSpecs are generated
   private String extractQueryId(TaskSpec taskSpec) throws IOException {
 UserPayload processorPayload = 
taskSpec.getProcessorDescriptor().getUserPayload();
 Configuration conf = TezUtils.createConfFromUserPayload(processorPayload);
 return HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
   }
 
+  private String extractQueryIdFromContext() {
+//TODO: Remove following instance of check, When TEZ-2672 exposes getConf 
from DagInfo
+DagInfo dagInfo = getContext().getCurrentDagInfo();
+if (dagInfo instanceof DAG) {
+  return ((DAG)dagInfo).getConf().get(ConfVars.HIVEQUERYID.varname);
+}
+return null;
+  }
+
   private SubmitWorkRequestProto constructSubmitWorkRequest(ContainerId 
containerId,
 TaskSpec taskSpec,
 
FragmentRuntimeInfo fragmentRuntimeInfo,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 854bc89..b1bf2f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -424,6 +424,9 @@ public class TezTask extends Task {
 .put("description", ctx.getCmd());
 String dagInfo = json.toString();
 
+String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
+dag.setConf(HiveConf.ConfVars.HIVEQUERYID.varname, queryId);
+
 if (LOG.isDebugEnabled()) {
   LOG.debug("DagInfo: " + dagInfo);
 }

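The idea is to stamp the query id onto the DAG's configuration at submit time (TezTask) so the task communicator can read it back cheaply through its DagInfo handle instead of deserializing the processor payload for every DAG. A simplified sketch with stand-in types (DagInfo and Dag below are illustrative placeholders, not the Tez interfaces):

  import java.util.HashMap;
  import java.util.Map;

  final class DagConfSketch {
    interface DagInfo { String getName(); }              // what the plugin API exposes

    static final class Dag implements DagInfo {          // the richer runtime object
      private final Map<String, String> conf = new HashMap<>();
      public String getName() { return "query-dag"; }
      void setConf(String key, String value) { conf.put(key, value); }
      String getConf(String key) { return conf.get(key); }
    }

    // Producer side: stamp the query id onto the DAG before submission.
    static void tagQueryId(Dag dag, String queryId) {
      dag.setConf("hive.query.id", queryId);
    }

    // Consumer side: cheap lookup first, fall back to the expensive payload parse.
    static String queryIdOf(DagInfo info) {
      if (info instanceof Dag) {
        return ((Dag) info).getConf("hive.query.id");
      }
      return null;   // caller falls back to parsing the processor payload
    }
  }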


[hive] branch master updated: HIVE-22320 : Cluster and fs type settings can be replaced with a single minicluster setting in CliConfigs (Laszlo Bodor via Miklos Gergely)

2020-05-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b0dc673  HIVE-22320 : Cluster and fs type settings can be replaced 
with a single minicluster setting in CliConfigs (Laszlo Bodor via Miklos 
Gergely)
b0dc673 is described below

commit b0dc6734cdc0945ac9b3ebff800a63bccc4a7e61
Author: Laszlo Bodor 
AuthorDate: Sun May 10 21:16:35 2020 -0700

HIVE-22320 : Cluster and fs type settings can be replaced with a single 
minicluster setting in CliConfigs (Laszlo Bodor via Miklos Gergely)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/cli/control/AbstractCliConfig.java|  1 +
 .../java/org/apache/hadoop/hive/cli/control/CliConfigs.java  | 12 ++--
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java
 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java
index 712af82..353a4aa 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java
@@ -368,6 +368,7 @@ public abstract class AbstractCliConfig {
 if (clusterType == null) {
   throw new RuntimeException("clustertype cant be null");
 }
+this.setFsType(clusterType.getDefaultFsType());
   }
 
   protected FsType getFsType() {
diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 1ecd0d1..473a9f8 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -147,7 +147,6 @@ public class CliConfigs {
 
 setHiveConfDir("data/conf/tez");
 setClusterType(MiniClusterType.TEZ);
-setFsType(QTestMiniClusters.FsType.HDFS);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
   }
@@ -196,7 +195,6 @@ public class CliConfigs {
 setCleanupScript("q_test_cleanup_druid.sql");
 setHiveConfDir("data/conf/llap");
 setClusterType(MiniClusterType.DRUID);
-setFsType(QTestMiniClusters.FsType.HDFS);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
   }
@@ -216,7 +214,6 @@ public class CliConfigs {
 setCleanupScript("q_test_cleanup_druid.sql");
 setHiveConfDir("data/conf/llap");
 setClusterType(MiniClusterType.DRUID_KAFKA);
-setFsType(QTestMiniClusters.FsType.HDFS);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
   }
@@ -233,7 +230,6 @@ public class CliConfigs {
 setLogDir("itests/qtest/target/tmp/log");
 setHiveConfDir("data/conf/llap");
 setClusterType(MiniClusterType.KAFKA);
-setFsType(QTestMiniClusters.FsType.HDFS);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
   }
@@ -265,7 +261,6 @@ public class CliConfigs {
 
 setHiveConfDir("data/conf/llap");
 setClusterType(MiniClusterType.LLAP_LOCAL);
-setFsType(QTestMiniClusters.FsType.LOCAL);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
   }
@@ -288,7 +283,7 @@ public class CliConfigs {
 
 
 setClusterType(MiniClusterType.MR);
-setFsType(QTestMiniClusters.FsType.ENCRYPTED_HDFS);
+setFsType(QTestMiniClusters.FsType.ENCRYPTED_HDFS); // override 
default FsType.HDFS
 if (getClusterType() == MiniClusterType.TEZ) {
   setHiveConfDir("data/conf/tez");
 } else {
@@ -723,7 +718,7 @@ public class CliConfigs {
 setCleanupScript("q_test_cleanup_src.sql");
 
 setClusterType(MiniClusterType.MR);
-setFsType(QTestMiniClusters.FsType.ERASURE_CODED_HDFS);
+setFsType(QTestMiniClusters.FsType.ERASURE_CODED_HDFS); // override 
default FsType.HDFS
 setHiveConfDir(getClusterType());
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
@@ -766,7 +761,6 @@ public class CliConfigs {
 setCleanupScript("q_test_cleanup_druid.sql");
 setHiveConfDir("data/conf/llap");
 setClusterType(MiniClusterType.DRUID_LOCAL);
-setFsType(QTestMiniClusters.FsType.LOCAL);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);
 

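The cleanup hinges on each MiniClusterType carrying its usual filesystem, so setClusterType() can apply that default and only the exceptional configs (encrypted or erasure-coded HDFS) override it afterwards. A condensed sketch with illustrative enum constants (the real defaults live in QTestMiniClusters):

  final class CliConfigSketch {
    enum FsType { LOCAL, HDFS, ENCRYPTED_HDFS, ERASURE_CODED_HDFS }

    enum MiniClusterType {
      LLAP_LOCAL(FsType.LOCAL), TEZ(FsType.HDFS), MR(FsType.HDFS);
      private final FsType defaultFsType;
      MiniClusterType(FsType defaultFsType) { this.defaultFsType = defaultFsType; }
      FsType getDefaultFsType() { return defaultFsType; }
    }

    private MiniClusterType clusterType;
    private FsType fsType;

    // Picking a cluster type now implies its usual filesystem...
    void setClusterType(MiniClusterType type) {
      this.clusterType = type;
      this.fsType = type.getDefaultFsType();
    }

    // ...and only the few configs that need something else override it explicitly.
    void setFsType(FsType fsType) {
      this.fsType = fsType;
    }
  }
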
[hive] branch master updated: HIVE-22343 : Fix incorrect spelling of 'artifectId' in pom.xml ( ice bai via Ashutosh Chauhan)

2020-05-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 6491151  HIVE-22343 : Fix incorrect spelling of 'artifectId' in 
pom.xml ( ice bai via Ashutosh Chauhan)
6491151 is described below

commit 64911519f1eefe700bed0e9368743487c0df751e
Author: Ashutosh Chauhan 
AuthorDate: Sun May 10 18:07:59 2020 -0700

HIVE-22343 : Fix incorrect spelling of 'artifectId' in pom.xml ( ice bai 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 beeline/pom.xml   | 2 +-
 cli/pom.xml   | 2 +-
 common/pom.xml| 2 +-
 contrib/pom.xml   | 2 +-
 druid-handler/pom.xml | 2 +-
 hbase-handler/pom.xml | 2 +-
 hcatalog/core/pom.xml | 2 +-
 hcatalog/hcatalog-pig-adapter/pom.xml | 2 +-
 hcatalog/server-extensions/pom.xml| 2 +-
 hcatalog/streaming/pom.xml| 4 ++--
 hcatalog/webhcat/java-client/pom.xml  | 2 +-
 itests/hcatalog-unit/pom.xml  | 2 +-
 itests/hive-unit-hadoop2/pom.xml  | 2 +-
 itests/qtest-accumulo/pom.xml | 2 +-
 itests/qtest-spark/pom.xml| 2 +-
 itests/qtest/pom.xml  | 2 +-
 itests/test-serde/pom.xml | 2 +-
 jdbc/pom.xml  | 2 +-
 metastore/pom.xml | 4 ++--
 packaging/pom.xml | 2 +-
 pom.xml   | 8 
 ql/pom.xml| 4 ++--
 serde/pom.xml | 2 +-
 service/pom.xml   | 2 +-
 shims/0.23/pom.xml| 2 +-
 shims/aggregator/pom.xml  | 2 +-
 shims/common/pom.xml  | 2 +-
 shims/scheduler/pom.xml   | 2 +-
 standalone-metastore/metastore-common/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 testutils/pom.xml | 2 +-
 testutils/ptest2/pom.xml  | 6 +++---
 upgrade-acid/pre-upgrade/pom.xml  | 2 +-
 vector-code-gen/pom.xml   | 2 +-
 34 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/beeline/pom.xml b/beeline/pom.xml
index bb627f2..5ba92e3 100644
--- a/beeline/pom.xml
+++ b/beeline/pom.xml
@@ -33,7 +33,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/cli/pom.xml b/cli/pom.xml
index eca0282..7dca2dd 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -32,7 +32,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/common/pom.xml b/common/pom.xml
index 74586e5..5cae80c 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -32,7 +32,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/contrib/pom.xml b/contrib/pom.xml
index 4e28cb5..577130e 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -32,7 +32,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index e6ca298..4b99cc3 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -33,7 +33,7 @@
   
 
   
-
+
 
 
   com.fasterxml.jackson.dataformat
diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml
index 4c3c8ef..2528697 100644
--- a/hbase-handler/pom.xml
+++ b/hbase-handler/pom.xml
@@ -32,7 +32,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/hcatalog/core/pom.xml b/hcatalog/core/pom.xml
index 920547e..9227199 100644
--- a/hcatalog/core/pom.xml
+++ b/hcatalog/core/pom.xml
@@ -38,7 +38,7 @@
   
 
   
-
+
 
 
   org.apache.hive
diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml 
b/hcatalog/hcatalog-pig-adapter/pom.xml
index 7c321be..aba63af 100644
--- a/hcatalog/hcatalog-pig-adapter/pom.xml
+++ b/hcatalog/hcatalog-pig-adapter/pom.xml
@@ -38,7 +38,7 @@
   
 
   
-
+
 
 
   org.apache.hive.hcatalog
diff --git a/hcatalog/server-extensions/pom.xml 
b/hcatalog/server-extensions/pom.xml
index a6807fb..73a462b 100644
--- a/hcatalog/server-extensions/pom.xml
+++ b/hcatalog/server-extensions/pom.xml
@@ -38,7 +38,7 @@
   
 
   
-
+
 
 
   org.apache.hive.hcatalog
diff --git a/hcatalog/streaming/pom.xml b/hcatalog/streaming/pom.xml
index af9dc59..99af4e6 100644
--- a/hcatalog/streaming/pom.xml
+++ b/hcatalog/streaming/pom.xml
@@ -33,7 +33,7 @@
   
 
   
-
+
 
 
   org.apache.hive
@@ -137,7 +137,7 @@
 
 
 
-  
+  
   
 org.apache.maven.plugins
 maven-jar-plugin
diff --git a/hcatalog/webhcat/java-client/pom.xml 
b/hcatalog

[hive] branch master updated: HIVE-22933 : Allow connecting kerberos-enabled Hive to connect to a non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra)

2020-05-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 75f7762  HIVE-22933 : Allow connecting kerberos-enabled Hive to 
connect to a non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra)
75f7762 is described below

commit 75f77621b52218f4781a08441d4c3f7be572e299
Author: Nishant Bangarwa 
AuthorDate: Sun May 10 16:46:38 2020 -0700

HIVE-22933 : Allow connecting kerberos-enabled Hive to connect to a 
non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra)

Signed-off-by: Ashutosh Chauhan 
---
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 ++
 .../java/org/apache/hadoop/hive/druid/DruidStorageHandler.java| 8 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 60ae06a..4f2ea9a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3033,6 +3033,8 @@ public class HiveConf extends Configuration {
 "Wait time in ms default to 30 seconds."
 ),
 HIVE_DRUID_BITMAP_FACTORY_TYPE("hive.druid.bitmap.type", "roaring", new 
PatternSet("roaring", "concise"), "Coding algorithm use to encode the bitmaps"),
+HIVE_DRUID_KERBEROS_ENABLE("hive.druid.kerberos.enable", true,
+"Enable/Disable Kerberos authentication explicitly while connecting to 
a druid cluster."),
 // For HBase storage handler
 HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true,
 "Whether writes to HBase should be forced to the write-ahead log. \n" +
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
index beaf249..36b2cdf 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.StorageHandlerInfo;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
@@ -760,7 +761,8 @@ import static 
org.apache.hadoop.hive.druid.DruidStorageHandlerUtils.JSON_MAPPER;
   }
 
   @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) 
{
-if (UserGroupInformation.isSecurityEnabled()) {
+final boolean kerberosEnabled = HiveConf.getBoolVar(getConf(), 
HiveConf.ConfVars.HIVE_DRUID_KERBEROS_ENABLE);
+if (kerberosEnabled && UserGroupInformation.isSecurityEnabled()) {
   // AM can not do Kerberos Auth so will do the input split generation in 
the HS2
   LOG.debug("Setting {} to {} to enable split generation on HS2",
   HiveConf.ConfVars.HIVE_AM_SPLIT_GENERATION.toString(),
@@ -928,7 +930,9 @@ import static 
org.apache.hadoop.hive.druid.DruidStorageHandlerUtils.JSON_MAPPER;
 .withNumConnections(numConnection)
 .withReadTimeout(new Period(readTimeout).toStandardDuration())
 .build(), lifecycle);
-if (UserGroupInformation.isSecurityEnabled()) {
+final boolean kerberosEnabled =
+HiveConf.getBoolVar(SessionState.getSessionConf(), 
HiveConf.ConfVars.HIVE_DRUID_KERBEROS_ENABLE);
+if (kerberosEnabled && UserGroupInformation.isSecurityEnabled()) {
   LOG.info("building Kerberos Http Client");
   return new KerberosHttpClient(httpClient);
 }

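Both call sites now gate Kerberos handling on the same pair of conditions: the new hive.druid.kerberos.enable flag (default true, so existing secure setups are unchanged) and the cluster actually running with Hadoop security. The combined check, roughly:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.security.UserGroupInformation;

  final class DruidClientAuthSketch {
    // Setting hive.druid.kerberos.enable=false lets a kerberized HS2 talk to a
    // non-kerberos Druid cluster; otherwise behaviour follows cluster security.
    static boolean useKerberosClient(Configuration conf) {
      boolean kerberosEnabled = conf.getBoolean("hive.druid.kerberos.enable", true);
      return kerberosEnabled && UserGroupInformation.isSecurityEnabled();
    }
  }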


[hive] branch master updated: HIVE-22419 : Improve Messages Emitted From HiveMetaStoreClient (David Mollitor via Ashutosh Chauhan)

2020-05-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 19e67cd  HIVE-22419 : Improve Messages Emitted From 
HiveMetaStoreClient (David Mollitor via Ashutosh Chauhan)
19e67cd is described below

commit 19e67cd54e2f980be901bed4e6d21ce002ebc9e1
Author: David Mollitor 
AuthorDate: Sat May 9 23:17:50 2020 -0700

HIVE-22419 : Improve Messages Emitted From HiveMetaStoreClient (David 
Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/metastore/HiveMetaStoreClient.java | 54 --
 .../hadoop/hive/metastore/utils/LogUtils.java  |  9 
 2 files changed, 29 insertions(+), 34 deletions(-)

diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 1ec5839..64d3833 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -70,7 +70,6 @@ import org.apache.hadoop.hive.metastore.utils.FilterUtils;
 import org.apache.hadoop.hive.metastore.utils.JavaUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.utils.SecurityUtils;
-import org.apache.hadoop.hive.metastore.utils.LogUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
@@ -440,8 +439,8 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
   String newVar = MetastoreConf.getAsString(conf, oneVar);
   if (oldVar == null ||
   (oneVar.isCaseSensitive() ? !oldVar.equals(newVar) : 
!oldVar.equalsIgnoreCase(newVar))) {
-LOG.info("Mestastore configuration " + oneVar.toString() +
-" changed from " + oldVar + " to " + newVar);
+LOG.info("Mestastore configuration {} changed from {} to {}",
+oneVar, oldVar, newVar);
 compatible = false;
   }
 }
@@ -457,8 +456,8 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
   public void reconnect() throws MetaException {
 if (localMetaStore) {
   // For direct DB connections we don't yet support reestablishing 
connections.
-  throw new MetaException("For direct MetaStore DB connections, we don't 
support retries" +
-  " at the client level.");
+  throw new MetaException("Retries for direct MetaStore DB connections "
+  + "are not supported by this client");
 } else {
   close();
 
@@ -593,10 +592,13 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
   // Create an SSL socket and connect
   transport = SecurityUtils.getSSLSocket(store.getHost(), 
store.getPort(), clientSocketTimeout,
   trustStorePath, trustStorePassword);
-  LOG.debug("Opened an SSL connection to metastore, current 
connections: " + connCount.incrementAndGet());
+  final int newCount = connCount.incrementAndGet();
+  LOG.debug(
+  "Opened an SSL connection to metastore, current connections: 
{}",
+  newCount);
   if (LOG.isTraceEnabled()) {
-LOG.trace("", new LogUtils.StackTraceLogger("METASTORE SSL 
CONNECTION TRACE - open - " +
-System.identityHashCode(this)));
+LOG.trace("METASTORE SSL CONNECTION TRACE - open [{}]",
+System.identityHashCode(this), new Exception());
   }
 } catch (IOException e) {
   throw new IllegalArgumentException(e);
@@ -632,7 +634,7 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
   transport = MetaStorePlainSaslHelper.getPlainTransport(userName, 
passwd, transport);
 } catch (IOException sasle) {
   // IOException covers SaslException
-  LOG.error("Couldn't create client transport", sasle);
+  LOG.error("Could not create client transport", sasle);
   throw new MetaException(sasle.toString());
 }
   } else if (useSasl) {
@@ -665,7 +667,7 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
 transport, MetaStoreUtils.getMetaStoreSaslProperties(conf, 
useSSL));
   }
 } catch (I

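One detail worth calling out in these logging changes: slf4j treats a Throwable passed as the last argument as the exception to print, so the old LogUtils.StackTraceLogger wrapper can be replaced by simply handing new Exception() to LOG.trace(). A minimal sketch:

  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;

  final class ConnectionTraceSketch {
    private static final Logger LOG = LoggerFactory.getLogger(ConnectionTraceSketch.class);

    void opened(int currentConnections) {
      // Placeholders defer the string build until DEBUG is actually enabled.
      LOG.debug("Opened an SSL connection to metastore, current connections: {}", currentConnections);
      if (LOG.isTraceEnabled()) {
        // A Throwable in the last position makes slf4j attach the stack trace.
        LOG.trace("METASTORE SSL CONNECTION TRACE - open [{}]",
            System.identityHashCode(this), new Exception());
      }
    }
  }
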
[hive] branch master updated: HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David Mollitor via Ashutosh Chauhan)

2020-05-10 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 28f6463  HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David 
Mollitor via Ashutosh Chauhan)
28f6463 is described below

commit 28f6463ed5bc899d7fd554e4e977034411347b27
Author: David Mollitor 
AuthorDate: Sat May 9 23:00:38 2020 -0700

HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David Mollitor via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../metastore/security/HadoopThriftAuthBridge.java | 59 +++---
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java
index 299bdd2..53ed214 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java
@@ -97,10 +97,12 @@ public abstract class HadoopThriftAuthBridge {
   throw new IllegalStateException("Unable to get current login user: " + 
e, e);
 }
 if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
-  LOG.debug("Not setting UGI conf as passed-in authMethod of " + 
authMethod + " = current.");
+  LOG.debug("Not setting UGI conf as passed-in authMethod of {} = current",
+  authMethod);
   return new Client();
 } else {
-  LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + 
" != current.");
+  LOG.debug("Setting UGI conf as passed-in authMethod of {} != current",
+  authMethod);
   Configuration conf = new Configuration();
   conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
   UserGroupInformation.setConfiguration(conf);
@@ -150,10 +152,12 @@ public abstract class HadoopThriftAuthBridge {
   throw new IllegalStateException("Unable to get current user: " + e, e);
 }
 if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
-  LOG.debug("Not setting UGI conf as passed-in authMethod of " + 
authMethod + " = current.");
+  LOG.debug("Not setting UGI conf as passed-in authMethod of {} = current",
+  authMethod);
   return ugi;
 } else {
-  LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + 
" != current.");
+  LOG.debug("Setting UGI conf as passed-in authMethod of {} != current",
+  authMethod);
   Configuration conf = new Configuration();
   conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
   UserGroupInformation.setConfiguration(conf);
@@ -177,7 +181,7 @@ public abstract class HadoopThriftAuthBridge {
   throw new IllegalArgumentException("Invalid attribute value for " +
   HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae);
 }
-LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod());
+LOG.debug("Current authMethod = {}", ugi.getAuthenticationMethod());
 return ugi.getAuthenticationMethod().equals(authMethod);
   }
 
@@ -283,22 +287,16 @@ public abstract class HadoopThriftAuthBridge {
   }
 }
 if (nc != null) {
-  if (LOG.isDebugEnabled()) {
-LOG.debug("SASL client callback: setting username: " + userName);
-  }
+  LOG.debug("SASL client callback: setting username: {}", userName);
   nc.setName(userName);
 }
 if (pc != null) {
-  if (LOG.isDebugEnabled()) {
-LOG.debug("SASL client callback: setting userPassword");
-  }
+  LOG.debug("SASL client callback: setting userPassword");
   pc.setPassword(userPassword);
 }
 if (rc != null) {
-  if (LOG.isDebugEnabled()) {
-LOG.debug("SASL client callback: setting realm: "
-+ rc.getDefaultText());
-  }
+  LOG.debug("SASL client callback: setting realm: {}",
+  rc.getDefaultText());
   rc.setText(rc.getDefaultText());
 }
   }
@@ -343,14 +341,15 @@ public abstract class HadoopThriftAuthBridge {
   }
   if (clientConf == null || clientConf.isEmpty()) {
 // Don't bust existing setups.
-LOG.warn("Client-facing principal not set. Using server-side setting: 
" + principalConf);
+LOG.warn("Client-facing principal not set. Using server-side se

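The pattern throughout this patch is the standard slf4j one: with {} placeholders the message is only formatted when the level is enabled, so most if (LOG.isDebugEnabled()) guards can be dropped, and a guard is only worth keeping where computing an argument is itself costly. For example:

  import javax.security.auth.callback.NameCallback;
  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;

  final class CallbackLogging {
    private static final Logger LOG = LoggerFactory.getLogger(CallbackLogging.class);

    static void setName(NameCallback nc, String userName) {
      // No isDebugEnabled() guard: a disabled DEBUG call is a cheap no-op
      // because the string is never built.
      LOG.debug("SASL client callback: setting username: {}", userName);
      nc.setName(userName);
    }
  }
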
[hive] branch master updated: HIVE-22437 : LLAP Metadata cache NPE on locking metadata. (Slim Bouguerra via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 6664208  HIVE-22437 : LLAP Metadata cache NPE on locking metadata. 
(Slim Bouguerra via Ashutosh Chauhan)
6664208 is described below

commit 6664208992bd8ecc788ee0dcd0ddd8e6b87582b0
Author: Slim Bouguerra 
AuthorDate: Sat May 9 22:55:47 2020 -0700

HIVE-22437 : LLAP Metadata cache NPE on locking metadata. (Slim Bouguerra 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/metadata/MetadataCache.java   | 16 +++---
 .../hive/llap/cache/TestOrcMetadataCache.java  | 25 ++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java
index 10bd736..ce1e236 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java
@@ -63,7 +63,7 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 this.policy = policy;
 this.metrics = metrics;
 this.estimateErrors = useEstimateCache
-? new ConcurrentHashMap() : null;
+? new ConcurrentHashMap<>() : null;
   }
 
   public void putIncompleteCbs(Object fileKey, DiskRange[] ranges, long 
baseOffset, AtomicBoolean isStopped) {
@@ -235,7 +235,7 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 if (maxAlloc < length) {
   largeBuffers = new LlapMetadataBuffer[length / maxAlloc];
   for (int i = 0; i < largeBuffers.length; ++i) {
-largeBuffers[i] = new LlapMetadataBuffer(fileKey, tag);
+largeBuffers[i] = new LlapMetadataBuffer<>(fileKey, tag);
   }
   allocator.allocateMultiple(largeBuffers, maxAlloc, null, isStopped);
   for (int i = 0; i < largeBuffers.length; ++i) {
@@ -256,7 +256,7 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 LlapMetadataBuffer[] cacheData = new 
LlapMetadataBuffer[largeBuffers.length + 1];
 System.arraycopy(largeBuffers, 0, cacheData, 0, largeBuffers.length);
 cacheData[largeBuffers.length] = smallBuffer[0];
-return new LlapMetadataBuffers(cacheData);
+return new LlapMetadataBuffers<>(cacheData);
   }
 }
   }
@@ -340,16 +340,16 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 if (result != null) return result;
 if (tailBuffer.remaining() <= allocator.getMaxAllocation()) {
   // The common case by far.
-  return wrapSmallBb(new LlapMetadataBuffer(key, tag), tailBuffer, 
isStopped);
+  return wrapSmallBb(new LlapMetadataBuffer<>(key, tag), tailBuffer, 
isStopped);
 } else {
   int allocCount = determineAllocCount(tailBuffer);
   @SuppressWarnings("unchecked")
   LlapMetadataBuffer[] results = new LlapMetadataBuffer[allocCount];
   for (int i = 0; i < allocCount; ++i) {
-results[i] = new LlapMetadataBuffer(key, tag);
+results[i] = new LlapMetadataBuffer<>(key, tag);
   }
   wrapLargeBb(results, tailBuffer, isStopped);
-  return new LlapMetadataBuffers(results);
+  return new LlapMetadataBuffers<>(results);
 }
   }
 
@@ -406,7 +406,7 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 for (int i = 0; i < bufferArray.length; ++i) {
   if (lockOneBuffer(bufferArray[i], doNotifyPolicy)) continue;
   for (int j = 0; j < i; ++j) {
-unlockSingleBuffer(buffer, true);
+unlockSingleBuffer(bufferArray[j], true);
   }
   discardMultiBuffer(buffers);
   return false;
@@ -497,7 +497,7 @@ public class MetadataCache implements LlapIoDebugDump, 
FileMetadataCache {
 }
   }
 
-  public static interface LlapBufferOrBuffers extends MemoryBufferOrBuffers {
+  public interface LlapBufferOrBuffers extends MemoryBufferOrBuffers {
 LlapAllocatorBuffer getSingleLlapBuffer();
 LlapAllocatorBuffer[] getMultipleLlapBuffers();
   }
diff --git 
a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
 
b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
index d8632b0..1534864 100644
--- 
a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
+++ 
b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
@@ -34,6 +34,7 @@ import 
org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers
 import 
org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapMetadataBuffer;
 import 
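
The one-character change above is easy to miss in the hunk: when locking a multi-buffer cache entry fails partway through, the buffers locked so far have to be released by their own loop index, not through the unrelated `buffer` reference the old code used, which appears to be what led to the NPE on locking metadata. A minimal sketch of that all-or-nothing pattern, using plain java.util.concurrent locks rather than the real MetadataCache/LlapAllocatorBuffer API:

import java.util.concurrent.locks.ReentrantLock;

public class LockAllOrRollback {
  // Try to lock every entry; if one cannot be taken, release the ones
  // already locked (by their own index j) and report failure.
  static boolean lockAll(ReentrantLock[] locks) {
    for (int i = 0; i < locks.length; ++i) {
      if (locks[i].tryLock()) {
        continue;
      }
      for (int j = 0; j < i; ++j) {
        locks[j].unlock(); // release entry j, not some other reference in scope
      }
      return false;
    }
    return true;
  }

  public static void main(String[] args) {
    ReentrantLock[] locks = { new ReentrantLock(), new ReentrantLock() };
    System.out.println(lockAll(locks)); // true: both acquired
  }
}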

[hive] branch master updated: HIVE-22491 : Use Collections emptyList (David Mollitor via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 0bf24c8  HIVE-22491 : Use Collections emptyList (David Mollitor via 
Ashutosh Chauhan)
0bf24c8 is described below

commit 0bf24c805467db2c04098fac5029914ce0b0985c
Author: David Mollitor 
AuthorDate: Sat May 9 22:23:33 2020 -0700

HIVE-22491 : Use Collections emptyList (David Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java | 3 ++-
 .../main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java | 3 ++-
 .../org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java | 3 +--
 .../org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java | 3 ++-
 .../java/org/apache/hadoop/hive/kudu/KuduPredicateHandler.java| 2 +-
 .../org/apache/hadoop/hive/ql/exec/SerializationUtilities.java| 4 ++--
 ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
 ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java   | 7 +++
 ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java | 3 ++-
 ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java  | 8 +++-
 ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java | 3 ++-
 ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java | 3 ++-
 .../hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java | 7 ---
 ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java | 2 +-
 ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java| 5 +++--
 ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java   | 4 ++--
 serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java | 6 +++---
 .../java/org/apache/hadoop/hive/metastore/utils/FileUtils.java| 2 +-
 .../main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java | 6 ++
 .../apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java | 7 ---
 .../main/java/org/apache/hadoop/hive/metastore/ObjectStore.java   | 6 +++---
 .../apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java  | 3 ++-
 22 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java 
b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
index 7641610..10ff548 100644
--- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.common;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -66,7 +67,7 @@ public class HiveStatsUtils {
 // does not exist. But getFileStatus() throw IOException. To mimic the
 // similar behavior we will return empty array on exception. For 
external
 // tables, the path of the table will not exists during table creation
-return new ArrayList<>(0);
+return Collections.emptyList();
   }
   return result;
 }
diff --git 
a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
 
b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
index b73b6fe..2f92733 100644
--- 
a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
+++ 
b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
@@ -21,6 +21,7 @@ package org.apache.hive.hcatalog.api;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -1007,7 +1008,7 @@ public class HCatClientHMSImpl extends HCatClient {
   }
 });
   } else {
-return new ArrayList();
+return Collections.emptyList();
   }
 } catch (TException e) {
   throw new ConnectionFailureException("TException while getting 
notifications", e);
diff --git 
a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java
 
b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java
index 4ebc34b..ecf3ef5 100644
--- 
a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java
+++ 
b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java
@@ -19,7 +19,6 @@
 package org.apache.hive.hcatalog.templeton.tool;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Date;
@@ -138,7 +137,7
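
Most hunks in this change follow the same pattern: a no-result path that used to build `new ArrayList<>(0)` now returns `Collections.emptyList()`. A small self-contained illustration of the difference (not Hive code): the empty-list singleton is immutable and shared, so the empty return costs no allocation, while the old form created a fresh mutable list on every call.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class EmptyListDemo {
  // Old pattern: allocates a fresh, mutable list on every call.
  static List<String> before() {
    return new ArrayList<>(0);
  }

  // New pattern: returns the shared, immutable empty-list singleton.
  static List<String> after() {
    return Collections.emptyList();
  }

  public static void main(String[] args) {
    System.out.println(after() == Collections.<String>emptyList()); // true: same shared instance
    System.out.println(before() == new ArrayList<String>(0));       // false: a new object each time
  }
}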

[hive] branch master updated: HIVE-22531 : Fix a couple whitespace errors in error messages (Jacob Tolar via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d12a308  HIVE-22531 : Fix a couple whitespace errors in error messages 
(Jacob Tolar via Ashutosh Chauhan)
d12a308 is described below

commit d12a30804f17b774cb75c6d0ec32dbc67512975a
Author: Jacob Tolar 
AuthorDate: Sat May 9 22:03:14 2020 -0700

HIVE-22531 : Fix a couple whitespace errors in error messages (Jacob Tolar 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java   | 4 ++--
 .../apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java| 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java
index bb2356a..a776155 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java
@@ -72,7 +72,7 @@ public class ShowTablesOperation extends 
DDLOperation {
 try (DataOutputStream os = DDLUtils.getOutputStream(new 
Path(desc.getResFile()), context)) {
   context.getFormatter().showTables(os, tableNames);
 } catch (Exception e) {
-  throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database" + 
desc.getDbName());
+  throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database " + 
desc.getDbName());
 }
   }
 
@@ -91,7 +91,7 @@ public class ShowTablesOperation extends 
DDLOperation {
 try (DataOutputStream os = DDLUtils.getOutputStream(new 
Path(desc.getResFile()), context)) {
   context.getFormatter().showTablesExtended(os, tableObjects);
 } catch (Exception e) {
-  throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database" + 
desc.getDbName());
+  throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database " + 
desc.getDbName());
 }
   }
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index f958720..b8b227c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -1222,7 +1222,7 @@ public class SessionHiveMetaStoreClient extends 
HiveMetaStoreClient implements I
 if (partition == null) {
   throw new NoSuchObjectException("Partition with partition values " +
   (pvals != null ? Arrays.toString(pvals.toArray()) : "null") +
-  " for table " + tableName + " in database " + dbName + "and for 
user " +
+  " for table " + tableName + " in database " + dbName + " and for 
user " +
   userName + " and group names " + (groupNames != null ? 
Arrays.toString(groupNames.toArray()) : "null") +
   " is not found.");
 }



[hive] branch master updated: HIVE-22569 : PartitionPruner use Collections Class (David Mollitor via Vineet Garg)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4dccfc2  HIVE-22569 : PartitionPruner use Collections Class (David 
Mollitor via Vineet Garg)
4dccfc2 is described below

commit 4dccfc289e5922e03a91175bd57ab32475d23357
Author: David Mollitor 
AuthorDate: Sat May 9 21:48:22 2020 -0700

HIVE-22569 : PartitionPruner use Collections Class (David Mollitor via 
Vineet Garg)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/optimizer/calcite/RelOptHiveTable.java |  7 ++--
 .../hive/ql/optimizer/ppr/PartitionPruner.java |  5 +--
 .../apache/hadoop/hive/ql/parse/ParseContext.java  |  2 +-
 .../hadoop/hive/ql/parse/PrunedPartitionList.java  | 37 +++---
 4 files changed, 26 insertions(+), 25 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 1f6e1bc..5b0021f 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -515,10 +515,11 @@ public class RelOptHiveTable implements RelOptTable {
   computePartitionList(hiveConf, null, new HashSet());
 }
 
-ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey());
+String partitionListKey = partitionList.getKey().orElse(null);
+ColumnStatsList colStatsCached = colStatsCache.get(partitionListKey);
 if (colStatsCached == null) {
   colStatsCached = new ColumnStatsList();
-  colStatsCache.put(partitionList.getKey(), colStatsCached);
+  colStatsCache.put(partitionListKey, colStatsCached);
 }
 
 // 2. Obtain Col Stats for Non Partition Cols
@@ -751,7 +752,7 @@ public class RelOptHiveTable implements RelOptTable {
   }
 
   public String getPartitionListKey() {
-return partitionList != null ? partitionList.getKey() : null;
+return partitionList != null ? partitionList.getKey().orElse(null) : null;
   }
 
 }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 673d858..709b221 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.ppr;
 
 import java.util.AbstractSequentialList;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.LinkedList;
@@ -207,7 +208,7 @@ public class PartitionPruner extends Transform {
 if (compactExpr == null || isBooleanExpr(compactExpr)) {
   if (isFalseExpr(compactExpr)) {
 return new PrunedPartitionList(tab, key + 
compactExpr.getExprString(true),
-new LinkedHashSet(0), new ArrayList(0), false);
+Collections.emptySet(), Collections.emptyList(), false);
   }
   // For null and true values, return every partition
   return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
@@ -242,7 +243,7 @@ public class PartitionPruner extends Transform {
 } catch (HiveException e) {
   throw new SemanticException(e);
 }
-ppList = new PrunedPartitionList(tab, key, parts, null, unknownPartitions);
+ppList = new PrunedPartitionList(tab, key, parts, Collections.emptyList(), 
unknownPartitions);
 if (partsCache != null) {
   partsCache.put(key, ppList);
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index bef0217..da443f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -431,7 +431,7 @@ public class ParseContext {
* @return col stats
*/
   public ColumnStatsList getColStatsCached(PrunedPartitionList partList) {
-return ctx.getOpContext().getColStatsCache().get(partList.getKey());
+return 
ctx.getOpContext().getColStatsCache().get(partList.getKey().orElse(null));
   }
 
   /**
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
index 4068735..398dbf5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
@@ -19,7 +19,10 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 
 import
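
Besides the Collections usage, this diff has callers unwrap the partition-list cache key with `partitionList.getKey().orElse(null)`, which implies getKey() now returns an Optional. A tiny sketch of that unwrap pattern, with illustrative names rather than the Hive classes:

import java.util.Optional;

public class OptionalKeyDemo {
  // Stand-in for a getKey() that may or may not have a cache key to offer.
  static Optional<String> getKey(String key) {
    return Optional.ofNullable(key);
  }

  public static void main(String[] args) {
    String cacheKey = getKey("db.table;filter").orElse(null);
    String missing  = getKey(null).orElse(null);
    System.out.println(cacheKey); // db.table;filter
    System.out.println(missing);  // null, usable where a nullable key is expected
  }
}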

[hive] branch master updated: HIVE-23083 : Enable fast serialization in xprod edge (Rajesh Balamohan via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f7e5de1  HIVE-23083 : Enable fast serialization in xprod edge (Rajesh 
Balamohan via Ashutosh Chauhan)
f7e5de1 is described below

commit f7e5de1e9896dca984e72072a97472d6553ca278
Author: Rajesh Balamohan 
AuthorDate: Sat May 9 20:18:22 2020 -0700

HIVE-23083 : Enable fast serialization in xprod edge (Rajesh Balamohan via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 78b5e32..3e8ba08 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -565,6 +565,8 @@ public class DagUtils {
 UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass,
   ValueHashPartitioner.class.getName())
 .setFromConfiguration(conf)
+
.setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
+
.setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
 .build();
   return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
 case SIMPLE_EDGE:



[hive] branch master updated: HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong Huang via Madhan Neetiraj, Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 3f1358d  HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong 
Huang via Madhan Neetiraj, Ashutosh Chauhan)
3f1358d is described below

commit 3f1358d57d3a43894b912169a2d66cd6c77ce1d4
Author: Quanlong Huang 
AuthorDate: Sat May 9 18:43:26 2020 -0700

HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong Huang via 
Madhan Neetiraj, Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java| 12 
 .../hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java | 12 
 .../hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java  | 12 
 .../hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java | 12 
 .../hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java  | 12 
 ql/src/test/queries/clientpositive/udf_mask.q|  4 
 ql/src/test/queries/clientpositive/udf_mask_first_n.q|  3 +++
 ql/src/test/queries/clientpositive/udf_mask_last_n.q |  3 +++
 ql/src/test/queries/clientpositive/udf_mask_show_first_n.q   |  3 +++
 ql/src/test/queries/clientpositive/udf_mask_show_last_n.q|  3 +++
 ql/src/test/results/clientpositive/udf_mask.q.out| 10 +-
 ql/src/test/results/clientpositive/udf_mask_first_n.q.out|  8 +++-
 ql/src/test/results/clientpositive/udf_mask_last_n.q.out |  8 +++-
 .../test/results/clientpositive/udf_mask_show_first_n.q.out  |  8 +++-
 .../test/results/clientpositive/udf_mask_show_last_n.q.out   |  8 +++-
 15 files changed, 113 insertions(+), 5 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
index 27c3bf8..812b028 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
@@ -121,6 +121,9 @@ class MaskTransformer extends AbstractTransformer {
 
   @Override
   Byte transform(final Byte value) {
+if (value == 0) {
+  return (byte) maskedNumber;
+}
 byte val = value;
 
 if(value < 0) {
@@ -145,6 +148,9 @@ class MaskTransformer extends AbstractTransformer {
 
   @Override
   Short transform(final Short value) {
+if (value == 0) {
+  return (short) maskedNumber;
+}
 short val = value;
 
 if(value < 0) {
@@ -169,6 +175,9 @@ class MaskTransformer extends AbstractTransformer {
 
   @Override
   Integer transform(final Integer value) {
+if (value == 0) {
+  return maskedNumber;
+}
 int val = value;
 
 if(value < 0) {
@@ -193,6 +202,9 @@ class MaskTransformer extends AbstractTransformer {
 
   @Override
   Long transform(final Long value) {
+if (value == 0) {
+  return (long) maskedNumber;
+}
 long val = value;
 
 if(value < 0) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
index 76ee292..c1ea157 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
@@ -81,6 +81,9 @@ class MaskFirstNTransformer extends MaskTransformer {
 
   @Override
   Byte transform(final Byte value) {
+if (value == 0) {
+  return charCount > 0 ? (byte) maskedNumber : 0;
+}
 byte val = value;
 
 if(value < 0) {
@@ -118,6 +121,9 @@ class MaskFirstNTransformer extends MaskTransformer {
 
   @Override
   Short transform(final Short value) {
+if (value == 0) {
+  return charCount > 0 ? (short) maskedNumber : 0;
+}
 short val = value;
 
 if(value < 0) {
@@ -155,6 +161,9 @@ class MaskFirstNTransformer extends MaskTransformer {
 
   @Override
   Integer transform(final Integer value) {
+if (value == 0) {
+  return charCount > 0 ? maskedNumber : 0;
+}
 int val = value;
 
 if(value < 0) {
@@ -192,6 +201,9 @@ class MaskFirstNTransformer extends MaskTransformer {
 
   @Override
   Long transform(final Long value) {
+if (value == 0) {
+  return charCount > 0 ? maskedNumber : 0L;
+}
 long val = value;
 
 if(value < 0) {
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
index c0c5c61..684c049 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
@@ -81,6 +81,9 @@ class MaskLastNTransformer extends MaskTransforme
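
The new `if (value == 0)` guards all do the same thing: the digit-rewriting loops in these transformers never executed for an input of 0, so mask(0) leaked the original value instead of the masked digit. A rough stand-in for the numeric masking rule (default masked digit 1; not the actual GenericUDFMask code):

public class MaskZeroDemo {
  // Replace every digit of value with maskedNumber, preserving the sign.
  static int mask(int value, int maskedNumber) {
    if (value == 0) {
      return maskedNumber;           // the case these hunks add
    }
    int val = Math.abs(value);
    int result = 0;
    int pos = 1;
    while (val != 0) {               // rewrite one digit per iteration
      result += maskedNumber * pos;
      pos *= 10;
      val /= 10;
    }
    return value < 0 ? -result : result;
  }

  public static void main(String[] args) {
    System.out.println(mask(123, 1)); // 111
    System.out.println(mask(0, 1));   // 1, instead of the unmasked 0
  }
}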

[hive] branch master updated: HIVE-22769 : Incorrect query results and query failure during split generation for compressed text files (Panos G via Ashutosh Chauhan , Mustafa Iman) Change-Id: Ifb68bd9

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c4d5dc6  HIVE-22769 : Incorrect query results and query failure during 
split generation for compressed text files (Panos G via Ashutosh Chauhan , 
Mustafa Iman) Change-Id: Ifb68bd9e3b103424aed2b9d6871b536a5437a095 
Signed-off-by: Ashutosh Chauhan 
c4d5dc6 is described below

commit c4d5dc68843b3764b22fc8bccf211962abf6549d
Author: Panos Garefalakis 
AuthorDate: Tue Apr 28 14:24:25 2020 +0100

HIVE-22769 : Incorrect query results and query failure during split 
generation for compressed text files (Panos G via Ashutosh Chauhan , Mustafa 
Iman)
Change-Id: Ifb68bd9e3b103424aed2b9d6871b536a5437a095
Signed-off-by: Ashutosh Chauhan 
---
 data/files/compressed_4line_file1.csv  |   3 +
 data/files/compressed_4line_file1.csv.bz2  | Bin 0 -> 55 bytes
 data/files/compressed_4line_file2.csv  |   3 +
 data/files/compressed_4line_file2.csv.bz2  | Bin 0 -> 60 bytes
 .../test/resources/testconfiguration.properties|   5 +-
 .../hive/llap/io/encoded/LineRrOffsetReader.java   |  20 +-
 .../hive/llap/io/encoded/PassThruOffsetReader.java |  65 ++-
 .../llap/io/encoded/SerDeEncodedDataReader.java|  19 +-
 .../hive/ql/io/HiveContextAwareRecordReader.java   |   2 +
 .../apache/hadoop/hive/ql/io/HiveInputFormat.java  |  21 ++-
 .../hadoop/hive/ql/io/SkippingTextInputFormat.java |  14 +-
 .../{LineBufferTest.java => TestLineBuffer.java}   |   2 +-
 .../hive/ql/io/TestSkippingTextInputFormat.java| 205 +
 .../compressed_skip_header_footer_aggr.q   |  28 +++
 ...ter_aggregation.q => skip_header_footer_aggr.q} |   0
 .../{skiphf_aggr2.q => skip_header_footer_proj.q}  |   0
 .../llap/compressed_skip_header_footer_aggr.q.out  |  64 +++
 ...egation.q.out => skip_header_footer_aggr.q.out} |   0
 ...f_aggr2.q.out => skip_header_footer_proj.q.out} |  16 +-
 19 files changed, 435 insertions(+), 32 deletions(-)

diff --git a/data/files/compressed_4line_file1.csv 
b/data/files/compressed_4line_file1.csv
new file mode 100644
index 000..efe52db
--- /dev/null
+++ b/data/files/compressed_4line_file1.csv
@@ -0,0 +1,3 @@
+1,2019-12-31
+2,2019-12-31
+3,2019-12-31
diff --git a/data/files/compressed_4line_file1.csv.bz2 
b/data/files/compressed_4line_file1.csv.bz2
new file mode 100644
index 000..ada697d
Binary files /dev/null and b/data/files/compressed_4line_file1.csv.bz2 differ
diff --git a/data/files/compressed_4line_file2.csv 
b/data/files/compressed_4line_file2.csv
new file mode 100644
index 000..629a850
--- /dev/null
+++ b/data/files/compressed_4line_file2.csv
@@ -0,0 +1,3 @@
+1,2019-12-31 00
+2,2019-12-31 01
+3,2019-12-31 02
diff --git a/data/files/compressed_4line_file2.csv.bz2 
b/data/files/compressed_4line_file2.csv.bz2
new file mode 100644
index 000..4b5353a
Binary files /dev/null and b/data/files/compressed_4line_file2.csv.bz2 differ
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index de14c81..39e78d6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -406,7 +406,9 @@ minillap.query.files=acid_bucket_pruning.q,\
   reduce_deduplicate_distinct.q, \
   remote_script.q,\
   file_with_header_footer.q,\
-  file_with_header_footer_aggregation.q,\
+  skip_header_footer_aggr.q,\
+  skip_header_footer_proj.q,\
+  compressed_skip_header_footer_aggr.q,\
   external_table_purge.q,\
   external_table_with_space_in_location_path.q,\
   import_exported_table.q,\
@@ -1041,7 +1043,6 @@ minillaplocal.query.files=\
   smb_mapjoin_15.q,\
   vectorized_nested_mapjoin.q,\
   skiphf_aggr.q,\
-  skiphf_aggr2.q,\
   multi_insert_lateral_view.q,\
   smb_mapjoin_4.q,\
   cbo_udf_udaf.q,\
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java
index 3fc1fa2..a0f949b 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java
@@ -24,6 +24,7 @@ import java.lang.reflect.Method;
 import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
 import 
org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.ReaderWithOffsets;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.LineRecordReader;
 
 final class LineRrOffsetReader extends PassThruOffsetReader {
@@ -43,24 +44,29 @@ final class LineRrOffsetReader extends PassThruOffsetReader 
{
 isCompressedMethod = isCompressedMethodTmp;
   }
 
-  static Rea

[hive] branch master updated: HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive on Tez ( Toshihiko Uchida via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 48c5ff1  HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive 
on Tez ( Toshihiko Uchida via Ashutosh Chauhan)
48c5ff1 is described below

commit 48c5ff1dc34582cc1d597b984e9e8122c017c99e
Author: Ashutosh Chauhan 
AuthorDate: Sat May 9 18:01:36 2020 -0700

HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive on Tez ( 
Toshihiko Uchida via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index c5b7171..78b5e32 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -51,6 +51,7 @@ import java.util.zip.ZipOutputStream;
 
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hive.common.util.HiveStringUtils;
 import org.apache.tez.mapreduce.common.MRInputSplitDistributor;
 import org.apache.tez.mapreduce.hadoop.InputSplitInfo;
 import org.apache.tez.mapreduce.output.MROutput;
@@ -1074,7 +1075,9 @@ public class DagUtils {
 String addedFiles = Utilities.getLocalResourceFiles(conf, 
SessionState.ResourceType.FILE);
 String addedJars = Utilities.getLocalResourceFiles(conf, 
SessionState.ResourceType.JAR);
 String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
-String allFiles = auxJars + "," + addedJars + "," + addedFiles;
+String reloadableAuxJars = SessionState.get() == null ? null : 
SessionState.get().getReloadableAuxJars();
+String allFiles =
+HiveStringUtils.joinIgnoringEmpty(new String[]{auxJars, 
reloadableAuxJars, addedJars, addedFiles}, ',');
 return allFiles.split(",");
   }
 
@@ -1091,10 +1094,12 @@ public class DagUtils {
   HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars);
 }
 String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
+String reloadableAuxJars = SessionState.get() == null ? null : 
SessionState.get().getReloadableAuxJars();
 
 // need to localize the additional jars and files
 // we need the directory on hdfs to which we shall put all these files
-String allFiles = auxJars + "," + addedJars + "," + addedFiles;
+String allFiles =
+HiveStringUtils.joinIgnoringEmpty(new String[]{auxJars, 
reloadableAuxJars, addedJars, addedFiles}, ',');
 return allFiles.split(",");
   }
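
Both hunks replace plain string concatenation of the jar lists with HiveStringUtils.joinIgnoringEmpty, so an unset aux-jars setting no longer injects a literal "null" or an empty entry into the list that is later split on ','. A rough equivalent of that join written with plain JDK streams (not the actual HiveStringUtils implementation):

import java.util.Arrays;
import java.util.stream.Collectors;

public class JoinJarsDemo {
  // Old style: nulls become the string "null" and empties leave stray commas.
  static String naiveJoin(String auxJars, String addedJars, String addedFiles) {
    return auxJars + "," + addedJars + "," + addedFiles;
  }

  // Join only the parts that actually carry a value.
  static String joinIgnoringEmpty(String... parts) {
    return Arrays.stream(parts)
        .filter(p -> p != null && !p.isEmpty())
        .collect(Collectors.joining(","));
  }

  public static void main(String[] args) {
    System.out.println(naiveJoin(null, "a.jar", ""));         // null,a.jar,
    System.out.println(joinIgnoringEmpty(null, "a.jar", "")); // a.jar
  }
}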
 



[hive] branch master updated: HIVE-23184 : Upgrade druid to 0.17.1 ( Nishant Bangarwa via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 9a6ec1e  HIVE-23184 : Upgrade druid to 0.17.1 ( Nishant Bangarwa via 
Ashutosh Chauhan)
9a6ec1e is described below

commit 9a6ec1e351d59b76419a21d1f2c8781e306b02d0
Author: Nishant Bangarwa 
AuthorDate: Mon Apr 13 22:29:26 2020 +0530

HIVE-23184 : Upgrade druid to 0.17.1 ( Nishant Bangarwa via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 data/scripts/kafka_init_data.csv   |   2 +-
 druid-handler/pom.xml  |   6 +
 .../apache/hadoop/hive/druid/DruidKafkaUtils.java  |  43 ++-
 .../hadoop/hive/druid/DruidStorageHandler.java |  61 ++--
 .../hive/druid/DruidStorageHandlerUtils.java   |  62 ++---
 .../hadoop/hive/druid/io/DruidOutputFormat.java|  22 +-
 .../hive/druid/io/DruidQueryBasedInputFormat.java  |  56 +---
 .../hadoop/hive/druid/io/DruidRecordWriter.java|  10 +-
 .../druid/json/KafkaIndexTaskTuningConfig.java | 128 +
 .../hive/druid/json/KafkaSupervisorSpec.java   |  20 +-
 .../druid/json/KafkaSupervisorTuningConfig.java| 208 +-
 .../hadoop/hive/druid/json/KafkaTuningConfig.java  | 307 
 .../json/SeekableStreamIndexTaskTuningConfig.java  | 308 +
 .../json/SeekableStreamSupervisorTuningConfig.java |  59 
 .../hive/druid/serde/DruidQueryRecordReader.java   |   3 +-
 .../druid/serde/DruidSelectQueryRecordReader.java  |  92 --
 .../hadoop/hive/druid/TestDruidStorageHandler.java |   8 +-
 .../druid/TestHiveDruidQueryBasedInputFormat.java  |  44 ---
 .../hadoop/hive/druid/serde/TestDruidSerDe.java| 244 
 .../hadoop/hive/ql/io/TestDruidRecordWriter.java   |  21 +-
 itests/qtest-druid/pom.xml |  22 ++
 pom.xml|   2 +-
 .../druid/druidkafkamini_delimited.q.out   |   2 +-
 .../druidmini_semijoin_reduction_all_types.q.out   |  24 +-
 24 files changed, 753 insertions(+), 1001 deletions(-)

diff --git a/data/scripts/kafka_init_data.csv b/data/scripts/kafka_init_data.csv
index 5dc094e..d818144 100644
--- a/data/scripts/kafka_init_data.csv
+++ b/data/scripts/kafka_init_data.csv
@@ -1,4 +1,4 @@
-"2013-08-31T01:02:33Z", "Gypsy 
Danger","en","nuclear","true","true","false","false","article","North 
America","United States","Bay Area","San Francisco",57,200,-143
+"2013-08-31T01:02:33Z","Gypsy 
Danger","en","nuclear","true","true","false","false","article","North 
America","United States","Bay Area","San Francisco",57,200,-143
 "2013-08-31T03:32:45Z","Striker 
Eureka","en","speed","false","true","true","false","wikipedia","Australia","Australia","Cantebury","Syndey",459,129,330
 "2013-08-31T07:11:21Z","Cherno 
Alpha","ru","masterYi","false","true","true","false","article","Asia","Russia","Oblast","Moscow",123,12,111
 "2013-08-31T11:58:39Z","Crimson 
Typhoon","zh","triplets","true","false","true","false","wikipedia","Asia","China","Shanxi","Taiyuan",905,5,900
diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index c7a2d4c..e6ca298 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -293,6 +293,12 @@
 
   
 
+
+  org.apache.logging.log4j
+  log4j-api
+  ${log4j2.version}
+  test
+
   
 
   
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java
index b56d48a..fb6ce30 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java
@@ -29,8 +29,8 @@ import org.apache.druid.data.input.impl.JSONParseSpec;
 import org.apache.druid.data.input.impl.StringInputRowParser;
 import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.java.util.http.client.Request;
-import org.apache.druid.java.util.http.client.response.FullResponseHandler;
-import org.apache.druid.java.util.http.client.response.FullResponseHolder;
+import 
org.apache.druid.java.util.http.client.response.StringFullResponseHandler;
+import

[hive] branch master updated: HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 809a859  HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via 
Ashutosh Chauhan)
809a859 is described below

commit 809a859f98b520f2989db6aff7bd5451bb546596
Author: David Mollitor 
AuthorDate: Sat May 9 10:24:02 2020 -0700

HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 itests/hive-unit/pom.xml   |  6 --
 jdbc-handler/pom.xml   |  1 -
 metastore/pom.xml  | 10 
 pom.xml| 10 ++--
 standalone-metastore/metastore-common/pom.xml  |  4 --
 standalone-metastore/metastore-server/pom.xml  |  4 +-
 .../datasource/DbCPDataSourceProvider.java | 67 +++---
 .../datasource/TestDataSourceProviderFactory.java  |  3 +-
 .../schematool/TestSchemaToolForMetastore.java |  2 +-
 standalone-metastore/pom.xml   | 12 ++--
 10 files changed, 52 insertions(+), 67 deletions(-)

diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml
index 2219002..026c465 100644
--- a/itests/hive-unit/pom.xml
+++ b/itests/hive-unit/pom.xml
@@ -431,12 +431,6 @@
   ${plexus.version}
   test
 
-
-  commons-dbcp
-  commons-dbcp
-  ${commons-dbcp.version}
-  test
-
   
 
   
diff --git a/jdbc-handler/pom.xml b/jdbc-handler/pom.xml
index b2d9ec8..da137cd 100644
--- a/jdbc-handler/pom.xml
+++ b/jdbc-handler/pom.xml
@@ -89,7 +89,6 @@
 
   org.apache.commons
   commons-dbcp2
-  ${commons-dbcp2.version}
 
 
 
diff --git a/metastore/pom.xml b/metastore/pom.xml
index cca56a5..f1d5b66 100644
--- a/metastore/pom.xml
+++ b/metastore/pom.xml
@@ -87,16 +87,6 @@
   ${derby.version}
 
 
-  commons-pool
-  commons-pool
-  ${commons-pool.version}
-
-
-  commons-dbcp
-  commons-dbcp
-  ${commons-dbcp.version}
-
-
   org.antlr
   antlr-runtime
   ${antlr.version}
diff --git a/pom.xml b/pom.xml
index 2e1e9bb..1432bcf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -141,10 +141,7 @@
 1.1
 2.6
 3.9
-1.5.4
-1.4
-2.6.1
-2.6.0
+2.7.0
 1.8
 10.14.1.0
 3.1.0
@@ -351,6 +348,11 @@
 ${commons-io.version}
   
   
+org.apache.commons
+commons-dbcp2
+${commons-dbcp2.version}
+  
+  
 io.netty
 netty-all
 ${netty.version}
diff --git a/standalone-metastore/metastore-common/pom.xml 
b/standalone-metastore/metastore-common/pom.xml
index ab26915..e8748c7 100644
--- a/standalone-metastore/metastore-common/pom.xml
+++ b/standalone-metastore/metastore-common/pom.xml
@@ -69,10 +69,6 @@
   HikariCP
 
 
-  commons-dbcp
-  commons-dbcp
-
-
   io.dropwizard.metrics
   metrics-core
 
diff --git a/standalone-metastore/metastore-server/pom.xml 
b/standalone-metastore/metastore-server/pom.xml
index 7355e93..021c25a 100644
--- a/standalone-metastore/metastore-server/pom.xml
+++ b/standalone-metastore/metastore-server/pom.xml
@@ -71,8 +71,8 @@
   HikariCP
 
 
-  commons-dbcp
-  commons-dbcp
+  org.apache.commons
+  commons-dbcp2
 
 
   io.dropwizard.metrics
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
index c687a25..1a5a1d2 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
@@ -17,24 +17,26 @@
  */
 package org.apache.hadoop.hive.metastore.datasource;
 
-import org.apache.commons.dbcp.BasicDataSource;
-import org.apache.commons.dbcp.ConnectionFactory;
-import org.apache.commons.dbcp.DataSourceConnectionFactory;
-import org.apache.commons.dbcp.PoolableConnectionFactory;
-import org.apache.commons.dbcp.PoolingDataSource;
-import org.apache.commons.pool.impl.GenericObjectPool;
+import static org.apache.hadoop.hive.metastore.DatabaseProduct.MYSQL;
+import static 
org.apache.hadoop.hive.metastore.DatabaseProduct.determineDatabaseProduct;
+
+import java.sql.SQLException;
+
+import javax.sql.DataSource;
+
+import org.apache.commons.dbcp2.BasicDataSource;
+import org.apache.commons.dbcp2.ConnectionFactory;
+import org.apache.commons.dbcp2.DataSourceConnectionFactory;
+import org.apache.commons.dbcp2.PoolableConnectionFactory;
+import
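
For code that only needs a pooled javax.sql.DataSource, the visible effect of the DBCP 2 move is the package rename plus a few setter renames. A hedged sketch of the post-upgrade usage, assuming commons-dbcp2 is on the classpath and not reflecting the DbCPDataSourceProvider rewrite above:

import javax.sql.DataSource;
import org.apache.commons.dbcp2.BasicDataSource;

public class Dbcp2Sketch {
  static DataSource create(String url, String user, String password) {
    BasicDataSource ds = new BasicDataSource(); // org.apache.commons.dbcp2, no longer commons-dbcp
    ds.setUrl(url);
    ds.setUsername(user);
    ds.setPassword(password);
    ds.setMaxTotal(10);                         // DBCP 1.x named this setMaxActive
    return ds;
  }
}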

[hive] branch master updated: HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 8c88676  HIVE-23393 : LLapInputFormat reader policy for Random IO 
formats (Panos G via Ashutosh Chauhan)
8c88676 is described below

commit 8c88676a91115be56eee1a2fca5c9c2c3ee0402d
Author: Panagiotis Garefalakis 
AuthorDate: Sat May 9 10:17:42 2020 -0700

HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java  | 11 +++
 ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index ac1aca8..e184655 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -19,6 +19,9 @@
 
 package org.apache.hadoop.hive.llap.io.api.impl;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -62,6 +65,9 @@ import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hive.common.util.HiveStringUtils;
 
+import static org.apache.hadoop.hive.common.FileUtils.isS3a;
+import static 
org.apache.hadoop.hive.ql.io.HiveInputFormat.isRandomAccessInputFormat;
+
 public class LlapInputFormat implements InputFormat,
 VectorizedInputFormatInterface, SelfDescribingInputFormatInterface,
 AvoidSplitCombination {
@@ -100,6 +106,11 @@ public class LlapInputFormat implements 
InputFormat
* @param inputFormat
* @return
*/
-  private static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
+  public static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
 if (inputFormat instanceof OrcInputFormat ||
 inputFormat instanceof VectorizedParquetInputFormat) {
   return true;
@@ -449,7 +449,7 @@ public class HiveInputFormat
 
 FileSystem splitFileSystem = splitPath.getFileSystem(job);
 if (isS3a(splitFileSystem) && isRandomAccessInputFormat(inputFormat)) {
-  LOG.debug("Changing S3A input policy to RANDOM for split {}", splitPath);
+  LOG.debug("Changing S3A input policy to RANDOM");
   ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
 }
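
The moving parts here are a now-public isRandomAccessInputFormat() check and a per-split switch of the S3A input policy: columnar formats such as ORC and Parquet read footers and stripes out of order, so sequential read-ahead is the wrong default on S3A. In spirit the decision looks like the sketch below (illustrative types only; the real code calls S3AFileSystem.setInputPolicy(S3AInputPolicy.Random) as shown above):

public class ReaderPolicyDemo {
  enum InputPolicy { SEQUENTIAL, RANDOM }

  // Formats that seek within the file benefit from random-access reads.
  static boolean isRandomAccessFormat(String inputFormatClass) {
    return inputFormatClass.contains("OrcInputFormat")
        || inputFormatClass.contains("VectorizedParquetInputFormat");
  }

  static InputPolicy choosePolicy(boolean isS3a, String inputFormatClass) {
    return isS3a && isRandomAccessFormat(inputFormatClass)
        ? InputPolicy.RANDOM : InputPolicy.SEQUENTIAL;
  }

  public static void main(String[] args) {
    System.out.println(choosePolicy(true, "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")); // RANDOM
    System.out.println(choosePolicy(true, "org.apache.hadoop.mapred.TextInputFormat"));        // SEQUENTIAL
  }
}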
 



[hive] branch master updated: HIVE-23424 : Remove Dependency on Log4J from hive-shims-common (David Mollitor via Ashutosh Chauhan)

2020-05-09 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new fd82258  HIVE-23424 : Remove Dependency on Log4J from 
hive-shims-common (David Mollitor via Ashutosh Chauhan)
fd82258 is described below

commit fd82258d0e1c043579b4378e2bedf73bd5bc74fc
Author: David Mollitor 
AuthorDate: Sat May 9 09:43:53 2020 -0700

HIVE-23424 : Remove Dependency on Log4J from hive-shims-common (David 
Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 shims/common/pom.xml | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/shims/common/pom.xml b/shims/common/pom.xml
index e2a4711..34e3b43 100644
--- a/shims/common/pom.xml
+++ b/shims/common/pom.xml
@@ -36,11 +36,6 @@
 
 
 
-  org.apache.logging.log4j
-  log4j-slf4j-impl
-  ${log4j2.version}
-
-
   com.google.guava
   guava
   ${guava.version}
@@ -86,6 +81,10 @@
   
 
 
+  org.slf4j
+  slf4j-api
+
+
   junit
   junit
   test



[hive] branch master updated: HIVE-23282 : Reduce number of DB calls in ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan)

2020-05-06 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1caa309  HIVE-23282 : Reduce number of DB calls in 
ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan)
1caa309 is described below

commit 1caa3092789a010d87d8ea6a3edf63c6a6e0feb0
Author: Attila Magyar 
AuthorDate: Wed May 6 08:44:09 2020 -0700

HIVE-23282 : Reduce number of DB calls in 
ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../ql/metadata/SessionHiveMetaStoreClient.java|   3 +-
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  |  94 
 .../apache/hadoop/hive/metastore/ObjectStore.java  | 267 ++---
 .../hive/metastore/parser/ExpressionTree.java  |  73 +++---
 4 files changed, 310 insertions(+), 127 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 4f58cd9..f958720 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -1543,9 +1543,10 @@ public class SessionHiveMetaStoreClient extends 
HiveMetaStoreClient implements I
   private String generateJDOFilter(org.apache.hadoop.hive.metastore.api.Table 
table, ExpressionTree exprTree)
   throws MetaException {
 
+assert table != null;
 ExpressionTree.FilterBuilder filterBuilder = new 
ExpressionTree.FilterBuilder(true);
 Map params = new HashMap<>();
-exprTree.generateJDOFilterFragment(conf, table, params, filterBuilder);
+exprTree.generateJDOFilterFragment(conf, params, filterBuilder, 
table.getPartitionKeys());
 StringBuilder stringBuilder = new StringBuilder(filterBuilder.getFilter());
 // replace leading &&
 stringBuilder.replace(0, 4, "");
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index d155887..b69277e 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -43,7 +43,6 @@ import javax.jdo.Query;
 import javax.jdo.Transaction;
 import javax.jdo.datastore.JDOConnection;
 
-import com.google.common.collect.ImmutableMap;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.AggregateStatsCache.AggrColStats;
@@ -93,13 +92,14 @@ import 
org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode;
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import 
org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hive.common.util.BloomFilter;
 import org.datanucleus.store.rdbms.query.ForwardQueryResult;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
 /**
@@ -530,13 +530,10 @@ class MetaStoreDirectSql {
* @param max The maximum number of partitions to return.
* @return List of partitions.
*/
-  public List getPartitionsViaSqlFilter(
+  public List getPartitionsViaSqlFilter(String catName, String 
dbName, String tableName,
   SqlFilterForPushdown filter, Integer max) throws MetaException {
-Boolean isViewTable = isViewTable(filter.table);
-String catName = filter.table.isSetCatName() ? filter.table.getCatName() :
-DEFAULT_CATALOG_NAME;
 List partitionIds = getPartitionIdsViaSqlFilter(catName,
-filter.table.getDbName(), filter.table.getTableName(), filter.filter, 
filter.params,
+dbName, tableName, filter.filter, filter.params,
 filter.joins, max);
 if (partitionIds.isEmpty()) {
   return Collections.emptyList(); // no partitions, bail early.
@@ -544,8 +541,8 @@ class MetaStoreDirectSql {
 return Batchable.runBatched(batchSize, partitionIds, new Batchable() {
   @Override
   public List run(List input) throws MetaException {
-return getPartitionsFromPartitionIds(catName, filter.table.getDbName(),
-filter.table.getTableName(), isViewTable, input

[hive] branch master updated: HIVE-23321 : Tolerate in tests that metastore is not removing rows from the skewed_string_list_values table (Zoltan Haindrich via Ashutosh Chauhan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b283bbd  HIVE-23321 : Tolerate in tests that metastore is not removing 
rows from the skewed_string_list_values table (Zoltan Haindrich via Ashutosh 
Chauhan)
b283bbd is described below

commit b283bbda0aa93f10e3a4418bef2534cbb510b5e5
Author: Zoltan Haindrich 
AuthorDate: Sun May 3 10:15:58 2020 -0700

HIVE-23321 : Tolerate in tests that metastore is not removing rows from the 
skewed_string_list_values table (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/test/queries/clientpositive/sysdb.q  |  5 +++--
 ql/src/test/results/clientpositive/llap/sysdb.q.out | 10 --
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/ql/src/test/queries/clientpositive/sysdb.q 
b/ql/src/test/queries/clientpositive/sysdb.q
index 2463ce6..c2201a9 100644
--- a/ql/src/test/queries/clientpositive/sysdb.q
+++ b/ql/src/test/queries/clientpositive/sysdb.q
@@ -81,9 +81,10 @@ select skewed_col_name from skewed_col_names order by 
skewed_col_name limit 5;
 
 select count(*) from skewed_col_value_loc_map;
 
-select count(*) from skewed_string_list;
+-- HIVE-23289: there are fallout in these tables from previous tests
+select * from skewed_string_list limit 0;
 
-select count(*) from skewed_string_list_values;
+select * from skewed_string_list_values limit 0;
 
 select count(*) from skewed_values;
 
diff --git a/ql/src/test/results/clientpositive/llap/sysdb.q.out 
b/ql/src/test/results/clientpositive/llap/sysdb.q.out
index 619c0d2..bffbedb 100644
--- a/ql/src/test/results/clientpositive/llap/sysdb.q.out
+++ b/ql/src/test/results/clientpositive/llap/sysdb.q.out
@@ -1228,24 +1228,22 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: sys@skewed_col_value_loc_map
  A masked pattern was here 
 0
-PREHOOK: query: select count(*) from skewed_string_list
+PREHOOK: query: select * from skewed_string_list limit 0
 PREHOOK: type: QUERY
 PREHOOK: Input: sys@skewed_string_list
  A masked pattern was here 
-POSTHOOK: query: select count(*) from skewed_string_list
+POSTHOOK: query: select * from skewed_string_list limit 0
 POSTHOOK: type: QUERY
 POSTHOOK: Input: sys@skewed_string_list
  A masked pattern was here 
-3
-PREHOOK: query: select count(*) from skewed_string_list_values
+PREHOOK: query: select * from skewed_string_list_values limit 0
 PREHOOK: type: QUERY
 PREHOOK: Input: sys@skewed_string_list_values
  A masked pattern was here 
-POSTHOOK: query: select count(*) from skewed_string_list_values
+POSTHOOK: query: select * from skewed_string_list_values limit 0
 POSTHOOK: type: QUERY
 POSTHOOK: Input: sys@skewed_string_list_values
  A masked pattern was here 
-3
 PREHOOK: query: select count(*) from skewed_values
 PREHOOK: type: QUERY
 PREHOOK: Input: sys@skewed_values



[hive] branch master updated: HIVE-23252 : Change spark related tests to be optional (Zoltan Haindrich via Ashutosh Chauhan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f85ccf2  HIVE-23252 : Change spark related tests to be optional 
(Zoltan Haindrich via Ashutosh Chauhan)
f85ccf2 is described below

commit f85ccf2eb4077ce1a0eb13063cd540737fea3458
Author: Zoltan Haindrich 
AuthorDate: Sun May 3 10:06:45 2020 -0700

HIVE-23252 : Change spark related tests to be optional (Zoltan Haindrich 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 itests/hive-unit/pom.xml | 5 -
 itests/pom.xml   | 5 -
 pom.xml  | 7 +--
 3 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml
index 103975f..2219002 100644
--- a/itests/hive-unit/pom.xml
+++ b/itests/hive-unit/pom.xml
@@ -442,11 +442,6 @@
   
 
   spark-test
-  
-
-  !skipSparkTests
-
-  
   
 
   org.apache.spark
diff --git a/itests/pom.xml b/itests/pom.xml
index 3dc95bf..d4fb252 100644
--- a/itests/pom.xml
+++ b/itests/pom.xml
@@ -476,11 +476,6 @@
   
 
   spark-test
-  
-
-  !skipSparkTests
-
-  
   
 qtest-spark
   
diff --git a/pom.xml b/pom.xml
index 29cde34..2e1e9bb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,7 @@
 
 
 
-
+
**/TestSparkStatistics*,**/TestSparkSessionTimeout*,**/TestJdbcWithMiniHS2ErasureCoding*,**/TestLocalHiveSparkClient*
 
 
 1.0b3
@@ -1656,11 +1656,6 @@
 
 
   spark-test
-  
-   
-  !skipSparkTests
-   
-  
   
 
   
**/ql/exec/spark/session/TestSparkSessionManagerImpl.java,**/TestMultiSessionsHS2WithLocalClusterSpark.java,**/TestJdbcWithLocalClusterSpark.java



[hive] branch master updated: HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1f4bc02  HIVE-23215 : Make FilterContext and MutableFilterContext 
interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
1f4bc02 is described below

commit 1f4bc024d202ff69ff512d4ba50ad9719cbe2854
Author: Owen O'Malley 
AuthorDate: Sun May 3 01:05:47 2020 -0700

HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen 
O'malley, Panos G via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/ql/io/filter/TestFilterContext.java   | 38 +++-
 .../hive/ql/exec/vector/VectorizedRowBatch.java| 63 +++-
 .../hadoop/hive/ql/io/filter/FilterContext.java| 28 ++---
 .../hive/ql/io/filter/MutableFilterContext.java| 68 +++---
 4 files changed, 84 insertions(+), 113 deletions(-)

diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
index 0bda620..c59cc09 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.io.filter;
 
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -43,7 +44,7 @@ public class TestFilterContext {
 
   @Test
   public void testInitFilterContext(){
-MutableFilterContext mutableFilterContext = new MutableFilterContext();
+MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
 int[] selected = makeValidSelected();
 
 mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -57,7 +58,7 @@ public class TestFilterContext {
 
   @Test
   public void testResetFilterContext(){
-MutableFilterContext mutableFilterContext = new MutableFilterContext();
+MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
 int[] selected = makeValidSelected();
 
 mutableFilterContext.setFilterContext(true, selected, selected.length);
@@ -67,55 +68,32 @@ public class TestFilterContext {
 Assert.assertEquals(512, filterContext.getSelectedSize());
 Assert.assertEquals(512, filterContext.getSelected().length);
 
-filterContext.resetFilterContext();
+filterContext.reset();
 
 Assert.assertEquals(false, filterContext.isSelectedInUse());
 Assert.assertEquals(0, filterContext.getSelectedSize());
-Assert.assertEquals(null, filterContext.getSelected());
   }
 
   @Test(expected=AssertionError.class)
   public void testInitInvalidFilterContext(){
-MutableFilterContext mutableFilterContext = new MutableFilterContext();
+MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
 int[] selected = makeInvalidSelected();
 
 mutableFilterContext.setFilterContext(true, selected, selected.length);
   }
 
-
-  @Test
-  public void testCopyFilterContext(){
-MutableFilterContext mutableFilterContext = new MutableFilterContext();
-int[] selected = makeValidSelected();
-
-mutableFilterContext.setFilterContext(true, selected, selected.length);
-
-MutableFilterContext mutableFilterContextToCopy = new 
MutableFilterContext();
-mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1);
-
-mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy);
-FilterContext filterContext = mutableFilterContext.immutable();
-
-Assert.assertEquals(true, filterContext.isSelectedInUse());
-Assert.assertEquals(1, filterContext.getSelectedSize());
-Assert.assertEquals(100, filterContext.getSelected()[0]);
-// make sure we kept the remaining array space
-Assert.assertEquals(512, filterContext.getSelected().length);
-  }
-
-
   @Test
   public void testBorrowSelected(){
-MutableFilterContext mutableFilterContext = new MutableFilterContext();
+MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0);
 mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2);
 
-int[] borrowedSelected = mutableFilterContext.borrowSelected(1);
+int[] borrowedSelected = mutableFilterContext.updateSelected(1);
 // make sure we borrowed the existing array
 Assert.assertEquals(2, borrowedSelected.length);
 Assert.assertEquals(100, borrowedSelected[0]);
 Assert.assertEquals(200, borrowedSelected[1]);
 
-borrowedSelected = mutableFilterContext.borrowSelected(3);
+borrowedSelected = mutableFilterContext.updateSelected(3);
 Assert.assertEquals(3, borrowedSelected.length);
 Assert.assertEquals(0, borrowedSelected[0]);
 Assert.assertEquals(0, borrowedSelected[1]);
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec
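
The refactor turns FilterContext and MutableFilterContext into interfaces and leaves VectorizedRowBatch as the concrete carrier of the selected-row state, which is why the tests above construct `new VectorizedRowBatch(0)` where they used to instantiate MutableFilterContext directly. A condensed sketch of that shape, with simplified signatures rather than the real storage-api types:

// Read-only view of the row selection.
interface FilterContext {
  boolean isSelectedInUse();
  int[] getSelected();
  int getSelectedSize();
  void reset();
}

// Mutable view: adds the setter used while a filter is being applied.
interface MutableFilterContext extends FilterContext {
  void setFilterContext(boolean selectedInUse, int[] selected, int selectedSize);
}

// Stand-in for VectorizedRowBatch, the single class holding the state.
class RowBatchSketch implements MutableFilterContext {
  private boolean selectedInUse;
  private int[] selected = new int[0];
  private int selectedSize;

  @Override
  public void setFilterContext(boolean selectedInUse, int[] selected, int selectedSize) {
    this.selectedInUse = selectedInUse;
    this.selected = selected;
    this.selectedSize = selectedSize;
  }

  @Override public boolean isSelectedInUse() { return selectedInUse; }
  @Override public int[] getSelected() { return selected; }
  @Override public int getSelectedSize() { return selectedSize; }
  @Override public void reset() { selectedInUse = false; selectedSize = 0; }
}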

[hive] branch master updated: HIVE-23322 : Update some tests to better tolerate a more busy environment (Zoltan Haindrich via Ashutosh Chauhan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e48a682  HIVE-23322 : Update some tests to better tolerate a more busy 
environment (Zoltan Haindrich via Ashutosh Chauhan)
e48a682 is described below

commit e48a6827dc78c38b282b590e95152cdabdd79d85
Author: Zoltan Haindrich 
AuthorDate: Sun May 3 01:00:06 2020 -0700

HIVE-23322 : Update some tests to better tolerate a more busy environment 
(Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/common/metrics/metrics2/TestCodahaleMetrics.java |  7 ++-
 .../hive/llap/tezplugins/TestLlapTaskCommunicator.java|  4 ++--
 .../hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java  | 15 +--
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git 
a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java
 
b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java
index 1c49d95..9c4e475 100644
--- 
a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java
+++ 
b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java
@@ -154,7 +154,12 @@ public class TestCodahaleMetrics {
   public void testFileReporting() throws Exception {
 int runs = 5;
 String  counterName = "count2";
-for (int i = 0; i < runs; i++) {
+
+// on the first write the metrics writer should initialize stuff
+MetricsFactory.getInstance().incrementCounter(counterName);
+sleep(5 * REPORT_INTERVAL_MS);
+
+for (int i = 1; i <= runs; i++) {
   MetricsFactory.getInstance().incrementCounter(counterName);
   sleep(REPORT_INTERVAL_MS + REPORT_INTERVAL_MS / 2);
   Assert.assertEquals(i + 1, getCounterValue(counterName));
diff --git 
a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java
 
b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java
index 9b4ac27..3bbbdf3 100644
--- 
a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java
+++ 
b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java
@@ -66,7 +66,7 @@ import org.mockito.stubbing.Answer;
 
 public class TestLlapTaskCommunicator {
 
-  @Test (timeout = 5000)
+  @Test (timeout = 3)
   public void testEntityTracker1() {
 LlapTaskCommunicator.EntityTracker entityTracker = new 
LlapTaskCommunicator.EntityTracker();
 
@@ -121,7 +121,7 @@ public class TestLlapTaskCommunicator {
   }
 
 
-  @Test(timeout = 5000)
+  @Test(timeout = 3)
   public void testFinishableStateUpdateFailure() throws Exception {
 
 LlapTaskCommunicatorWrapperForTest wrapper = null;
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
index 0d3..fbf2b8b 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.junit.Assert;
-import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -41,11 +40,6 @@ public class TestSQL11ReservedKeyWordsNegative {
   private static HiveConf conf = new HiveConf(SemanticAnalyzer.class);
   private static ParseDriver pd = new ParseDriver();
 
-  @BeforeClass
-  public static void initialize() {
-SessionState.start(conf);
-  }
-
   private static ASTNode parse(String query) throws ParseException {
 ASTNode nd = null;
 try {
@@ -57,6 +51,11 @@ public class TestSQL11ReservedKeyWordsNegative {
   }
 
   public static class TestSQL11ReservedKeyWordsNegativeMisc {
+@BeforeClass
+public static void initialize() {
+  SessionState.start(conf);
+}
+
 @Test
 public void testSQL11ReservedKeyWords_KILL() {
   try {
@@ -72,6 +71,10 @@ public class TestSQL11ReservedKeyWordsNegative {
 
   @RunWith(Parameterized.class)
   public static class TestSQL11ReservedKeyWordsNegativeParametrized {
+@BeforeClass
+public static void initialize() {
+  SessionState.start(conf);
+}
 
 @Parameters(name = "{0}")
 public static Collection data() {
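
The pattern above (an extra warm-up write plus wider sleeps and timeouts) is one way to make timing-sensitive tests survive a loaded CI host. A related, purely illustrative alternative is to poll for the expected state with a deadline instead of asserting after one fixed sleep; nothing below is part of the patch.

import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

// Illustrative polling helper: retry a cheap check until it passes or a
// deadline expires, rather than asserting right after a single fixed sleep.
final class WaitFor {
  static boolean condition(BooleanSupplier cond, long timeoutMs, long pollMs)
      throws InterruptedException {
    long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
    while (System.nanoTime() < deadline) {
      if (cond.getAsBoolean()) {
        return true;            // condition reached before the deadline
      }
      Thread.sleep(pollMs);     // back off and try again
    }
    return cond.getAsBoolean(); // one last check at the deadline
  }

  public static void main(String[] args) throws InterruptedException {
    long start = System.currentTimeMillis();
    // stand-in for "the metrics file finally contains the expected counter"
    boolean ok = condition(() -> System.currentTimeMillis() - start > 200, 2000, 50);
    System.out.println("condition met: " + ok);
  }
}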



[hive] branch master updated: HIVE-23240 : loadDynamicPartition complains about static partitions even when they are provided in the description (Reza Safi via Ashutosh Chauhan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d3ee2ae  HIVE-23240 : loadDynamicPartition complains about static 
partitions even when they are provided in the description (Reza Safi via 
Ashutosh Chauhan)
d3ee2ae is described below

commit d3ee2ae414686de90f13e5db1d857ec51e783bad
Author: Ashutosh Chauhan 
AuthorDate: Sun May 3 00:55:38 2020 -0700

HIVE-23240 : loadDynamicPartition complains about static partitions even 
when they are provided in the description (Reza Safi via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 7bb90d3..bd1fae1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2879,7 +2879,12 @@ private void constructOneLBLocationMap(FileStatus fSta,
 // calculate full path spec for each valid partition path
 validPartitions.forEach(partPath -> {
   Map fullPartSpec = Maps.newLinkedHashMap(partSpec);
-  if (!Warehouse.makeSpecFromName(fullPartSpec, partPath, new 
HashSet<>(partSpec.keySet( {
+  String staticParts =  Warehouse.makeDynamicPartName(partSpec);
+  Path computedPath = partPath;
+  if (!staticParts.isEmpty() ) {
+computedPath = new Path(new Path(partPath.getParent(), staticParts), 
partPath.getName());
+  }
+  if (!Warehouse.makeSpecFromName(fullPartSpec, computedPath, new 
HashSet<>(partSpec.keySet( {
 Utilities.FILE_OP_LOGGER.warn("Ignoring invalid DP directory " + 
partPath);
   } else {
 PartitionDetails details = new PartitionDetails();
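
In short, when some partition columns are static, their key=value segments are put back in front of the dynamically discovered directory before the full partition spec is derived from the path. A simplified stand-alone sketch of that path adjustment follows; the names and the escaping are invented here, and the real work is done by Hive's Warehouse utilities.

import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative sketch: static partition segments are prefixed to the dynamic
// directory name so the combined path carries the complete partition spec.
final class PartPathSketch {
  static String fullPartitionPath(Map<String, String> partSpec, String dynamicDir) {
    StringBuilder prefix = new StringBuilder();
    for (Map.Entry<String, String> e : partSpec.entrySet()) {
      if (e.getValue() != null && !e.getValue().isEmpty()) {
        prefix.append(e.getKey()).append('=').append(e.getValue()).append('/');
      }
    }
    return prefix + dynamicDir; // e.g. "ds=2020-05-03/" + "hr=11"
  }

  public static void main(String[] args) {
    Map<String, String> spec = new LinkedHashMap<>();
    spec.put("ds", "2020-05-03"); // static partition supplied in the query
    spec.put("hr", null);         // dynamic partition discovered on disk
    System.out.println(fullPartitionPath(spec, "hr=11")); // ds=2020-05-03/hr=11
  }
}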



[hive] branch master updated: HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo is static object locked (Ashutosh Chauhan via Rajesh Balamohan)

2020-05-03 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 2b177db  HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo 
is static object locked (Ashutosh Chauhan via Rajesh Balamohan)
2b177db is described below

commit 2b177db2fd71ccd602247fae87362801a9095f1a
Author: Ashutosh Chauhan 
AuthorDate: Sat Apr 25 19:00:13 2020 -0700

HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo is static 
object locked (Ashutosh Chauhan via Rajesh Balamohan)
---
 .../org/apache/hadoop/hive/ql/exec/Registry.java   | 25 ++
 .../results/clientpositive/llap/udf_substr.q.out   |  2 +-
 .../clientpositive/llap/udf_substring.q.out|  2 +-
 3 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
index 40e9e97..6ceea2f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
@@ -78,7 +78,7 @@ public class Registry {
   /**
* The mapping from expression function names to expression classes.
*/
-  private final Map mFunctions = new 
LinkedHashMap();
+  private final Map mFunctions = new 
ConcurrentHashMap();
   private final Set> builtIns = Collections.synchronizedSet(new 
HashSet>());
   /**
* Persistent map contains refcounts that are only modified in synchronized 
methods for now,
@@ -91,6 +91,7 @@ public class Registry {
   /**
* The epic lock for the registry. This was added to replace the 
synchronized methods with
* minimum disruption; the locking should really be made more granular here.
+   * This lock is protecting mFunctions, builtIns and persistent maps.
*/
   private final ReentrantLock lock = new ReentrantLock();
 
@@ -331,11 +332,9 @@ public class Registry {
* @return
*/
   public FunctionInfo getFunctionInfo(String functionName) throws 
SemanticException {
-lock.lock();
-try {
   functionName = functionName.toLowerCase();
   if (FunctionUtils.isQualifiedFunctionName(functionName)) {
-FunctionInfo functionInfo = 
getQualifiedFunctionInfoUnderLock(functionName);
+FunctionInfo functionInfo = getQualifiedFunctionInfo(functionName);
 addToCurrentFunctions(functionName, functionInfo);
 return functionInfo;
   }
@@ -348,14 +347,10 @@ public class Registry {
   if (functionInfo == null) {
 functionName = FunctionUtils.qualifyFunctionName(
 functionName, 
SessionState.get().getCurrentDatabase().toLowerCase());
-functionInfo = getQualifiedFunctionInfoUnderLock(functionName);
+functionInfo = getQualifiedFunctionInfo(functionName);
   }
   addToCurrentFunctions(functionName, functionInfo);
   return functionInfo;
-} finally {
-  lock.unlock();
-}
-
   }
 
   private void addToCurrentFunctions(String functionName, FunctionInfo 
functionInfo) {
@@ -633,7 +628,7 @@ public class Registry {
 return null;
   }
 
-  private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName) 
throws SemanticException {
+  private FunctionInfo getQualifiedFunctionInfo(String qualifiedName) throws 
SemanticException {
 FunctionInfo info = mFunctions.get(qualifiedName);
 if (info != null && info.isBlockedFunction()) {
   throw new SemanticException ("UDF " + qualifiedName + " is not allowed");
@@ -658,15 +653,7 @@ public class Registry {
 if (conf == null || !HiveConf.getBoolVar(conf, 
ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) {
   return null;
 }
-// This is a little bit weird. We'll do the MS call outside of the lock. 
Our caller calls us
-// under lock, so we'd preserve the lock state for them; their finally 
block will release the
-// lock correctly. See the comment on the lock field - the locking needs 
to be reworked.
-lock.unlock();
-try {
-  return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf);
-} finally {
-  lock.lock();
-}
+return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf);
   }
 
   // should be called after session registry is checked
diff --git a/ql/src/test/results/clientpositive/llap/udf_substr.q.out 
b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
index 00fa606..7c1a0f1 100644
--- a/ql/src/test/results/clientpositive/llap/udf_substr.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
@@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr
 POSTHOOK: type: DESCFUNCTION
 substr(str, pos[, len]) - returns the substring of str that starts at pos and 
is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array 
that starts at pos and is of le
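
The essence of the change is that lookups read a ConcurrentHashMap directly instead of funnelling every reader through one ReentrantLock. The sketch below shows the same idea in isolation; it uses computeIfAbsent for the load-on-demand path, which is a related pattern rather than a copy of the patch, and none of the names are Hive's.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;

// Minimal registry sketch: concurrent readers hit the map without a shared lock.
final class MiniRegistry<T> {
  private final ConcurrentMap<String, T> functions = new ConcurrentHashMap<>();

  void register(String name, T info) {
    functions.put(name.toLowerCase(), info);
  }

  T lookup(String name, Function<String, T> loadOnDemand) {
    // computeIfAbsent gives atomic "check, then load once" semantics without
    // holding a registry-wide lock around the (possibly slow) load.
    return functions.computeIfAbsent(name.toLowerCase(), loadOnDemand);
  }

  public static void main(String[] args) {
    MiniRegistry<String> reg = new MiniRegistry<>();
    reg.register("substr", "GenericUDFSubstr");
    System.out.println(reg.lookup("SUBSTR", n -> "loaded:" + n)); // GenericUDFSubstr
    System.out.println(reg.lookup("upper", n -> "loaded:" + n));  // loaded:upper
  }
}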

[hive] branch master updated: HIVE-23345: Enable Parquet timestamp types (INT64 and INT96) conversion to Hive BIGINT type. Adding test cases (Panos G via Ashutosh Chauhan)

2020-05-02 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 659e28d  HIVE-23345: Enable Parquet timestamp types (INT64 and INT96) 
conversion to Hive BIGINT type. Adding test cases (Panos G via Ashutosh Chauhan)
659e28d is described below

commit 659e28de0b609d114e20e6294348abb74f49f6e0
Author: Panos Garefalakis 
AuthorDate: Fri May 1 14:24:39 2020 +0100

HIVE-23345: Enable Parquet timestamp types (INT64 and INT96) conversion to 
Hive BIGINT type. Adding test cases (Panos G via Ashutosh Chauhan)

Change-Id: I8666a95cc7ff7495a86b960c2ea173cd875bfa4f
Signed-off-by: Ashutosh Chauhan 
---
 .../test/resources/testconfiguration.properties|  1 +
 .../hive/ql/io/parquet/convert/ETypeConverter.java | 40 +--
 .../ql/io/parquet/convert/TestETypeConverter.java  | 29 +++
 .../clientpositive/parquet_timestampt_to_bigint.q  | 25 +
 .../llap/parquet_timestampt_to_bigint.q.out| 60 ++
 5 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index b639718..5468728 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -695,6 +695,7 @@ minillaplocal.query.files=\
   parquet_legacy_mixed_date.q,\
   parquet_legacy_mixed_timestamp.q,\
   parquet_proleptic_mixed_date.q,\
+  parquet_timestampt_to_bigint.q,\
   partition_ctas.q,\
   partition_multilevels.q,\
   partition_shared_scan.q,\
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 6082321..8e436bc 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -14,6 +14,8 @@
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
 import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.Optional;
@@ -43,6 +45,7 @@ import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
+import org.apache.parquet.Preconditions;
 import org.apache.parquet.column.Dictionary;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.PrimitiveConverter;
@@ -662,9 +665,25 @@ public enum ETypeConverter {
   };
 }
   },
-  ETIMESTAMP_CONVERTER(TimestampWritableV2.class) {
+  EINT96_TIMESTAMP_CONVERTER(TimestampWritableV2.class) {
 @Override
 PrimitiveConverter getConverter(final PrimitiveType type, final int index, 
final ConverterParent parent, TypeInfo hiveTypeInfo) {
+  if (hiveTypeInfo != null) {
+String typeName = 
TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName());
+switch (typeName) {
+  case serdeConstants.BIGINT_TYPE_NAME:
+return new BinaryConverter(type, parent, index) {
+  @Override
+  protected LongWritable convert(Binary binary) {
+Preconditions.checkArgument(binary.length() == 12, "Must be 12 
bytes");
+ByteBuffer buf = binary.toByteBuffer();
+buf.order(ByteOrder.LITTLE_ENDIAN);
+long longVal = buf.getLong();
+return new LongWritable(longVal);
+  }
+};
+}
+  }
   return new BinaryConverter(type, parent, index) {
 @Override
 protected TimestampWritableV2 convert(Binary binary) {
@@ -690,6 +709,22 @@ public enum ETypeConverter {
 @Override
 PrimitiveConverter getConverter(final PrimitiveType type, final int index, 
final ConverterParent parent,
 TypeInfo hiveTypeInfo) {
+  if (hiveTypeInfo != null) {
+String typeName = 
TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName());
+switch (typeName) {
+  case serdeConstants.BIGINT_TYPE_NAME:
+return new BinaryConverter(type, parent, index) {
+  @Override
+  protected LongWritable convert(Binary binary) {
+Preconditions.checkArgument(binary.length() == 8, "Must be 8 
bytes");
+ByteBuffer buf = binary.toByteBuffer();
+buf.order(ByteOrder.LITTLE_ENDIAN);
+long longVal = buf.getLong();
+return new LongWritable(longVal);
+  }
+};
+}
+  }
   return new PrimitiveConverter() {
 @Override
 public void addLong(final long value) {
@@ -735,8 +770,7 @@ public enum ETypeConverter {
   public static PrimitiveConverter getNewConverter(fina
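
For reference, an INT96 Parquet timestamp is a 12-byte value whose first 8 bytes hold the nanoseconds within the day in little-endian order, followed by a 4-byte Julian day; the new converters read only the leading long when the Hive column is declared BIGINT. A self-contained sketch of that byte-level conversion, with made-up constants:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

// Demo of the little-endian read performed by the converters above.
final class Int96ToLong {
  static long leadingLong(byte[] parquetBinary, int expectedLength) {
    if (parquetBinary.length != expectedLength) {
      throw new IllegalArgumentException("Must be " + expectedLength + " bytes");
    }
    ByteBuffer buf = ByteBuffer.wrap(parquetBinary);
    buf.order(ByteOrder.LITTLE_ENDIAN); // Parquet stores the value little endian
    return buf.getLong();
  }

  public static void main(String[] args) {
    byte[] int96 = new byte[12];
    ByteBuffer.wrap(int96).order(ByteOrder.LITTLE_ENDIAN)
        .putLong(123_456_789L)  // nanoseconds within the day
        .putInt(2_458_971);     // Julian day (ignored for the BIGINT conversion)
    System.out.println(leadingLong(int96, 12)); // 123456789
  }
}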

[hive] branch master updated: HIVE-23356 : Hash aggregation is always disabled while processing queries with grouping sets expressions. (Qiang Kang via Ashutosh Chauhan)

2020-05-02 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 0fd99df  HIVE-23356 : Hash aggregation is always disabled while 
processing queries with grouping sets expressions. (Qiang Kang via Ashutosh 
Chauhan)
0fd99df is described below

commit 0fd99df99dc07540d8818d179bcdcb2972f09752
Author: Qiang Kang 
AuthorDate: Sat May 2 12:12:06 2020 -0700

HIVE-23356 : Hash aggregation is always disabled while processing queries 
with grouping sets expressions. (Qiang Kang via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hadoop/hive/ql/exec/GroupByOperator.java   |  17 +--
 .../apache/hadoop/hive/ql/exec/TestOperators.java  | 131 +
 2 files changed, 140 insertions(+), 8 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index 7220f33..b94e3fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -739,20 +739,21 @@ public class GroupByOperator extends 
Operator implements IConfigure
   // if hash aggregation is not behaving properly, disable it
   if (numRowsInput == numRowsCompareHashAggr) {
 numRowsCompareHashAggr += groupbyMapAggrInterval;
+long numRowsProcessed = groupingSetsPresent ? numRowsInput * 
groupingSets.size() : numRowsInput;
 // map-side aggregation should reduce the entries by at-least half
-if (numRowsHashTbl > numRowsInput * minReductionHashAggr) {
+if (numRowsHashTbl > numRowsProcessed * minReductionHashAggr) {
   LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl
-  + " #total = " + numRowsInput + " reduction = " + 1.0
-  * (numRowsHashTbl / numRowsInput) + " minReduction = "
-  + minReductionHashAggr);
+  + " #numRowsInput = " + numRowsInput + " reduction = " + 1.0 * 
(numRowsHashTbl / numRowsProcessed)
+  + " minReduction = " + minReductionHashAggr + " 
groupingSetsPresent = " + groupingSetsPresent
+  + " numRowsProcessed = " + numRowsProcessed);
   flushHashTable(true);
   hashAggr = false;
 } else {
   if (LOG.isTraceEnabled()) {
-LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl
-+ " #total = " + numRowsInput + " reduction = " + 1.0
-* (numRowsHashTbl / numRowsInput) + " minReduction = "
-+ minReductionHashAggr);
+LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " 
#numRowsInput = " + numRowsInput
++ " reduction = " + 1.0 * (numRowsHashTbl / numRowsProcessed) 
+ " minReduction = "
++ minReductionHashAggr + " groupingSetsPresent = " + 
groupingSetsPresent + " numRowsProcessed = "
++ numRowsProcessed);
   }
 }
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
index 8a0606b..3c0a7eb 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
@@ -32,14 +32,18 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.io.IOContextMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
 import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
 import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.CollectDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -49,6 +53,7 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.sess
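
The arithmetic behind the fix: with grouping sets, each input row produces one aggregation entry per grouping set, so the reduction check must compare the hash table size against rows processed rather than rows read. A stand-alone illustration with invented numbers:

// Illustration of the reduction test rewritten above; values are made up.
final class HashAggrCheck {
  static boolean keepHashAggregation(long numRowsInput, long numRowsHashTbl,
      int groupingSetCount, boolean groupingSetsPresent, float minReductionHashAggr) {
    long numRowsProcessed =
        groupingSetsPresent ? numRowsInput * groupingSetCount : numRowsInput;
    // map-side aggregation should shrink the stream by at least minReduction
    return numRowsHashTbl <= numRowsProcessed * minReductionHashAggr;
  }

  public static void main(String[] args) {
    // 10,000 input rows, 4 grouping sets, 18,000 distinct hash-table entries.
    // Old check: 18,000 > 10,000 * 0.5, so hash aggregation was wrongly disabled.
    // New check: 18,000 <= 40,000 * 0.5, so hash aggregation stays enabled.
    System.out.println(keepHashAggregation(10_000, 18_000, 4, true, 0.5f));  // true
    System.out.println(keepHashAggregation(10_000, 18_000, 4, false, 0.5f)); // false
  }
}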

[hive] branch master updated: HIVE-23305 : NullPointerException in LlapTaskSchedulerService addNode due to race condition ( Attila Magyar via Ashutosh Chauhan)

2020-04-30 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f48c267  HIVE-23305 : NullPointerException in LlapTaskSchedulerService 
addNode due to race condition ( Attila Magyar via Ashutosh Chauhan)
f48c267 is described below

commit f48c26781d8a1a51c6a00d2e557ba6da46b28856
Author: Attila Magyar 
AuthorDate: Thu Apr 30 10:27:56 2020 -0700

HIVE-23305 : NullPointerException in LlapTaskSchedulerService addNode due 
to race condition ( Attila Magyar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
index 565afdc..9cb8bc9 100644
--- 
a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
+++ 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
@@ -813,8 +813,8 @@ public class LlapTaskSchedulerService extends TaskScheduler 
{
   Futures.addCallback(schedulerFuture, new 
LoggingFutureCallback("SchedulerThread", LOG));
 
   registry.start();
-  registry.registerStateChangeListener(new NodeStateChangeListener());
   activeInstances = registry.getInstances();
+  registry.registerStateChangeListener(new NodeStateChangeListener());
   for (LlapServiceInstance inst : activeInstances.getAll()) {
 registerAndAddNode(new NodeInfo(inst, nodeBlacklistConf, clock,
 numSchedulableTasksPerNode, metrics), inst);
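
The one-line swap matters because the listener can fire as soon as it is registered, possibly on another thread, and its callback dereferences activeInstances. A toy sketch of the ordering rule, with stand-in types rather than the LLAP plugin classes:

// Illustrative only: assign the field the callback needs before exposing the
// callback, so an immediate notification never sees a null field.
final class StartupOrdering {
  interface Listener { void onChange(String node); }

  static final class Registry {
    java.util.List<String> getInstances() { return java.util.List.of("node-1"); }
    void registerStateChangeListener(Listener l) {
      l.onChange("node-1"); // may fire immediately; on another thread in real life
    }
  }

  private java.util.List<String> activeInstances;

  void start(Registry registry) {
    // correct order: populate the field first, then register the listener
    activeInstances = registry.getInstances();
    registry.registerStateChangeListener(node ->
        System.out.println("known=" + activeInstances.contains(node)));
  }

  public static void main(String[] args) {
    new StartupOrdering().start(new Registry());
  }
}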



[hive] branch master updated: HIVE-23261 : Check whether encryption is enabled in the cluster before moving files (Ramesh Kumar via Rajesh Balamohan)

2020-04-29 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e13486b  HIVE-23261 : Check whether encryption is enabled in the 
cluster before moving files (Ramesh Kumar via Rajesh Balamohan)
e13486b is described below

commit e13486be06a5160b41a5310388dd9c704361bead
Author: RAMESH KUMAR THANGARAJAN 
AuthorDate: Wed Apr 29 11:29:28 2020 -0700

HIVE-23261 : Check whether encryption is enabled in the cluster before 
moving files (Ramesh Kumar via Rajesh Balamohan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 8d194c3..7bb90d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -89,6 +89,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
@@ -4121,7 +4122,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
   Arrays.sort(files);
   for (final FileStatus srcFile : files) {
 final Path srcP = srcFile.getPath();
-final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs, 
configuredOwner, isManaged);
+final boolean needToCopy = needToCopy(conf, srcP, destf, srcFs, 
destFs, configuredOwner, isManaged);
 
 final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
 
@@ -4436,7 +4437,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
 destFs.copyFromLocalFile(srcf, destf);
 return true;
   } else {
-if (needToCopy(srcf, destf, srcFs, destFs, configuredOwner, 
isManaged)) {
+if (needToCopy(conf, srcf, destf, srcFs, destFs, configuredOwner, 
isManaged)) {
   //copy if across file system or encryption zones.
   LOG.debug("Copying source " + srcf + " to " + destf + " because HDFS 
encryption zones are different.");
   return FileUtils.copy(srcf.getFileSystem(conf), srcf, 
destf.getFileSystem(conf), destf,
@@ -4561,7 +4562,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
* TODO- consider if need to do this for different file authority.
* @throws HiveException
*/
-  static private boolean needToCopy(Path srcf, Path destf, FileSystem srcFs,
+  static private boolean needToCopy(final HiveConf conf, Path srcf, Path 
destf, FileSystem srcFs,
   FileSystem destFs, String 
configuredOwner, boolean isManaged) throws HiveException {
 //Check if different FileSystems
 if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
@@ -4602,6 +4603,10 @@ private void constructOneLBLocationMap(FileStatus fSta,
   }
 }
 
+// if Encryption not enabled, no copy needed
+if (!DFSUtilClient.isHDFSEncryptionEnabled(conf)) {
+  return false;
+}
 //Check if different encryption zones
 HadoopShims.HdfsEncryptionShim srcHdfsEncryptionShim = 
SessionState.get().getHdfsEncryptionShim(srcFs);
 HadoopShims.HdfsEncryptionShim destHdfsEncryptionShim = 
SessionState.get().getHdfsEncryptionShim(destFs);
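
Reduced to its decision logic, the method now bails out cheaply when the cluster has no HDFS encryption at all, and only then pays for the encryption-zone comparison. A condensed sketch with placeholder predicates instead of the actual Hive and HDFS calls:

// Guard-clause sketch of the copy-vs-rename decision; booleans stand in for
// the real filesystem and encryption-zone checks.
final class CopyDecisionSketch {
  static boolean needToCopy(boolean sameFileSystem,
                            boolean encryptionEnabled,
                            boolean differentEncryptionZones) {
    if (!sameFileSystem) {
      return true;                    // cross-filesystem moves always copy
    }
    if (!encryptionEnabled) {
      return false;                   // no encryption, so a rename is enough
    }
    return differentEncryptionZones;  // copy only across encryption zones
  }

  public static void main(String[] args) {
    System.out.println(needToCopy(true, false, false)); // false: plain rename
    System.out.println(needToCopy(true, true, true));   // true: crosses zones
  }
}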



[hive] branch master updated: HIVE-23294 : Remove sync bottleneck in TezConfigurationFactory (Rajesh Balamohan via Ashutosh Chauhan)

2020-04-28 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c4e7376  HIVE-23294 : Remove sync bottleneck in 
TezConfigurationFactory (Rajesh Balamohan via Ashutosh Chauhan)
c4e7376 is described below

commit c4e73766a5c8cc0da0ba5d9387a00d651af4e820
Author: Ashutosh Chauhan 
AuthorDate: Tue Apr 28 21:19:37 2020 -0700

HIVE-23294 : Remove sync bottleneck in TezConfigurationFactory (Rajesh 
Balamohan via Ashutosh Chauhan)
---
 .../hive/ql/exec/tez/TezConfigurationFactory.java   | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java
index a0da0ad..84ae541 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java
@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hive.ql.exec.tez;
 
+import java.lang.reflect.Field;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.function.Predicate;
@@ -33,6 +34,7 @@ import static 
org.apache.hadoop.security.ssl.SSLFactory.SSL_CLIENT_CONF_KEY;
 
 public class TezConfigurationFactory {
   private static TezConfiguration defaultConf = new TezConfiguration();
+  private static final Field updatingResource;
 
   private static final Logger LOG = 
LoggerFactory.getLogger(TezConfigurationFactory.class.getName());
 
@@ -41,6 +43,14 @@ public class TezConfigurationFactory {
 String sslConf = defaultConf.get(SSL_CLIENT_CONF_KEY, "ssl-client.xml");
 defaultConf.addResource(sslConf);
 LOG.info("SSL conf : " + sslConf);
+try {
+  //Cache the field handle so that we can avoid expensive 
conf.getPropertySources(key) later
+  updatingResource = 
Configuration.class.getDeclaredField("updatingResource");
+} catch (NoSuchFieldException | SecurityException e) {
+  throw new RuntimeException(e);
+}
+updatingResource.setAccessible(true);
+
   }
 
   public static Configuration copyInto(Configuration target, Configuration src,
@@ -50,7 +60,12 @@ public class TezConfigurationFactory {
   Map.Entry entry = iter.next();
   String name = entry.getKey();
   String value = entry.getValue();
-  String[] sources = src.getPropertySources(name);
+  String[] sources;
+  try {
+sources = ((Map)updatingResource.get(src)).get(name);
+  } catch (IllegalArgumentException | IllegalAccessException e) {
+throw new RuntimeException(e);
+  }
   final String source;
   if (sources == null || sources.length == 0) {
 source = null;
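
The trick is to resolve the private field once, keep the java.lang.reflect.Field handle, and read it directly on the hot path instead of going through a synchronized accessor. A generic, self-contained sketch against a demo class (not Hadoop's Configuration):

import java.lang.reflect.Field;
import java.util.Map;

// Demo of caching a reflective Field handle in a static initializer and using
// it for fast reads afterwards.
final class CachedFieldAccess {
  static final class Holder {
    private final Map<String, String[]> sources = Map.of("k", new String[] {"file.xml"});
  }

  private static final Field SOURCES_FIELD;
  static {
    try {
      SOURCES_FIELD = Holder.class.getDeclaredField("sources");
      SOURCES_FIELD.setAccessible(true);   // done once, not per lookup
    } catch (NoSuchFieldException e) {
      throw new ExceptionInInitializerError(e);
    }
  }

  @SuppressWarnings("unchecked")
  static String[] sourcesOf(Holder h, String key) {
    try {
      return ((Map<String, String[]>) SOURCES_FIELD.get(h)).get(key);
    } catch (IllegalAccessException e) {
      throw new RuntimeException(e);
    }
  }

  public static void main(String[] args) {
    System.out.println(sourcesOf(new Holder(), "k")[0]); // file.xml
  }
}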



[hive] branch master updated: HIVE-23295 : Possible NPE when getting predicate literal list when dynamic values are not available (Attila Magyar via Jason Dere)

2020-04-27 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ea74565  HIVE-23295 : Possible NPE when getting predicate literal 
list when dynamic values are not available (Attila Magyar via Jason Dere)
ea74565 is described below

commit ea74565b5e0095f2b99be8431a683fab3442c275
Author: Attila Magyar 
AuthorDate: Mon Apr 27 16:02:58 2020 -0700

HIVE-23295 : Possible NPE when getting predicate literal list when 
dynamic values are not available (Attila Magyar via Jason Dere)

Signed-off-by: Ashutosh Chauhan 
---
 .../src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index 8aa97d9..c2b7c4a 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -22,6 +22,7 @@ import java.sql.Timestamp;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Deque;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -125,7 +126,7 @@ public final class SearchArgumentImpl implements 
SearchArgument {
   }
 } catch (NoDynamicValuesException err) {
   LOG.debug("Error while retrieving literalList, returning null", err);
-  return null;
+  return Collections.emptyList();
 }
 return newLiteraList;
   }
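
The convention adopted here, returning an empty collection instead of null when the backing data is unavailable, lets callers iterate without a null check. A tiny stand-alone illustration, with IllegalStateException standing in for Hive's NoDynamicValuesException:

import java.util.Collections;
import java.util.List;

// Demo of the empty-list-instead-of-null convention used in the patch above.
final class LiteralListSketch {
  static List<Integer> literalList(boolean dynamicValuesAvailable) {
    try {
      if (!dynamicValuesAvailable) {
        throw new IllegalStateException("dynamic values not available");
      }
      return List.of(1, 2, 3);
    } catch (IllegalStateException e) {
      return Collections.emptyList(); // safe for callers that just iterate
    }
  }

  public static void main(String[] args) {
    System.out.println(literalList(true));  // [1, 2, 3]
    System.out.println(literalList(false)); // []
  }
}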



[hive] branch master updated: HIVE-23268 : Eliminate beanutils transitive dependency

2020-04-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 19314f5  HIVE-23268 : Eliminate beanutils transitive dependency
19314f5 is described below

commit 19314f5dfdd298a469ed8a34afcfb96cfd910f1b
Author: Ashutosh Chauhan 
AuthorDate: Tue Apr 21 23:35:15 2020 -0700

HIVE-23268 : Eliminate beanutils transitive dependency
---
 accumulo-handler/pom.xml   | 24 ++-
 beeline/pom.xml| 16 +-
 cli/pom.xml| 16 +-
 common/pom.xml | 22 --
 contrib/pom.xml| 16 +-
 druid-handler/pom.xml  |  6 +++-
 hbase-handler/pom.xml  | 22 --
 hcatalog/core/pom.xml  | 12 ++--
 hcatalog/hcatalog-pig-adapter/pom.xml  | 22 --
 hcatalog/pom.xml   | 16 +-
 hcatalog/server-extensions/pom.xml | 16 +-
 hcatalog/streaming/pom.xml | 16 +-
 hcatalog/webhcat/java-client/pom.xml   | 16 +-
 hcatalog/webhcat/svr/pom.xml   |  6 +++-
 hplsql/pom.xml | 16 +-
 jdbc/pom.xml   |  8 -
 kafka-handler/pom.xml  |  6 +++-
 kryo-registrator/pom.xml   |  8 -
 kudu-handler/pom.xml   | 16 +-
 llap-client/pom.xml| 12 ++--
 llap-common/pom.xml| 12 ++--
 llap-ext-client/pom.xml| 12 ++--
 llap-server/pom.xml| 12 ++--
 llap-tez/pom.xml   | 12 ++--
 metastore/pom.xml  |  6 +++-
 ql/pom.xml |  6 +++-
 serde/pom.xml  | 34 --
 service/pom.xml|  6 +++-
 shims/0.23/pom.xml | 34 --
 shims/common/pom.xml   | 22 --
 shims/scheduler/pom.xml| 14 +
 standalone-metastore/metastore-common/pom.xml  |  6 +++-
 standalone-metastore/metastore-server/pom.xml  | 25 +++-
 .../metastore-tools/metastore-benchmarks/pom.xml   |  8 +
 standalone-metastore/pom.xml   |  1 +
 storage-api/pom.xml|  4 +++
 streaming/pom.xml  | 16 +-
 upgrade-acid/pre-upgrade/pom.xml   |  4 +++
 38 files changed, 454 insertions(+), 72 deletions(-)

diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml
index 23433a5..bee3eb8 100644
--- a/accumulo-handler/pom.xml
+++ b/accumulo-handler/pom.xml
@@ -36,7 +36,15 @@
   org.apache.accumulo
   accumulo-core
 
+   
+commons-beanutils
+commons-beanutils-core
+  
  
+commons-digester
+commons-digester
+  
+
 commons-collections
 commons-collections
   
@@ -134,7 +142,21 @@
   hadoop-common
   ${hadoop.version}
   true
-
+ 
+
+org.slf4j
+slf4j-log4j12
+  
+   
+commons-beanutils
+commons-beanutils
+  
+ 
+commons-logging
+commons-logging
+  
+
+   
 
   org.apache.hadoop
   hadoop-mapreduce-client-core
diff --git a/beeline/pom.xml b/beeline/pom.xml
index a5a1e42..bb627f2 100644
--- a/beeline/pom.xml
+++ b/beeline/pom.xml
@@ -81,7 +81,21 @@
   hadoop-common
   ${hadoop.version}
   true
-
+   
+
+  commons-beanutils
+  commons-beanutils
+  
+
+  org.slf4j
+  slf4j-log4j12
+
+
+  commons-logging
+  commons-logging
+
+  
+   
 
   org.apache.thrift
   libthrift
diff --git a/cli/pom.xml b/cli/pom.xml
index 79fa973..eca0282 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -97,7 +97,21 @@
   hadoop-common
   ${hadoop.version}
   true
-
+   
+
+  commons-beanutils
+  commons-beanutils
+  
+
+  org.slf4j
+  slf4j-log4j12
+
+
+  commons-logging
+  commons-logging

[hive] branch master updated: HIVE-23262 : Remove dependency on activemq

2020-04-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 40ca5f6  HIVE-23262 : Remove dependency on activemq
40ca5f6 is described below

commit 40ca5f65666b007baa13a2c3e2d561712d683506
Author: Ashutosh Chauhan 
AuthorDate: Mon Apr 20 21:53:24 2020 -0700

HIVE-23262 : Remove dependency on activemq
---
 hcatalog/conf/jndi.properties  |  36 ---
 hcatalog/server-extensions/pom.xml |  18 --
 .../hcatalog/listener/TestMsgBusConnection.java| 117 -
 .../listener/TestNotificationListener.java | 270 -
 pom.xml|  17 --
 5 files changed, 458 deletions(-)

diff --git a/hcatalog/conf/jndi.properties b/hcatalog/conf/jndi.properties
deleted file mode 100644
index f718111..000
--- a/hcatalog/conf/jndi.properties
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# If ActiveMQ is used then uncomment following properties, else substitute it 
accordingly.
-#java.naming.factory.initial = 
org.apache.activemq.jndi.ActiveMQInitialContextFactory
-
-# use the following property to provide location of MQ broker.
-#java.naming.provider.url = tcp://localhost:61616
-
-# use the following property to specify the JNDI name the connection factory
-# should appear as. 
-#connectionFactoryNames = connectionFactory, queueConnectionFactory, 
topicConnectionFactry
-
-# register some queues in JNDI using the form
-# queue.[jndiName] = [physicalName]
-# queue.MyQueue = example.MyQueue
-
-
-# register some topics in JNDI using the form
-# topic.[jndiName] = [physicalName]
-# topic.MyTopic = example.MyTopic
-
diff --git a/hcatalog/server-extensions/pom.xml 
b/hcatalog/server-extensions/pom.xml
index 9bc28f2..e00dec1 100644
--- a/hcatalog/server-extensions/pom.xml
+++ b/hcatalog/server-extensions/pom.xml
@@ -94,24 +94,6 @@
   test
 
 
-  org.apache.activemq
-  activemq-core
-  ${activemq.version}
-  test
-  
-
-  org.springframework
-  spring-context
-
-  
-
-
-  org.apache.activemq
-  kahadb
-  ${activemq.version}
-  test
-
-
   org.apache.pig
   pig
   ${pig.version}
diff --git 
a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java
 
b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java
deleted file mode 100644
index d793770..000
--- 
a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hive.hcatalog.listener;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import javax.jms.Connection;
-import javax.jms.ConnectionFactory;
-import javax.jms.Destination;
-import javax.jms.JMSException;
-import javax.jms.Message;
-import javax.jms.MessageConsumer;
-import javax.jms.Session;
-import javax.jms.TextMessage;
-
-import org.apache.activemq.ActiveMQConnectionFactory;
-import org.apache.activemq.broker.BrokerService;
-import org.apache.

[hive] branch master updated: HIVE-23287 : Reduce dependency on icu4j

2020-04-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ca275be  HIVE-23287 : Reduce dependency on icu4j
ca275be is described below

commit ca275befa1b9c39c76e539c6b4506e58ab39f51c
Author: Ashutosh Chauhan 
AuthorDate: Thu Apr 23 09:03:14 2020 -0700

HIVE-23287 : Reduce dependency on icu4j
---
 druid-handler/pom.xml | 4 
 1 file changed, 4 insertions(+)

diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index 18dfca0..58a17bc 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -124,6 +124,10 @@
   ${druid.version}
   
 
+  com.ibm.icu
+  icu4j
+
+
   org.codehaus.plexus
   plexus-utils
 



[hive] branch master updated: HIVE-23267 : Reduce dependency on groovy

2020-04-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 2c1f2fd  HIVE-23267 : Reduce dependency on groovy
2c1f2fd is described below

commit 2c1f2fddf4931a71c67ca63ece6ca232c25ea372
Author: Ashutosh Chauhan 
AuthorDate: Tue Apr 21 13:50:57 2020 -0700

HIVE-23267 : Reduce dependency on groovy

Signed-off-by: Ashutosh Chauhan 
---
 hcatalog/hcatalog-pig-adapter/pom.xml | 4 
 hcatalog/pom.xml  | 4 
 upgrade-acid/pre-upgrade/pom.xml  | 4 
 3 files changed, 12 insertions(+)

diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml 
b/hcatalog/hcatalog-pig-adapter/pom.xml
index bc8bfba..9beb35d 100644
--- a/hcatalog/hcatalog-pig-adapter/pom.xml
+++ b/hcatalog/hcatalog-pig-adapter/pom.xml
@@ -68,6 +68,10 @@
   h2
   
 
+  org.codehaus.groovy
+  groovy-all
+
+
   tomcat
   jasper-runtime
 
diff --git a/hcatalog/pom.xml b/hcatalog/pom.xml
index ae278bc..4f0fd9f 100644
--- a/hcatalog/pom.xml
+++ b/hcatalog/pom.xml
@@ -74,6 +74,10 @@
   test
   
 
+  org.codehaus.groovy
+  groovy-all
+
+
   tomcat
   jasper-compiler
 
diff --git a/upgrade-acid/pre-upgrade/pom.xml b/upgrade-acid/pre-upgrade/pom.xml
index 3abe596..b3abde9 100644
--- a/upgrade-acid/pre-upgrade/pom.xml
+++ b/upgrade-acid/pre-upgrade/pom.xml
@@ -88,6 +88,10 @@
 provided
 
   
+org.codehaus.groovy
+groovy-all
+  
+  
 org.apache.zookeeper
 zookeeper
   



[hive] branch master updated: HIVE-23278 : Remove dependency on bouncycastle

2020-04-26 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c78d0da  HIVE-23278 : Remove dependency on bouncycastle
c78d0da is described below

commit c78d0da5e8a49355ec1e9cf42c44a9d058488b46
Author: Ashutosh Chauhan 
AuthorDate: Wed Apr 22 23:27:51 2020 -0700

HIVE-23278 : Remove dependency on bouncycastle
---
 kryo-registrator/pom.xml  | 6 ++
 ql/pom.xml| 4 
 spark-client/pom.xml  | 4 
 standalone-metastore/metastore-server/pom.xml | 6 ++
 4 files changed, 20 insertions(+)

diff --git a/kryo-registrator/pom.xml b/kryo-registrator/pom.xml
index ea2eb65..cbba1e7 100644
--- a/kryo-registrator/pom.xml
+++ b/kryo-registrator/pom.xml
@@ -43,6 +43,12 @@
   spark-core_${scala.binary.version}
   ${spark.version}
   true
+  
+
+  net.java.dev.jets3t
+  jets3t
+
+ 
 
 
   org.apache.hadoop
diff --git a/ql/pom.xml b/ql/pom.xml
index a0e77a1..7c42c0e 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -730,6 +730,10 @@
  commons-logging


+ net.java.dev.jets3t
+ jets3t
+   
+   
  org.glassfish.jersey.containers
  *

diff --git a/spark-client/pom.xml b/spark-client/pom.xml
index a3cf922..33d03b9 100644
--- a/spark-client/pom.xml
+++ b/spark-client/pom.xml
@@ -82,6 +82,10 @@
   *
 
 
+  net.java.dev.jets3t
+  jets3t
+
+
   org.slf4j
   slf4j-log4j12
 
diff --git a/standalone-metastore/metastore-server/pom.xml 
b/standalone-metastore/metastore-server/pom.xml
index e492ce1..14ea09b 100644
--- a/standalone-metastore/metastore-server/pom.xml
+++ b/standalone-metastore/metastore-server/pom.xml
@@ -250,6 +250,12 @@
   apacheds-server-integ
   ${apache-directory-server.version}
   test
+  
+
+  bouncycastle
+  bcprov-jdk15
+
+  
 
 
 



[hive] branch master updated: HIVE-23169 : Probe runtime support for LLAP (Panagiotis Garefalakis via Ashutosh Chauhan)

2020-04-21 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 8b9fadb  HIVE-23169 : Probe runtime support for LLAP (Panagiotis 
Garefalakis via Ashutosh Chauhan)
8b9fadb is described below

commit 8b9fadb5515aace73db5068cc81317b6f10e0f32
Author: Ashutosh Chauhan 
AuthorDate: Tue Apr 21 16:54:58 2020 -0700

HIVE-23169 : Probe runtime support for LLAP (Panagiotis Garefalakis via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../hive/llap/io/api/impl/LlapRecordReader.java| 51 ++
 .../hive/llap/io/decode/ColumnVectorProducer.java  |  6 +++
 .../llap/io/decode/OrcEncodedDataConsumer.java |  4 ++
 3 files changed, 61 insertions(+)

diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index acb6b2d..417a42a 100644
--- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -25,6 +25,8 @@ import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
@@ -41,6 +43,7 @@ import 
org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.Includes;
 import 
org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.SchemaEvolutionFactory;
 import org.apache.hadoop.hive.llap.io.decode.ReadPipeline;
 import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
@@ -82,6 +85,7 @@ class LlapRecordReader implements RecordReader
   private final SearchArgument sarg;
   private final VectorizedRowBatchCtx rbCtx;
   private final boolean isVectorized;
+  private final boolean probeDecodeEnabled;
   private VectorizedOrcAcidRowBatchReader acidReader;
   private final Object[] partitionValues;
 
@@ -196,6 +200,12 @@ class LlapRecordReader implements 
RecordReader
 this.includes = new IncludesImpl(tableIncludedCols, isAcidFormat, rbCtx,
 schema, job, isAcidScan && acidReader.includeAcidColumns());
 
+this.probeDecodeEnabled = HiveConf.getBoolVar(jobConf, 
ConfVars.HIVE_OPTIMIZE_SCAN_PROBEDECODE);
+if (this.probeDecodeEnabled) {
+  includes.setProbeDecodeContext(mapWork.getProbeDecodeContext());
+  LOG.info("LlapRecordReader ProbeDecode is enabled");
+}
+
 // Create the consumer of encoded data; it will coordinate decoding to 
CVBs.
 feedback = rp = cvp.createReadPipeline(this, split, includes, sarg, 
counters, includes,
 sourceInputFormat, sourceSerDe, reporter, job, 
mapWork.getPathToPartitionInfo());
@@ -629,6 +639,9 @@ class LlapRecordReader implements 
RecordReader
 private TypeDescription readerSchema;
 private JobConf jobConf;
 
+// ProbeDecode Context for row-level filtering
+private TableScanOperator.ProbeDecodeContext probeDecodeContext = null;
+
 public IncludesImpl(List tableIncludedCols, boolean isAcidScan,
 VectorizedRowBatchCtx rbCtx, TypeDescription readerSchema,
 JobConf jobConf, boolean includeAcidColumns) {
@@ -710,6 +723,10 @@ class LlapRecordReader implements 
RecordReader
   fileSchema, filePhysicalColumnIds, acidStructColumnId);
 }
 
+public void setProbeDecodeContext(TableScanOperator.ProbeDecodeContext 
currProbeDecodeContext) {
+  this.probeDecodeContext = currProbeDecodeContext;
+}
+
 @Override
 public List getPhysicalColumnIds() {
   return filePhysicalColumnIds;
@@ -725,5 +742,39 @@ class LlapRecordReader implements 
RecordReader
   return OrcInputFormat.genIncludedTypes(
   fileSchema, filePhysicalColumnIds, acidStructColumnId);
 }
+
+@Override
+public String getQueryId() {
+  return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID);
+}
+
+@Override
+public boolean isProbeDecodeEnabled() {
+  return this.probeDecodeContext != null;
+}
+
+@Override
+public byte getProbeMjSmallTablePos() {
+  return this.probeDecodeContext.getMjSmallTablePos();
+}
+
+@Override
+public int getProbeColIdx() {
+  // TODO: is this the best way to get the ColId?
+  Pattern pattern = Pattern.compile("_col([0-9]+)");
+  Matcher matcher = 
pattern.matcher(this.probeDecodeCon
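
The truncated hunk above extracts the probe column index from Hive's internal "_colN" naming convention with a regex, and the in-line TODO flags this as provisional. A small stand-alone version of that extraction, for illustration only:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Demo of recovering the numeric column index from an internal "_colN" name.
final class ProbeColIdx {
  private static final Pattern COL_PATTERN = Pattern.compile("_col([0-9]+)");

  static int colIndex(String internalColumnName) {
    Matcher m = COL_PATTERN.matcher(internalColumnName);
    if (m.matches()) {
      return Integer.parseInt(m.group(1));
    }
    return -1; // not an internal "_colN" name
  }

  public static void main(String[] args) {
    System.out.println(colIndex("_col7"));   // 7
    System.out.println(colIndex("user_id")); // -1
  }
}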

[hive] branch master updated: HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor via Ashutosh Chauhan)

2020-04-20 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 00423c1  HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor 
via Ashutosh Chauhan)
00423c1 is described below

commit 00423c1c31f9aac9efa98570a2b954f3f1ae56fa
Author: David Mollitor 
AuthorDate: Mon Apr 20 20:02:34 2020 -0700

HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 binary-package-licenses/README |   1 -
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   3 +-
 hcatalog/src/test/e2e/hcatalog/drivers/Util.pm |   3 -
 .../org/apache/hive/jdbc/TestRestrictedList.java   |   1 -
 metastore/pom.xml  |  10 --
 pom.xml|   6 -
 standalone-metastore/metastore-common/pom.xml  |   4 -
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |   2 +-
 standalone-metastore/metastore-server/pom.xml  |   4 -
 .../datasource/BoneCPDataSourceProvider.java   | 191 -
 .../metastore/datasource/DataSourceProvider.java   |   7 -
 .../datasource/DataSourceProviderFactory.java  |   1 -
 .../datasource/DbCPDataSourceProvider.java |   6 -
 .../datasource/HikariCPDataSourceProvider.java |   6 -
 .../hadoop/hive/metastore/txn/TxnHandler.java  |  28 ++-
 .../datasource/TestDataSourceProviderFactory.java  |  55 --
 standalone-metastore/pom.xml   |   6 -
 17 files changed, 12 insertions(+), 322 deletions(-)

diff --git a/binary-package-licenses/README b/binary-package-licenses/README
index db6b7fe..02a4d11 100644
--- a/binary-package-licenses/README
+++ b/binary-package-licenses/README
@@ -19,7 +19,6 @@ ant*
 apache-curator
 avatica*
 avro
-bonecp
 calcite*
 classmate
 commons*
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 16bae92..e3ddbf1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1005,7 +1005,7 @@ public class HiveConf extends Configuration {
  * @deprecated Use MetastoreConf.CONNECTION_POOLING_TYPE
  */
 @Deprecated
-METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", 
"HikariCP", new StringSet("BONECP", "DBCP",
+METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", 
"HikariCP", new StringSet("DBCP",
   "HikariCP", "NONE"),
 "Specify connection pool library for datanucleus"),
 /**
@@ -4831,7 +4831,6 @@ public class HiveConf extends Configuration {
 "hive.spark.client.rpc.server.address," +
 "hive.spark.client.rpc.server.port," +
 "hive.spark.client.rpc.sasl.mechanisms," +
-"bonecp.," +
 "hive.druid.broker.address.default," +
 "hive.druid.coordinator.address.default," +
 "hikaricp.," +
diff --git a/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm 
b/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm
index 65e6c81..b860480 100644
--- a/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm
+++ b/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm
@@ -424,9 +424,6 @@ sub getHiveLibsForPig($$)
 /jdo-api-.*\.jar$/ && do {
 $cp .= $cfg->{'hivelib'} . '/' . $_ . ':';
 };
-/bonecp-.*\.jar$/ && do {
-$cp .= $cfg->{'hivelib'} . '/' . $_ . ':';
-};
 /commons-pool-.*\.jar$/ && do {
 $cp .= $cfg->{'hivelib'} . '/' . $_ . ':';
 };
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java
index 596c3d6..d5641d9 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java
@@ -94,7 +94,6 @@ public class TestRestrictedList {
 addToExpectedRestrictedMap("hive.spark.client.rpc.server.port");
 addToExpectedRestrictedMap("hive.spark.client.rpc.sasl.mechanisms");
 addToExpectedRestrictedMap("hive.query.max.length");
-addToExpectedRestrictedMap("bonecp.test");
 addToExpectedRestrictedMap("hive.druid.broker.address.default");
 addToExpectedRestrictedMap("hive.druid.coordinator.address.default");
 addToExpectedRestrictedMap("hikaricp.test");
diff --git a/metastore/pom.xml b/metastore/pom.xml
index 95e2d07..c82dc26

[hive] branch master updated: HIVE-23241 : Reduce transitive dependencies

2020-04-20 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 57b2024  HIVE-23241 : Reduce transitive dependencies
57b2024 is described below

commit 57b2024cebc680e5b8165a73a9be32259cf78794
Author: Ashutosh Chauhan 
AuthorDate: Mon Apr 20 14:04:34 2020 -0700

HIVE-23241 : Reduce transitive dependencies

Signed-off-by: Ashutosh Chauhan 
---
 druid-handler/pom.xml| 20 
 .../metastore-tools/metastore-benchmarks/pom.xml |  6 ++
 standalone-metastore/metastore-tools/pom.xml |  6 ++
 .../metastore-tools/tools-common/pom.xml | 13 -
 4 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml
index 5ee99b1..18dfca0 100644
--- a/druid-handler/pom.xml
+++ b/druid-handler/pom.xml
@@ -92,6 +92,10 @@
   ${druid.version}
   
 
+  io.tesla.aether
+  tesla-aether
+
+
   com.fasterxml.jackson.core
   jackson-core
 
@@ -120,6 +124,22 @@
   ${druid.version}
   
 
+  org.codehaus.plexus
+  plexus-utils
+
+
+  org.ow2.asm
+  asm-commons
+
+
+  org.ow2.asm
+  asm
+
+
+  org.checkerframework
+  checker-qual
+
+
   
   com.google.code.findbugs
   annotations
diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml 
b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
index 13524a4..534583d 100644
--- a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
+++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml
@@ -97,6 +97,12 @@
   org.apache.maven.plugins
   maven-jxr-plugin
   2.5
+  
+
+  org.codehaus.plexus
+  plexus-utils
+
+  
 
 
 
diff --git a/standalone-metastore/metastore-tools/pom.xml 
b/standalone-metastore/metastore-tools/pom.xml
index d8c4788..df1b3ab 100644
--- a/standalone-metastore/metastore-tools/pom.xml
+++ b/standalone-metastore/metastore-tools/pom.xml
@@ -115,6 +115,12 @@
   
 org.apache.maven.plugins
 maven-jxr-plugin
+
+  
+org.codehaus.plexus
+plexus-utils
+  
+
 2.5
   
   
diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml 
b/standalone-metastore/metastore-tools/tools-common/pom.xml
index 4c226fa..44f02b7 100644
--- a/standalone-metastore/metastore-tools/tools-common/pom.xml
+++ b/standalone-metastore/metastore-tools/tools-common/pom.xml
@@ -101,17 +101,4 @@
   
 
   
-
-  
-
-  
-
-org.apache.maven.plugins
-maven-jxr-plugin
-2.5
-  
-
-  
-
-
 



[hive] branch master updated: HIVE-20078 : Remove ATSHook

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 11abab2  HIVE-20078 : Remove ATSHook
11abab2 is described below

commit 11abab21be0f5fbf6eeb39acbf2963618352b6dd
Author: Ashutosh Chauhan 
AuthorDate: Sat Dec 14 08:50:54 2019 -0800

HIVE-20078 : Remove ATSHook
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   3 -
 .../org/apache/hadoop/hive/ql/hooks/ATSHook.java   | 495 -
 .../apache/hadoop/hive/ql/hooks/TestATSHook.java   |  59 ---
 3 files changed, 557 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7b3acad..9e46e7b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -582,9 +582,6 @@ public class HiveConf extends Configuration {
 "Comma-separated list of statistics publishers to be invoked on 
counters on each job. \n" +
 "A client stats publisher is specified as the name of a Java class 
which implements the \n" +
 "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."),
-ATSHOOKQUEUECAPACITY("hive.ats.hook.queue.capacity", 64,
-"Queue size for the ATS Hook executor. If the number of outstanding 
submissions \n" +
-"to the ATS executor exceed this amount, the Hive ATS Hook will not 
try to log queries to ATS."),
 EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in 
parallel"),
 EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8,
 "How many jobs at most can be executed in parallel"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java 
b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
deleted file mode 100644
index 0632f6e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.hooks;
-
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService;
-import org.apache.hadoop.hive.ql.QueryPlan;
-import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.ExplainTask;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.tez.TezTask;
-import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
-import org.apache.hadoop.hive.ql.plan.ExplainWork;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain;
-import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
-import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
-import org.apache.hadoop.yarn.client.api.TimelineClient;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hive.common.util.ShutdownHookManager;
-import org.apache.tez.dag.api.TezConfiguration;
-import org.json.JSONObject;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.util.concurr

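The hive.ats.hook.queue.capacity description removed above captures the pattern the hook relied on: a bounded executor queue whose overflow submissions are dropped rather than allowed to block query execution. A minimal standalone Java sketch of that drop-when-full pattern (the class name is illustrative and this is not Hive code; 64 is the removed setting's default):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BoundedDropExecutorSketch {
  public static void main(String[] args) throws InterruptedException {
    int queueCapacity = 64; // former hive.ats.hook.queue.capacity default
    ThreadPoolExecutor executor = new ThreadPoolExecutor(
        1, 1, 0L, TimeUnit.MILLISECONDS,
        new LinkedBlockingQueue<>(queueCapacity),
        new ThreadPoolExecutor.DiscardPolicy()); // drop new work instead of blocking

    for (int i = 0; i < 200; i++) {
      final int id = i;
      executor.execute(() -> System.out.println("logged event " + id));
    }
    executor.shutdown();
    executor.awaitTermination(10, TimeUnit.SECONDS);
  }
}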
[hive] branch master updated: HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module (David Mollitor via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c3fb689  HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module 
(David Mollitor via Ashutosh Chauhan)
c3fb689 is described below

commit c3fb689ffbac7f8dd45ee5219145d8c3d231e27f
Author: David Mollitor 
AuthorDate: Sat Apr 18 12:31:07 2020 -0700

HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module (David 
Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hadoop/hive/hbase/HBaseScanRange.java   |  1 +
 .../java/org/apache/hadoop/hive/hbase/HBaseSerDe.java  |  1 -
 .../apache/hadoop/hive/hbase/HBaseSerDeParameters.java |  1 +
 .../apache/hadoop/hive/hbase/HBaseStorageHandler.java  |  1 -
 .../hadoop/hive/hbase/HiveHBaseTableOutputFormat.java  |  2 ++
 .../hive/hbase/HiveHBaseTableSnapshotInputFormat.java  |  3 ---
 .../hive/hbase/struct/AvroHBaseValueFactory.java   |  1 -
 .../org/apache/hadoop/hive/hbase/TestHBaseSerDe.java   | 11 +--
 .../apache/hadoop/hive/hbase/TestLazyHBaseObject.java  | 18 --
 .../org/apache/hadoop/hive/hbase/avro/Address.java |  5 -
 .../org/apache/hadoop/hive/hbase/avro/ContactInfo.java |  3 +++
 .../org/apache/hadoop/hive/hbase/avro/Employee.java|  3 +++
 .../org/apache/hadoop/hive/hbase/avro/HomePhone.java   |  3 +++
 .../org/apache/hadoop/hive/hbase/avro/OfficePhone.java |  3 +++
 14 files changed, 33 insertions(+), 23 deletions(-)

diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java
index f01748c..79d687f 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java
@@ -87,6 +87,7 @@ public class HBaseScanRange implements Serializable {
 scan.setFilter(new FilterList(filters));
   }
 
+  @Override
   public String toString() {
 return (startRow == null ? "" : new BytesWritable(startRow).toString()) + 
" ~ " +
 (stopRow == null ? "" : new BytesWritable(stopRow).toString());
diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index 1588283..5147d0e 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -30,7 +30,6 @@ import 
org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
-import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeSpec;
 import org.apache.hadoop.hive.serde2.SerDeStats;
diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
index eb3560c..480484c 100644
--- 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
+++ 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
@@ -167,6 +167,7 @@ public class HBaseSerDeParameters {
 throw new IllegalArgumentException("Invalid column name " + columnName);
   }
 
+  @Override
   public String toString() {
 return "[" + columnMappingString + ":" + getColumnNames() + ":" + 
getColumnTypes() + "]";
   }
diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
index f3735a3..16658d0 100644
--- 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
+++ 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
@@ -42,7 +42,6 @@ import 
org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
 import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableOutputFormat.java
 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableOutputFormat.java
index b344e16..f0eccd7 100644
--- 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseT

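Most of the hunks in this cleanup add @Override to methods such as toString(). The annotation has no runtime effect, but it turns an accidental overload into a compile error, which is the point of adding it during cleanup. A small illustrative sketch (class names are made up):

public class OverrideSketch {
  static class ScanRange {
    @Override
    public String toString() {   // compiles: genuinely overrides Object.toString()
      return "startRow ~ stopRow";
    }

    // @Override                 // uncommenting would be a compile error:
    public String tostring() {   // nothing named tostring() exists to override
      return "oops";
    }
  }

  public static void main(String[] args) {
    System.out.println(new ScanRange()); // prints: startRow ~ stopRow
  }
}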
[hive] branch master updated: HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new bb95ad2  HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave 
via Ashutosh Chauhan)
bb95ad2 is described below

commit bb95ad243cc0ab028deed516b5f36616d9fd3354
Author: Iwao AVE 
AuthorDate: Sat Apr 18 12:26:07 2020 -0700

HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../java/org/apache/hive/jdbc/TestJdbcDriver2.java | 37 ++
 .../org/apache/hive/jdbc/HiveQueryResultSet.java   |  1 +
 .../java/org/apache/hive/jdbc/HiveStatement.java   | 12 +--
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index dbe282d..ba1f39c 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -3250,4 +3250,41 @@ public class TestJdbcDriver2 {
   public void testConnectInvalidDatabase() throws SQLException {
 DriverManager.getConnection("jdbc:hive2:///databasedoesnotexist", "", "");
   }
+
+  @Test
+  public void testStatementCloseOnCompletion() throws SQLException {
+Statement stmt = con.createStatement();
+stmt.closeOnCompletion();
+ResultSet res = stmt.executeQuery("select under_col from " + tableName + " 
limit 1");
+assertTrue(res.next());
+assertFalse(stmt.isClosed());
+assertFalse(res.next());
+assertFalse(stmt.isClosed());
+res.close();
+assertTrue(stmt.isClosed());
+  }
+
+  @Test
+  public void testPreparedStatementCloseOnCompletion() throws SQLException {
+PreparedStatement stmt = con.prepareStatement("select under_col from " + 
tableName + " limit 1");
+stmt.closeOnCompletion();
+ResultSet res = stmt.executeQuery();
+assertTrue(res.next());
+assertFalse(stmt.isClosed());
+assertFalse(res.next());
+assertFalse(stmt.isClosed());
+res.close();
+assertTrue(stmt.isClosed());
+  }
+
+  @Test
+  public void testCloseOnAlreadyOpenedResultSetCompletion() throws Exception {
+PreparedStatement stmt = con.prepareStatement("select under_col from " + 
tableName + " limit 1");
+ResultSet res = stmt.executeQuery();
+assertTrue(res.next());
+stmt.closeOnCompletion();
+assertFalse(stmt.isClosed());
+res.close();
+assertTrue(stmt.isClosed());
+  }
 }
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java 
b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
index 8563cee..df31a25 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
@@ -276,6 +276,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet {
 if (this.statement != null && (this.statement instanceof HiveStatement)) {
   HiveStatement s = (HiveStatement) this.statement;
   s.closeClientOperation();
+  s.closeOnResultSetCompletion();
 } else {
   // for those stmtHandle passed from HiveDatabaseMetaData instead of 
Statement
   closeOperationHandle(stmtHandle);
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java 
b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
index 543bf8c..a74a3a8 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java
@@ -76,6 +76,7 @@ public class HiveStatement implements java.sql.Statement {
   private final int defaultFetchSize;
   private boolean isScrollableResultset = false;
   private boolean isOperationComplete = false;
+  private boolean closeOnResultSetCompletion = false;
   /**
* We need to keep a reference to the result set to support the following:
* 
@@ -233,6 +234,13 @@ public class HiveStatement implements java.sql.Statement {
 stmtHandle = null;
   }
 
+  void closeOnResultSetCompletion() throws SQLException {
+if (closeOnResultSetCompletion) {
+  resultSet = null;
+  close();
+}
+  }
+
   /*
* (non-Javadoc)
*
@@ -254,7 +262,7 @@ public class HiveStatement implements java.sql.Statement {
 
   // JDK 1.7
   public void closeOnCompletion() throws SQLException {
-throw new SQLFeatureNotSupportedException("Method not supported");
+closeOnResultSetCompletion = true;
   }
 
   /*
@@ -752,7 +760,7 @@ public class HiveStatement implements java.sql.Statement {
 
   // JDK 1.7
   public boolean isCloseOnCompletion() throws SQLException {
-return false;
+return closeOnResultSetCompletion;
   }
 
   /*


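The new tests above drive the behaviour through HiveStatement directly; as a usage reference, a hedged client-side sketch of the JDK 7 close-on-completion contract the patch implements (the JDBC URL and query are placeholders):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CloseOnCompletionSketch {
  public static void main(String[] args) throws Exception {
    try (Connection con = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "")) {
      Statement stmt = con.createStatement();
      stmt.closeOnCompletion();              // arm close-on-completion
      ResultSet rs = stmt.executeQuery("SELECT 1");
      while (rs.next()) {
        System.out.println(rs.getInt(1));
      }
      rs.close();                            // closing the last ResultSet now closes stmt too
      System.out.println("statement closed: " + stmt.isClosed()); // expected: true
    }
  }
}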

[hive] branch master updated: HIVE-23051 : Clean up BucketCodec (David Mollitor via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f888527  HIVE-23051 : Clean up BucketCodec (David Mollitor via 
Ashutosh Chauhan)
f888527 is described below

commit f888527feb3cd912850b4c62cf52bc191558bd7a
Author: David Mollitor 
AuthorDate: Sat Apr 18 11:15:52 2020 -0700

HIVE-23051 : Clean up BucketCodec (David Mollitor via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hadoop/hive/ql/io/BucketCodec.java  |  50 +-
 .../apache/hadoop/hive/ql/io/TestBucketCodec.java  | 101 +
 2 files changed, 127 insertions(+), 24 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java
index eb9ded7..10d9604 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.io;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class makes sense of {@link RecordIdentifier#getBucketProperty()}.  Up 
until ASF Hive 3.0 this
  * field was simply the bucket ID.  Since 3.0 it does bit packing to store 
several things:
@@ -86,50 +88,50 @@ public enum BucketCodec {
 }
 @Override
 public int encode(AcidOutputFormat.Options options) {
-  int statementId = options.getStatementId() >= 0 ? 
options.getStatementId() : 0;
+  final int statementId = options.getStatementId();
+  final int bucketId = options.getBucketId();
+
+  Preconditions.checkArgument(bucketId >= 0 && bucketId <= MAX_BUCKET_ID, 
"Bucket ID out of range: " + bucketId);
+  Preconditions.checkArgument(statementId >= -1 && statementId <= 
MAX_STATEMENT_ID,
+  "Statement ID out of range: " + statementId);
 
-  assert this.version >=0 && this.version <= MAX_VERSION
-: "Version out of range: " + version;
-  if(!(options.getBucketId() >= 0 && options.getBucketId() <= 
MAX_BUCKET_ID)) {
-throw new IllegalArgumentException("bucketId out of range: " + 
options.getBucketId());
-  }
-  if(!(statementId >= 0 && statementId <= MAX_STATEMENT_ID)) {
-throw new IllegalArgumentException("statementId out of range: " + 
statementId);
-  }
-  return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + 
NUM_STATEMENT_ID_BITS) |
-options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | statementId;
+  return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + 
NUM_STATEMENT_ID_BITS)
+  | options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | Math.max(0, 
statementId);
 }
   };
   private static final int TOP3BITS_MASK = 
0b1110_0000_0000_0000_0000_0000_0000_0000;
   private static final int NUM_VERSION_BITS = 3;
   private static final int NUM_BUCKET_ID_BITS = 12;
   private static final int NUM_STATEMENT_ID_BITS = 12;
-  private static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1;
+  public static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1;
   public static final int MAX_BUCKET_ID = (1 << NUM_BUCKET_ID_BITS) - 1;
-  private static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1;
+  public static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1;
 
   public static BucketCodec determineVersion(int bucket) {
-assert 7 << 29 == BucketCodec.TOP3BITS_MASK;
-//look at top 3 bits and return appropriate enum
 try {
+  // look at top 3 bits and return appropriate enum
   return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29);
-}
-catch(IllegalArgumentException ex) {
-  throw new IllegalArgumentException(ex.getMessage() + " Cannot decode 
version from " + bucket);
+} catch (IllegalArgumentException iae) {
+  throw new IllegalArgumentException("Cannot decode version from bucket 
number: " + Integer.toHexString(bucket),
+  iae);
 }
   }
+
   public static BucketCodec getCodec(int version) {
 switch (version) {
-  case 0:
-return BucketCodec.V0;
-  case 1:
-return BucketCodec.V1;
-  default:
-throw new IllegalArgumentException("Illegal 'bucket' format. Version=" 
+ version);
+case 0:
+  return BucketCodec.V0;
+case 1:
+  return BucketCodec.V1;
+default:
+  throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + 
version);
 }
   }
+
   final int version;
+
   BucketCodec(int version) {
+Preconditions.checkPositionIndex(version, MAX_VERSION, "Version out of 
range: " + version);
 this.versio

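The shifts above define the V1 layout of the int returned by RecordIdentifier#getBucketProperty(): three version bits at the top, a reserved bit, twelve bucket-id bits, four reserved bits, and twelve statement-id bits. A standalone sketch of that packing (illustrative class, not the Hive enum):

public class BucketPackingSketch {
  private static final int NUM_BUCKET_ID_BITS = 12;
  private static final int NUM_STATEMENT_ID_BITS = 12;
  private static final int TOP3BITS_MASK = 0b1110_0000_0000_0000_0000_0000_0000_0000;

  static int encode(int version, int bucketId, int statementId) {
    // same shifts as BucketCodec.V1.encode()
    return version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS)
        | bucketId << (4 + NUM_STATEMENT_ID_BITS)
        | Math.max(0, statementId);
  }

  static int decodeVersion(int bucketProperty) {
    return (TOP3BITS_MASK & bucketProperty) >>> 29;
  }

  static int decodeBucketId(int bucketProperty) {
    return (bucketProperty >>> (4 + NUM_STATEMENT_ID_BITS)) & ((1 << NUM_BUCKET_ID_BITS) - 1);
  }

  public static void main(String[] args) {
    int packed = encode(1, 7, 3);
    System.out.println("version=" + decodeVersion(packed) + " bucketId=" + decodeBucketId(packed));
    // prints: version=1 bucketId=7
  }
}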
[hive] branch master updated: HIVE-23239 : Remove snakeyaml lib from Hive distribution via transitive dependency (Roohi Syeda via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 9940cab  HIVE-23239 : Remove snakeyaml lib from Hive distribution via 
transitive dependency (Roohi Syeda via Ashutosh Chauhan)
9940cab is described below

commit 9940cab2805527c567f55760f9856b0f9bf98b67
Author: Roohi Syeda 
AuthorDate: Sat Apr 18 10:54:32 2020 -0700

HIVE-23239 : Remove snakeyaml lib from Hive distribution via transitive 
dependency (Roohi Syeda via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 kafka-handler/pom.xml | 8 
 llap-server/pom.xml   | 4 
 ql/pom.xml| 8 
 3 files changed, 20 insertions(+)

diff --git a/kafka-handler/pom.xml b/kafka-handler/pom.xml
index 4e58cb9..0ad3973 100644
--- a/kafka-handler/pom.xml
+++ b/kafka-handler/pom.xml
@@ -68,6 +68,10 @@
   org.slf4j
   slf4j-api
 
+
+  org.yaml
+  snakeyaml
+
   
 
 
@@ -134,6 +138,10 @@
   test
   
 
+  org.yaml
+  snakeyaml
+
+
   org.apache.avro
   avro
 
diff --git a/llap-server/pom.xml b/llap-server/pom.xml
index e03de9c..3d4e3da 100644
--- a/llap-server/pom.xml
+++ b/llap-server/pom.xml
@@ -129,6 +129,10 @@
   commons-logging
   commons-logging
 
+ 
+  org.yaml
+  snakeyaml
+
   
 
 
diff --git a/ql/pom.xml b/ql/pom.xml
index d1846c9..a0e77a1 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -386,6 +386,10 @@
   jackson-core
 
 
+  org.yaml
+  snakeyaml
+
+
   org.apache.calcite.avatica
   avatica-core
 
@@ -399,6 +403,10 @@
   org.apache.calcite.avatica
   avatica-core
 
+
+  org.yaml
+  snakeyaml
+
   
 
 



[hive] branch master updated (15ebf9e -> a3f3df0)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from 15ebf9e  HIVE-23194 : Use Queue Instead of List for CollectOperator 
(David Mollitor via Ashutosh Chauhan)
 new bc225fc  HIVE-23153 : deregister from zookeeper is not properly worked 
on kerberized environment (Eugene Chung via Ashutosh Chauhan)
 new a3f3df0  HIVE-23196 : Reduce number of delete calls to NN during 
Context::clear (Attila Magyar via Ashutosh Chauhan)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 ql/src/java/org/apache/hadoop/hive/ql/Context.java | 23 --
 .../apache/hive/service/server/HiveServer2.java| 19 +-
 2 files changed, 35 insertions(+), 7 deletions(-)



[hive] 02/02: HIVE-23196 : Reduce number of delete calls to NN during Context::clear (Attila Magyar via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit a3f3df0f97d857482a3042feca940251c4557673
Author: Attila Magyar 
AuthorDate: Sat Apr 18 10:44:47 2020 -0700

HIVE-23196 : Reduce number of delete calls to NN during Context::clear 
(Attila Magyar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/Context.java | 23 --
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index d618ef9..9f59d4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -23,6 +23,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URI;
 import java.text.SimpleDateFormat;
+import java.util.Collection;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -671,6 +672,10 @@ public class Context {
 for (Map.Entry entry : fsScratchDirs.entrySet()) {
   try {
 Path p = entry.getValue();
+if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, 
fsScratchDirs.values())  ) {
+  LOG.debug("Skip deleting stagingDir: " + p);
+  continue; // staging dir is deleted when deleting the scratch dir
+}
 if(resultCacheDir == null || 
!p.toUri().getPath().contains(resultCacheDir)) {
   // delete only the paths which aren't result cache dir path
   // because that will be taken care by removeResultCacheDir
@@ -687,6 +692,15 @@ public class Context {
 fsScratchDirs.clear();
   }
 
+  private boolean subDirOf(Path path, Collection parents) {
+for (Path each : parents) {
+  if (!path.equals(each) && FileUtils.isPathWithinSubtree(path, each)) {
+return true;
+  }
+}
+return false;
+  }
+
   /**
* Remove any created directories for CTEs.
*/
@@ -843,7 +857,7 @@ public class Context {
   subContext.clear();
 }
 // Then clear this context
-  if (resDir != null) {
+  if (resDir != null && !isInScratchDir(resDir)) { // resDir is inside the 
scratch dir, removeScratchDir will take care of removing it
 try {
   FileSystem fs = resDir.getFileSystem(conf);
   LOG.debug("Deleting result dir: {}", resDir);
@@ -853,7 +867,7 @@ public class Context {
 }
   }
 
-if (resFile != null) {
+if (resFile != null && !isInScratchDir(resFile.getParent())) { // resFile 
is inside the scratch dir, removeScratchDir will take care of removing it
   try {
 FileSystem fs = resFile.getFileSystem(conf);
 LOG.debug("Deleting result file: {}",  resFile);
@@ -871,6 +885,11 @@ public class Context {
 setNeedLockMgr(false);
   }
 
+  private boolean isInScratchDir(Path path) {
+return path.toUri().getPath().startsWith(localScratchDir)
+  || 
path.toUri().getPath().startsWith(nonLocalScratchPath.toUri().getPath());
+  }
+
   public DataInput getStream() {
 try {
   if (!initialized) {


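The patch cuts NameNode traffic by not issuing a delete for any path whose ancestor is already in the set being removed; deleting the ancestor takes the child with it. A small sketch of that containment check, reusing the FileUtils helper the patch calls (the paths are made-up examples):

import java.util.Arrays;
import java.util.Collection;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public class SkipNestedDeletesSketch {
  static boolean subDirOf(Path path, Collection<Path> parents) {
    for (Path each : parents) {
      if (!path.equals(each) && FileUtils.isPathWithinSubtree(path, each)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    Path scratch = new Path("/tmp/hive/scratch/session1");
    Path staging = new Path("/tmp/hive/scratch/session1/.hive-staging_1");
    Collection<Path> toDelete = Arrays.asList(scratch, staging);

    for (Path p : toDelete) {
      // only the top-level scratch dir needs its own delete RPC
      System.out.println(p + (subDirOf(p, toDelete) ? " -> skipped" : " -> deleted"));
    }
  }
}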

[hive] 01/02: HIVE-23153 : deregister from zookeeper is not properly worked on kerberized environment (Eugene Chung via Ashutosh Chauhan)

2020-04-18 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit bc225fcb0cd9c21ba38a466f98488b74d6c83d13
Author: Eugene Chung 
AuthorDate: Fri Apr 17 21:16:48 2020 -0700

HIVE-23153 : deregister from zookeeper is not properly worked on kerberized 
environment (Eugene Chung via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../org/apache/hive/service/server/HiveServer2.java   | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java 
b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 42b7e59..d600f3a 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -115,6 +115,7 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.http.util.EntityUtils;
 import org.apache.logging.log4j.util.Strings;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.ZooDefs.Ids;
@@ -133,10 +134,12 @@ import 
com.google.common.util.concurrent.ThreadFactoryBuilder;
  *
  */
 public class HiveServer2 extends CompositeService {
-  private static CountDownLatch deleteSignal;
   private static final Logger LOG = LoggerFactory.getLogger(HiveServer2.class);
   public static final String INSTANCE_URI_CONFIG = "hive.server2.instance.uri";
   private static final int SHUTDOWN_TIME = 60;
+  private static CountDownLatch zkDeleteSignal;
+  private static volatile KeeperException.Code zkDeleteResultCode;
+
   private CLIService cliService;
   private ThriftCLIService thriftCLIService;
   private CuratorFramework zKClientForPrivSync = null;
@@ -566,7 +569,7 @@ public class HiveServer2 extends CompositeService {
* @return
* @throws Exception
*/
-  private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception {
+  private static void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception {
 if (ZookeeperUtils.isKerberosEnabled(hiveConf)) {
   String principal = 
hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);
   if (principal.isEmpty()) {
@@ -1099,6 +1102,7 @@ public class HiveServer2 extends CompositeService {
*/
   static void deleteServerInstancesFromZooKeeper(String versionNumber) throws 
Exception {
 HiveConf hiveConf = new HiveConf();
+setUpZooKeeperAuth(hiveConf);
 CuratorFramework zooKeeperClient = 
hiveConf.getZKConfig().getNewZookeeperClient();
 zooKeeperClient.start();
 String rootNamespace = 
hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE);
@@ -1109,7 +1113,7 @@ public class HiveServer2 extends CompositeService {
 // Now for each path that is for the given versionNumber, delete the znode 
from ZooKeeper
 for (int i = 0; i < znodePaths.size(); i++) {
   String znodePath = znodePaths.get(i);
-  deleteSignal = new CountDownLatch(1);
+  zkDeleteSignal = new CountDownLatch(1);
   if (znodePath.contains("version=" + versionNumber + ";")) {
 String fullZnodePath =
 ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace
@@ -1119,7 +1123,11 @@ public class HiveServer2 extends CompositeService {
 zooKeeperClient.delete().guaranteed().inBackground(new 
DeleteCallBack())
 .forPath(fullZnodePath);
 // Wait for the delete to complete
-deleteSignal.await();
+zkDeleteSignal.await();
+final KeeperException.Code rc = HiveServer2.zkDeleteResultCode;
+if (rc != KeeperException.Code.OK) {
+  throw KeeperException.create(rc);
+}
 // Get the updated path list
 znodePathsUpdated =
 zooKeeperClient.getChildren().forPath(
@@ -1138,7 +1146,8 @@ public class HiveServer2 extends CompositeService {
 public void processResult(CuratorFramework zooKeeperClient, CuratorEvent 
event)
 throws Exception {
   if (event.getType() == CuratorEventType.DELETE) {
-deleteSignal.countDown();
+zkDeleteResultCode = KeeperException.Code.get(event.getResultCode());
+zkDeleteSignal.countDown();
   }
 }
   }


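The fix sets up ZooKeeper auth before deleting and, instead of assuming the background delete succeeded, records the callback's result code and rethrows it when it is not OK. A hedged sketch of that latch-plus-result-code pattern with Curator (connect string and znode path are placeholders):

import java.util.concurrent.CountDownLatch;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.api.CuratorEventType;
import org.apache.curator.retry.RetryOneTime;
import org.apache.zookeeper.KeeperException;

public class BackgroundDeleteSketch {
  public static void main(String[] args) throws Exception {
    CuratorFramework client =
        CuratorFrameworkFactory.newClient("localhost:2181", new RetryOneTime(1000));
    client.start();

    CountDownLatch deleteSignal = new CountDownLatch(1);
    final KeeperException.Code[] resultCode = new KeeperException.Code[1];

    client.delete().guaranteed().inBackground((c, event) -> {
      if (event.getType() == CuratorEventType.DELETE) {
        resultCode[0] = KeeperException.Code.get(event.getResultCode());
        deleteSignal.countDown();            // signal only once the delete callback fired
      }
    }).forPath("/hiveserver2/some-instance-znode");

    deleteSignal.await();
    if (resultCode[0] != KeeperException.Code.OK) {
      throw KeeperException.create(resultCode[0]); // surface the real failure
    }
    client.close();
  }
}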

[hive] branch master updated: HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor via Ashutosh Chauhan)

2020-04-17 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 15ebf9e  HIVE-23194 : Use Queue Instead of List for CollectOperator 
(David Mollitor via Ashutosh Chauhan)
15ebf9e is described below

commit 15ebf9e208f56b0e54ed513bff099473683dc6eb
Author: David Mollitor 
AuthorDate: Fri Apr 17 20:48:08 2020 -0700

HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor 
via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java
index 2ec9a4f..df69bf2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java
@@ -19,7 +19,8 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
-import java.util.ArrayList;
+import java.util.ArrayDeque;
+import java.util.Queue;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
@@ -37,7 +38,7 @@ public class CollectOperator extends Operator 
implements
 Serializable {
 
   private static final long serialVersionUID = 1L;
-  protected transient ArrayList rowList;
+  protected transient Queue rowList;
   protected transient ObjectInspector standardRowInspector;
   transient int maxSize;
 
@@ -53,7 +54,7 @@ public class CollectOperator extends Operator 
implements
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
 super.initializeOp(hconf);
-rowList = new ArrayList();
+this.rowList = new ArrayDeque<>();
 maxSize = conf.getBufferSize().intValue();
   }
 
@@ -83,7 +84,7 @@ public class CollectOperator extends Operator 
implements
   result.o = null;
   result.oi = null;
 } else {
-  result.o = rowList.remove(0);
+  result.o = rowList.poll();
   result.oi = standardRowInspector;
 }
   }


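The swap matters because removing from the head of an ArrayList shifts every remaining element, while ArrayDeque.poll() is constant time; for a buffer that is always drained from the front, the queue is the right structure. A tiny illustrative comparison (the element count is arbitrary):

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;

public class HeadRemovalSketch {
  public static void main(String[] args) {
    int n = 100_000;
    List<Integer> list = new ArrayList<>();
    Queue<Integer> queue = new ArrayDeque<>();
    for (int i = 0; i < n; i++) {
      list.add(i);
      queue.add(i);
    }

    long t0 = System.nanoTime();
    while (!list.isEmpty()) {
      list.remove(0);          // O(n): shifts the remaining elements each time
    }
    long t1 = System.nanoTime();
    while (!queue.isEmpty()) {
      queue.poll();            // O(1): constant-time removal from the head
    }
    long t2 = System.nanoTime();

    System.out.printf("ArrayList drain: %d ms, ArrayDeque drain: %d ms%n",
        (t1 - t0) / 1_000_000, (t2 - t1) / 1_000_000);
  }
}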

[hive] branch master updated: HIVE-23233 : Using default operation logs location cause hive service session testing failed (RuiChen via Ashutosh Chauhan)

2020-04-17 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 465f698  HIVE-23233 : Using default operation logs location cause hive 
service session testing failed (RuiChen via Ashutosh Chauhan)
465f698 is described below

commit 465f69809aebecd1e79f03d00678d26beb9213ec
Author: Ashutosh Chauhan 
AuthorDate: Fri Apr 17 20:34:55 2020 -0700

HIVE-23233 : Using default operation logs location cause hive service 
session testing failed (RuiChen via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../service/cli/session/TestSessionCleanup.java| 27 ++
 .../cli/session/TestSessionManagerMetrics.java |  4 
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git 
a/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java 
b/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java
index 51ce2c2..c19d97a 100644
--- 
a/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java
+++ 
b/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java
@@ -23,12 +23,12 @@ import java.io.FilenameFilter;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Random;
 import java.util.Set;
-
+import java.util.concurrent.atomic.AtomicInteger;
 
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hive.service.cli.CLIService;
 import org.apache.hive.service.cli.OperationHandle;
 import org.apache.hive.service.cli.SessionHandle;
@@ -41,6 +41,11 @@ import org.junit.Test;
  * TestSessionCleanup.
  */
 public class TestSessionCleanup {
+
+  private static final AtomicInteger salt = new AtomicInteger(new 
Random().nextInt());
+  private final String TEST_DATA_DIR = System.getProperty("java.io.tmpdir") + 
File.separator +
+  TestSessionCleanup.class.getCanonicalName() + "-" + 
System.currentTimeMillis() + "_" + salt.getAndIncrement();
+
   // Create subclass of EmbeddedThriftBinaryCLIService, just so we can get an 
accessor to the CLIService.
   // Needed for access to the OperationManager.
   private class MyEmbeddedThriftBinaryCLIService extends 
EmbeddedThriftBinaryCLIService {
@@ -61,10 +66,14 @@ public class TestSessionCleanup {
 hiveConf
 .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
 
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
+//NOTES: Apply a random tmp directory to avoid default location 
conflicting with other tests
+hiveConf
+.setVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION,
+TEST_DATA_DIR + File.separator + "operation_logs");
 service.init(hiveConf);
 ThriftCLIServiceClient client = new ThriftCLIServiceClient(service);
 
-Set existingPipeoutFiles = new 
HashSet(Arrays.asList(getPipeoutFiles()));
+Set existingPipeoutFiles = new 
HashSet(Arrays.asList(getPipeoutFiles(hiveConf)));
 SessionHandle sessionHandle = client.openSession("user1", "foobar",
   Collections.emptyMap());
 OperationHandle opHandle1 = client.executeStatement(sessionHandle, "set 
a=b", null);
@@ -74,15 +83,15 @@ public class TestSessionCleanup {
 String queryId2 = 
service.getCliService().getQueryId(opHandle2.toTOperationHandle());
 Assert.assertNotNull(queryId2);
 File operationLogRootDir = new File(
-new 
HiveConf().getVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION));
-Assert.assertNotEquals(operationLogRootDir.list().length, 0);
+
hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION));
+Assert.assertNotEquals(0, operationLogRootDir.list().length);
 client.closeSession(sessionHandle);
 
 // Check if session files are removed
-Assert.assertEquals(operationLogRootDir.list().length, 0);
+Assert.assertEquals(0, operationLogRootDir.list().length);
 
 // Check if the pipeout files are removed
-Set finalPipeoutFiles = new 
HashSet(Arrays.asList(getPipeoutFiles()));
+Set finalPipeoutFiles = new 
HashSet(Arrays.asList(getPipeoutFiles(hiveConf)));
 finalPipeoutFiles.removeAll(existingPipeoutFiles);
 Assert.assertTrue(finalPipeoutFiles.isEmpty());
 
@@ -94,9 +103,9 @@ public class TestSessionCleanup {
 
Assert.assertNull(service.getCliService().getSessionManager().getOperationManager().getOperationByQueryId(queryId1));
   }
 
-  private String[] getPipeoutFiles() {
+  private String[] getPipeoutFiles(HiveConf hiveConf) {
 File localScratchDir = new File(
-new HiveConf().getVar(HiveConf.ConfVars.LOCALSCRATCHDIR));
+hiveConf.getVar(HiveConf.ConfVars.LOCALSC

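The test fix replaces the shared default operation-log location with a per-run directory built from a timestamp and a salted counter, so parallel test JVMs cannot collide. A small sketch of that naming scheme (the helper and class names are illustrative):

import java.io.File;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;

public class UniqueTestDirSketch {
  private static final AtomicInteger SALT = new AtomicInteger(new Random().nextInt());

  static String newTestDataDir(Class<?> testClass) {
    // java.io.tmpdir + test class name + timestamp + salt, as in the patch
    return System.getProperty("java.io.tmpdir") + File.separator
        + testClass.getCanonicalName() + "-" + System.currentTimeMillis()
        + "_" + SALT.getAndIncrement();
  }

  public static void main(String[] args) {
    // the patch appends File.separator + "operation_logs" and sets it on HiveConf
    System.out.println(newTestDataDir(UniqueTestDirSketch.class));
  }
}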
[hive] branch master updated: HIVE-23004 : Support Decimal64 operations across multiple vertices (Ramesh Kumar via Ashutosh Chauhan)

2020-04-17 Thread hashutosh
This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 11d8298  HIVE-23004 : Support Decimal64 operations across multiple 
vertices (Ramesh Kumar via Ashutosh Chauhan)
11d8298 is described below

commit 11d829828e401dd941c8010ecf5f87a8248852fb
Author: RAMESH KUMAR THANGARAJAN 
AuthorDate: Fri Apr 17 20:20:12 2020 -0700

HIVE-23004 : Support Decimal64 operations across multiple vertices (Ramesh 
Kumar via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan 
---
 .../test/resources/testconfiguration.properties|   1 +
 .../ql/exec/spark/SparkReduceRecordHandler.java|  42 +--
 .../hive/ql/exec/tez/ReduceRecordSource.java   |  34 ++-
 .../hive/ql/optimizer/physical/Vectorizer.java |  18 +-
 .../clientpositive/vector_decimal64_multi_vertex.q |  46 +++
 .../results/clientpositive/llap/keep_uniform.q.out |  10 +-
 .../llap/mapjoin_decimal_vectorized.q.out  |   8 +-
 .../clientpositive/llap/vector_case_when_2.q.out   |   8 +-
 .../clientpositive/llap/vector_cast_constant.q.out |   6 +-
 .../llap/vector_decimal64_case_when_nvl.q.out  |  12 +-
 .../llap/vector_decimal64_case_when_nvl_cbo.q.out  |   8 +-
 .../llap/vector_decimal64_multi_vertex.q.out   | 328 +
 .../llap/vector_decimal_aggregate.q.out|   8 +-
 .../clientpositive/llap/vector_decimal_join.q.out  |   4 +-
 .../clientpositive/llap/vector_decimal_round.q.out |  12 +-
 .../llap/vector_decimal_trailing.q.out |   2 +-
 .../clientpositive/llap/vector_decimal_udf.q.out   |   8 +-
 .../llap/vector_outer_reference_windowed.q.out |  40 +--
 .../llap/vector_windowing_rank.q.out   |   4 +-
 .../clientpositive/llap/vectorization_17.q.out |   2 +-
 .../vectorized_dynamic_semijoin_reduction2.q.out   |   4 +-
 .../llap/vectorized_parquet_types.q.out|   2 +-
 .../spark/vector_cast_constant.q.out   |   6 +-
 .../spark/vector_decimal_aggregate.q.out   |   8 +-
 .../clientpositive/spark/vectorization_17.q.out|   2 +-
 .../fast/BinarySortableDeserializeRead.java|  11 +-
 .../lazy/fast/LazySimpleDeserializeRead.java   |   3 +-
 .../lazybinary/fast/LazyBinaryDeserializeRead.java |  16 +-
 28 files changed, 540 insertions(+), 113 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 48f90fe..e04f7d5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -890,6 +890,7 @@ minillaplocal.query.files=\
   vector_decimal_udf.q,\
   vector_decimal64_case_when_nvl.q,\
   vector_decimal64_case_when_nvl_cbo.q,\
+  vector_decimal64_multi_vertex.q,\
   vector_full_outer_join.q,\
   vector_fullouter_mapjoin_1_fast.q,\
   vector_fullouter_mapjoin_1_optimized.q,\
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index 07cb5cb..86f3aaa 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
@@ -32,11 +33,6 @@ import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
@@ -96,6 +92,7 @@ public class SparkReduceRecordHandler extends 
SparkRecordHandler {
   private VectorDeserializeRow 
valueLazyBinaryDeserializeToRow;
 
   private VectorizedRowBatch batch;
+  private VectorizedRowBatchCtx batchContext;
   private long batchBytes = 0;
   private boolean handleGroupKey = true;  // For now.
 
@@ -131,6 +128,7 @@ public class SparkReduceRecordHandler extends 
SparkRecordHandler {
 reducer = gWork.getReducer();
 vectorized = gWork.getVectorMode();
 reducer.setParentOperators(null); // clear out any parents as reducer
