[hive] branch master updated: HIVE-24270: Move scratchdir cleanup to background
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f7e9d9b HIVE-24270: Move scratchdir cleanup to background f7e9d9b is described below commit f7e9d9b14e9f1fb266aefa9cad73d509d9d614af Author: Mustafa Iman AuthorDate: Tue Oct 13 14:14:10 2020 -0700 HIVE-24270: Move scratchdir cleanup to background Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 + ql/src/java/org/apache/hadoop/hive/ql/Context.java | 13 +- .../hadoop/hive/ql/cleanup/CleanupService.java | 38 ++ .../hive/ql/cleanup/EventualCleanupService.java| 145 .../hadoop/hive/ql/cleanup/SyncCleanupService.java | 68 + .../hadoop/hive/ql/session/SessionState.java | 17 ++- .../hadoop/hive/ql/cleanup/TestCleanupService.java | 152 + .../hive/service/cli/session/HiveSessionImpl.java | 7 +- .../hive/service/cli/session/SessionManager.java | 18 +++ 9 files changed, 451 insertions(+), 12 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index edaa75b..45a44e9 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -5238,6 +5238,11 @@ public class HiveConf extends Configuration { HIVE_SCHEDULED_QUERIES_MAX_EXECUTORS("hive.scheduled.queries.max.executors", 4, new RangeValidator(1, null), "Maximal number of scheduled query executors to allow."), + HIVE_ASYNC_CLEANUP_SERVICE_THREAD_COUNT("hive.async.cleanup.service.thread.count", 10, new RangeValidator(0, null), +"Number of threads that run some eventual cleanup operations after queries/sessions close. 
0 means cleanup is sync."), + HIVE_ASYNC_CLEANUP_SERVICE_QUEUE_SIZE("hive.async.cleanup.service.queue.size", 1, new RangeValidator(10, Integer.MAX_VALUE), +"Size of the async cleanup queue. If cleanup queue is full, cleanup operations become synchronous. " + +"Applicable only when number of async cleanup is turned on."), HIVE_QUERY_RESULTS_CACHE_ENABLED("hive.query.results.cache.enabled", true, "If the query results cache is enabled. This will keep results of previously executed queries " + "to be reused if the same query is executed again."), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index a41c5c8..e4141fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -673,22 +673,21 @@ public class Context { if(this.fsResultCacheDirs != null) { resultCacheDir = this.fsResultCacheDirs.toUri().getPath(); } -for (Map.Entry entry : fsScratchDirs.entrySet()) { +SessionState sessionState = SessionState.get(); +for (Path p: fsScratchDirs.values()) { try { -Path p = entry.getValue(); if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, fsScratchDirs.values()) ) { LOG.debug("Skip deleting stagingDir: " + p); FileSystem fs = p.getFileSystem(conf); fs.cancelDeleteOnExit(p); continue; // staging dir is deleted when deleting the scratch dir } -if(resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) { +if (resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) { // delete only the paths which aren't result cache dir path // because that will be taken care by removeResultCacheDir -FileSystem fs = p.getFileSystem(conf); -LOG.debug("Deleting scratch dir: {}", p); -fs.delete(p, true); -fs.cancelDeleteOnExit(p); + FileSystem fs = p.getFileSystem(conf); + LOG.info("Deleting scratch dir: {}", p); + sessionState.getCleanupService().deleteRecursive(p, fs); } } catch (Exception e) { LOG.warn("Error Removing 
Scratch: " diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java b/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java new file mode 100644 index 000..919298e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/cleanup/CleanupService.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache
[hive] branch master updated: HIVE-24209: Incorrect search argument conversion for NOT BETWEEN operation when vectorization is enabled (Ganesha Shreedhara via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 041b9c2 HIVE-24209 : Incorrect search argument conversion for NOT BETWEEN operation when vectorization is enabled (Ganesha Shreedhara via Ashutosh Chauhan) 041b9c2 is described below commit 041b9c25c3694c3fa09b132705eecccab96c6385 Author: Ganesha Shreedhara AuthorDate: Mon Oct 5 21:39:18 2020 -0700 HIVE-24209 : Incorrect search argument conversion for NOT BETWEEN operation when vectorization is enabled (Ganesha Shreedhara via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 9 +- .../clientpositive/vector_between_columns.q| 43 + .../llap/vector_between_columns.q.out | 211 + 3 files changed, 262 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 764c401..fd24eaa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -471,7 +471,14 @@ public class ConvertAstToSearchArg { } else if (op == GenericUDFIn.class) { createLeaf(PredicateLeaf.Operator.IN, expr, 0); } else if (op == GenericUDFBetween.class) { - createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1); + // Start with NOT operator when the first child of GenericUDFBetween operator is set to TRUE + if (Boolean.TRUE.equals(((ExprNodeConstantDesc) expression.getChildren().get(0)).getValue())) { +builder.startNot(); +createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1); +builder.end(); + } else { +createLeaf(PredicateLeaf.Operator.BETWEEN, expr, 1); + } } else if (op == GenericUDFOPNull.class) { createLeaf(PredicateLeaf.Operator.IS_NULL, expr, 0); } else if (op == 
GenericUDFOPNotNull.class) { diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q index a8e9ca4..63142f3 100644 --- a/ql/src/test/queries/clientpositive/vector_between_columns.q +++ b/ql/src/test/queries/clientpositive/vector_between_columns.q @@ -5,6 +5,7 @@ SET hive.auto.convert.join=true; set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; set hive.join.inner.residual=false; +set hive.optimize.index.filter=true; -- SORT_QUERY_RESULTS -- @@ -30,6 +31,43 @@ create table TINT stored as orc AS SELECT * FROM TINT_txt; -- Add a single NULL row that will come from ORC as isRepeated. insert into TINT values (NULL, NULL); +CREATE EXTERNAL TABLE test_orc_ppd( + data_release bigint, + data_owner_ver_id bigint, + data_owner_dim_id bigint, + data_source_ver_id bigint, + data_source_dim_id bigint, + data_client_ver_id bigint, + data_client_dim_id bigint, + data_client_sub_ver_id bigint, + data_client_sub_dim_id bigint, + quarter_dim_id bigint, + market_dim_id bigint, + daypart_dim_id bigint, + demo_dim_id bigint, + station_dim_id bigint, + medium_dim_id bigint, + ad_length int, + exclude int, + population int, + client_cpp double, + client_cpm double, + low_cpp double, + mid_cpp double, + high_cpp double, + low_cpm double, + mid_cpm double, + high_cpm double, + low_cpp_index double, + mid_cpp_index double, + high_cpp_index double, + low_cpm_index double, + mid_cpm_index double, + high_cpm_index double) + STORED AS ORC; +LOAD DATA LOCAL INPATH '../../data/files/orc_test_ppd' +OVERWRITE INTO TABLE test_orc_ppd; + explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint; @@ -40,3 +78,8 @@ explain vectorization expression select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and 
tsint.csint; select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; + +explain vectorization expression +select data_release, count(*) from test_orc_ppd where NOT (data_release BETWEEN 20191201 AND 20200101) group by data_release; + +select data_release, count(*) from test_orc_ppd where NOT (data_release BETWEEN 20191201 AND 20200101) group by data_release; diff --git a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index 546dc45..cb6d521 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_co
[hive] branch master updated: HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file (Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6946c81 HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file ( Panos G via Ashutosh Chauhan)s 6946c81 is described below commit 6946c816694f2706098caed95e3219520fd57c5d Author: Panos Garefalakis AuthorDate: Fri Oct 2 12:29:21 2020 +0100 HIVE-24224: Fix skipping header/footer for Hive on Tez on compressed file ( Panos G via Ashutosh Chauhan)s Signed-off-by: Ashutosh Chauhan --- .../test/resources/testconfiguration.properties| 2 +- .../hive/llap/io/encoded/PassThruOffsetReader.java | 9 +- .../apache/hadoop/hive/ql/io/HiveInputFormat.java | 10 +- .../hadoop/hive/ql/io/RecordReaderWrapper.java | 205 ++ .../compressed_skip_header_footer_aggr.q | 74 - .../llap/compressed_skip_header_footer_aggr.q.out | 290 +-- .../tez/compressed_skip_header_footer_aggr.q.out | 308 + 7 files changed, 854 insertions(+), 44 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index a29a4a3..3680dc7 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -6,6 +6,7 @@ minimr.query.files=\ # Queries ran by both MiniLlapLocal and MiniTez minitez.query.files.shared=\ + compressed_skip_header_footer_aggr.q,\ hybridgrace_hashjoin_1.q,\ hybridgrace_hashjoin_2.q @@ -43,7 +44,6 @@ minillap.query.files=\ binary_output_format.q,\ bucket5.q,\ bucket6.q,\ - compressed_skip_header_footer_aggr.q,\ create_genericudaf.q,\ create_udaf.q,\ create_view.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java index cab13ee..56faace 100644 --- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java @@ -27,7 +27,7 @@ import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -@SuppressWarnings("rawtypes") class PassThruOffsetReader implements ReaderWithOffsets { +class PassThruOffsetReader implements ReaderWithOffsets { protected final RecordReader sourceReader; protected final Object key; protected final Writable value; @@ -58,7 +58,7 @@ import org.apache.hadoop.mapred.RecordReader; */ if (!initialized) { // Skip header lines. -opNotEOF = Utilities.skipHeader(sourceReader, skipFooterCnt, key, value); +opNotEOF = Utilities.skipHeader(sourceReader, skipHeaderCnt, key, value); // Initialize footer buffer. if (opNotEOF && skipFooterCnt > 0) { @@ -87,10 +87,9 @@ import org.apache.hadoop.mapred.RecordReader; if (opNotEOF) { // File reached the end return true; - } else { -// Done reading -return false; } + // Done reading + return false; } catch (Exception e) { throw new IOException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index f564ed7..f5c25d6 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -401,7 +401,6 @@ public class HiveInputFormat public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { HiveInputSplit hsplit = (HiveInputSplit) split; -InputSplit inputSplit = hsplit.getInputSplit(); String inputFormatClassName = null; Class inputFormatClass = null; try { @@ -444,7 +443,8 @@ public class HiveInputFormat } RecordReader innerReader = null; try { - innerReader = inputFormat.getRecordReader(inputSplit, job, reporter); + // Handle the special header/footer skipping 
cases here. + innerReader = RecordReaderWrapper.create(inputFormat, hsplit, part.getTableDesc(), job, reporter); } catch (Exception e) { innerReader = HiveIOExceptionHandlerUtil .handleRecordReaderCreationException(e, job); @@ -531,12 +531,10 @@ public class HiveInputFormat } conf.setInputFormat(inputFormat.getClass()); -int headerCount = 0; -int footerCount = 0; boolean isCompressedFormat = isCompressedInput(finalDirs); if (table != null) { - headerCount = Utilities.getHeaderCount(table); - footerCount = Utilities.getFooterCount(table, conf); +
[hive] branch master updated: HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 56646de HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan) 56646de is described below commit 56646de06047320fa95387318be34ee80a123788 Author: Mustafa Iman AuthorDate: Fri Oct 2 16:35:44 2020 -0700 HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java | 7 +++ 1 file changed, 7 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java index f9a86ae..10e8332 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java @@ -41,6 +41,8 @@ public class CuckooSetBytes { private int rehashCount = 0; private static final long INT_MASK = 0xL; private static final long BYTE_MASK = 0x00ffL; + private int maxLen; + private int minLen = Integer.MAX_VALUE; /** * Allocate a new set to hold expectedSize values. Re-allocation to expand @@ -76,6 +78,9 @@ public class CuckooSetBytes { * and ending at start+len is present in the set. */ public boolean lookup(byte[] b, int start, int len) { +if (len < minLen || len > maxLen) { + return false; +} return entryEqual(t1, h1(b, start, len), b, start, len) || entryEqual(t2, h2(b, start, len), b, start, len); @@ -90,6 +95,8 @@ public class CuckooSetBytes { if (lookup(x, 0, x.length)) { return; } +minLen = Math.min(minLen, x.length); +maxLen = Math.max(maxLen, x.length); // Try to insert up to n times. Rehash if that fails. for(int i = 0; i != n; i++) {
[hive] branch master updated: HIVE-24011: Flaky test AsyncResponseHandlerTest (Mustafa Iman via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 5d9a5cf HIVE-24011: Flaky test AsyncResponseHandlerTest ( Mustafa Iman via Ashutosh Chauhan) 5d9a5cf is described below commit 5d9a5cf5a36c1d704d2671eb57547ea50249f28b Author: Mustafa Iman AuthorDate: Fri Aug 7 13:56:49 2020 -0700 HIVE-24011: Flaky test AsyncResponseHandlerTest ( Mustafa Iman via Ashutosh Chauhan) Timeout is too low. Also retry logic could cause "java.lang.IllegalArgumentException: timeout value is negative" Signed-off-by: Ashutosh Chauhan --- .../test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java b/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java index 3d7bd90..d5d24cf 100644 --- a/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java +++ b/llap-common/src/test/org/apache/hadoop/hive/llap/AsyncResponseHandlerTest.java @@ -194,7 +194,7 @@ public class AsyncResponseHandlerTest { } private void assertTrueEventually(AssertTask assertTask) throws InterruptedException { -assertTrueEventually(assertTask, 1); +assertTrueEventually(assertTask, 10); } private void assertTrueEventually(AssertTask assertTask, int timeoutMillis) throws InterruptedException { @@ -207,9 +207,7 @@ public class AsyncResponseHandlerTest { return; } catch (AssertionError e) { assertionError = e; -long millisUntilTimeout = endTime - System.currentTimeMillis(); -sleep(millisUntilTimeout < 50 ? millisUntilTimeout : 50 ); -continue; +sleep(50); } } throw assertionError;
[hive] branch master updated: HIVE-22934: Hive server interactive log counters to error stream (Ramesh Kumar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 4457c3e HIVE-22934 Hive server interactive log counters to error stream ( Ramesh Kumar via Ashutosh Chauhan) 4457c3e is described below commit 4457c3ec9360650be021ea84ed1d5d0f007d8308 Author: Ramesh Thangarajan AuthorDate: Wed Jul 1 12:26:49 2020 -0700 HIVE-22934 Hive server interactive log counters to error stream ( Ramesh Kumar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/cli/TestCliDriverMethods.java | 1 + .../java/org/apache/hadoop/hive/ql/QTestUtil.java | 4 +++- .../hadoop/hive/ql/session/SessionState.java | 28 +- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java index 5f21900..37448fe 100644 --- a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java +++ b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java @@ -331,6 +331,7 @@ public class TestCliDriverMethods { sessionState.err = new SessionStream(data); sessionState.out = new SessionStream(System.out); +sessionState.setIsQtestLogging(true); try { CliSessionState.start(sessionState); CliDriver cliDriver = new CliDriver(); diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 6f42bbe..20ee2fe 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -623,6 +623,7 @@ public class QTestUtil { } File outf = new File(logDir, stdoutName); setSessionOutputs(fileName, ss, outf); +ss.setIsQtestLogging(true); if (fileName.equals("init_file.q")) { 
ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + "/data/scripts/test_init_file.sql"); @@ -638,13 +639,14 @@ public class QTestUtil { ss.out.flush(); } if (ss.err != null) { - ss.out.flush(); + ss.err.flush(); } qTestResultProcessor.setOutputs(ss, fo, fileName); ss.err = new CachingPrintStream(fo, true, "UTF-8"); ss.setIsSilent(true); +ss.setIsQtestLogging(true); } public CliSessionState startSessionState(boolean canReuseSession) throws IOException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 0bf6735..5d42efb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -161,6 +161,11 @@ public class SessionState implements ISessionAuthState{ protected boolean isSilent; /** + * silent mode. + */ + protected boolean isQtestLogging; + + /** * verbose mode */ protected boolean isVerbose; @@ -372,6 +377,10 @@ public class SessionState implements ISessionAuthState{ } } + public boolean getIsQtestLogging() { +return isQtestLogging; + } + public boolean isHiveServerQuery() { return this.isHiveServerQuery; } @@ -383,6 +392,10 @@ public class SessionState implements ISessionAuthState{ this.isSilent = isSilent; } + public void setIsQtestLogging(boolean isQtestLogging) { +this.isQtestLogging = isQtestLogging; + } + public ReentrantLock getCompileLock() { return compileLock; } @@ -1179,6 +1192,17 @@ public class SessionState implements ISessionAuthState{ return (ss != null) ? ss.getIsSilent() : isSilent; } + +/** + * Is the logging to the info stream is enabled, or not. + * @return True if the logging is disabled to the HiveServer2 or HiveCli info stream + */ +public boolean getIsQtestLogging() { + SessionState ss = SessionState.get(); + // use the session or the one supplied in constructor + return (ss != null) ? ss.getIsQtestLogging() : false; +} + /** * Logs into the log file. 
* BeeLine uses the operation log file to show the logs to the user, so depending on the @@ -1270,7 +1294,9 @@ public class SessionState implements ISessionAuthState{ * @param detail Extra detail to log which will be not printed if null */ public void printError(String error, String detail) { - getErrStream().println(error); + if(!getIsSilent() || getIsQtestLogging()) { +getErrStream().println(error); + } LOG.error(error + StringUtils.defaultString(detail)); } }
[hive] branch master updated: HIVE-23975: Reuse evicted keys from aggregation buffers (Mustafa Iman via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new a88871e HIVE-23975: Reuse evicted keys from aggregation buffers (Mustafa Iman via Rajesh Balamohan) a88871e is described below commit a88871e58ecc4d57629e91454d129d8669c06f41 Author: Mustafa Iman AuthorDate: Sun Aug 2 21:25:20 2020 -0700 HIVE-23975: Reuse evicted keys from aggregation buffers (Mustafa Iman via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/exec/vector/VectorGroupByOperator.java | 26 +++- .../wrapper/VectorHashKeyWrapperGeneral.java | 141 +++-- 2 files changed, 158 insertions(+), 9 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index b6cd405..f6b38d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterF import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperGeneral; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; @@ -297,6 +298,8 @@ public class VectorGroupByOperator extends Operator */ final class ProcessingModeHashAggregate extends ProcessingModeBase { +private Queue reusableKeyWrapperBuffer; + /** * The global key-aggregation hash map. 
*/ @@ -405,6 +408,10 @@ public class VectorGroupByOperator extends Operator } computeMemoryLimits(); LOG.debug("using hash aggregation processing mode"); + + if (keyWrappersBatch.getVectorHashKeyWrappers()[0] instanceof VectorHashKeyWrapperGeneral) { +reusableKeyWrapperBuffer = new ArrayDeque<>(VectorizedRowBatch.DEFAULT_SIZE); + } } @VisibleForTesting @@ -488,6 +495,9 @@ public class VectorGroupByOperator extends Operator @Override public void close(boolean aborted) throws HiveException { reusableAggregationBufferRows.clear(); + if (reusableKeyWrapperBuffer != null) { +reusableKeyWrapperBuffer.clear(); + } if (!aborted) { flush(true); } @@ -536,7 +546,8 @@ public class VectorGroupByOperator extends Operator // is very important to clone the keywrapper, the one we have from our // keyWrappersBatch is going to be reset/reused on next batch. aggregationBuffer = allocateAggregationBuffer(); - mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); + KeyWrapper copyKeyWrapper = cloneKeyWrapper(kw); + mapKeysAggregationBuffers.put(copyKeyWrapper, aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; } else { @@ -548,6 +559,16 @@ public class VectorGroupByOperator extends Operator } } +private KeyWrapper cloneKeyWrapper(VectorHashKeyWrapperBase from) { + if (reusableKeyWrapperBuffer != null && reusableKeyWrapperBuffer.size() > 0) { +KeyWrapper keyWrapper = reusableKeyWrapperBuffer.poll(); +from.copyKey(keyWrapper); +return keyWrapper; + } else { +return from.copyKey(); + } +} + /** * Computes the memory limits for hash table flush (spill). 
*/ @@ -637,6 +658,9 @@ public class VectorGroupByOperator extends Operator totalAccessCount -= bufferRow.getAccessCount(); reusableAggregationBufferRows.add(bufferRow); bufferRow.resetAccessCount(); + if (reusableKeyWrapperBuffer != null) { +reusableKeyWrapperBuffer.add(pair.getKey()); + } iter.remove(); --numEntriesHashTable; if (++entriesFlushed >= entriesToFlush) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java index c605ce3..929bb0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneral.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.wrapper; +import org.apache.hadoop.hi
[hive] branch master updated: HIVE-23952: Reuse VectorAggregationBuffer to reduce GC pressure in VectorGroupByOperator (Mustafa Iman via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1ad68c0 HIVE-23952: Reuse VectorAggregationBuffer to reduce GC pressure in VectorGroupByOperator ( Mustafa Iman via Rajesh Balamohan) 1ad68c0 is described below commit 1ad68c0cd2e039ae15fa222955d078d6bd3580b1 Author: Mustafa Iman AuthorDate: Wed Jul 29 21:49:42 2020 -0700 HIVE-23952: Reuse VectorAggregationBuffer to reduce GC pressure in VectorGroupByOperator ( Mustafa Iman via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/exec/vector/VectorGroupByOperator.java | 46 ++--- .../ql/exec/vector/TestVectorGroupByOperator.java | 75 ++ 2 files changed, 111 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 02864d9..b6cd405 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,12 +22,14 @@ import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.lang.reflect.Constructor; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Queue; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.conf.Configuration; @@ -107,7 +109,8 @@ public class VectorGroupByOperator extends Operator // transient. //--- - private transient VectorAggregateExpression[] aggregators; + @VisibleForTesting + transient VectorAggregateExpression[] aggregators; /** * The aggregation buffers to use for the current batch. 
*/ @@ -159,10 +162,10 @@ public class VectorGroupByOperator extends Operator * Interface for processing mode: global, hash, unsorted streaming, or group batch */ private static interface IProcessingMode { -public void initialize(Configuration hconf) throws HiveException; -public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException; -public void processBatch(VectorizedRowBatch batch) throws HiveException; -public void close(boolean aborted) throws HiveException; +void initialize(Configuration hconf) throws HiveException; +void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException; +void processBatch(VectorizedRowBatch batch) throws HiveException; +void close(boolean aborted) throws HiveException; } /** @@ -300,6 +303,9 @@ public class VectorGroupByOperator extends Operator @VisibleForTesting Map mapKeysAggregationBuffers; +private Queue reusableAggregationBufferRows = +new ArrayDeque<>(VectorizedRowBatch.DEFAULT_SIZE); + /** * Total per hashtable entry fixed memory (does not depend on key/agg values). 
*/ @@ -465,7 +471,23 @@ public class VectorGroupByOperator extends Operator } @Override +protected VectorAggregationBufferRow allocateAggregationBuffer() throws HiveException { + VectorAggregationBufferRow bufferSet; + if (reusableAggregationBufferRows.size() > 0) { +bufferSet = reusableAggregationBufferRows.remove(); +bufferSet.setVersionAndIndex(0, 0); +for (int i = 0; i < aggregators.length; i++) { + aggregators[i].reset(bufferSet.getAggregationBuffer(i)); +} +return bufferSet; + } else { +return super.allocateAggregationBuffer(); + } +} + +@Override public void close(boolean aborted) throws HiveException { + reusableAggregationBufferRows.clear(); if (!aborted) { flush(true); } @@ -598,19 +620,23 @@ public class VectorGroupByOperator extends Operator mapKeysAggregationBuffers.entrySet().iterator(); while(iter.hasNext()) { Map.Entry pair = iter.next(); +KeyWrapper keyWrapper = pair.getKey(); +VectorAggregationBufferRow bufferRow = pair.getValue(); if (!all && avgAccess >= 1) { - if (pair.getValue().getAccessCount() > avgAccess) { + if (bufferRow.getAccessCount() > avgAccess) { // resetting to give chance for other entries -totalAccessCount -= pair.getValue().getAccessCount(); -pair.getValue().resetAccessCount(); +totalAccessCount -= bufferRow.getAccessCount(); +bufferRow.resetAccessCount(); continue; }
[hive] branch master updated: HIVE-23746: Send task attempts async from AM to daemons (Mustafa Iman via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 5c108dc HIVE-23746 : Send task attempts async from AM to daemons (Mustafa Iman via Rajesh Balamohan) 5c108dc is described below commit 5c108dc2c49f70228231099fce1a2032f33f9efd Author: Mustafa Iman AuthorDate: Tue Jul 21 12:02:48 2020 -0700 HIVE-23746 : Send task attempts async from AM to daemons (Mustafa Iman via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../hive/llap/tez/LlapProtocolClientProxy.java | 8 +- .../apache/hadoop/hive/llap/AsyncPbRpcProxy.java | 128 +++- .../hadoop/hive/llap/AsyncResponseHandler.java | 112 +++ .../hadoop/hive/llap/AsyncResponseHandlerTest.java | 222 + 5 files changed, 466 insertions(+), 6 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9b637fb..1d64f6b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4825,6 +4825,8 @@ public class HiveConf extends Configuration { LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT( "hive.llap.task.communicator.listener.thread-count", 30, "The number of task communicator listener threads."), + LLAP_MAX_CONCURRENT_REQUESTS_PER_NODE("hive.llap.max.concurrent.requests.per.daemon", 12, +"Maximum number of concurrent requests to one daemon from Tez AM"), LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS( "hive.llap.task.communicator.connection.sleep.between.retries.ms", "2000ms", new TimeValidator(TimeUnit.MILLISECONDS), diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java index 
bc74c55..6702531 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java @@ -54,7 +54,7 @@ public class LlapProtocolClientProxy HiveConf.getTimeVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_TIMEOUT_MS, TimeUnit.MILLISECONDS), HiveConf.getTimeVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS, -TimeUnit.MILLISECONDS), -1, 1); +TimeUnit.MILLISECONDS), -1, HiveConf.getIntVar(conf, ConfVars.LLAP_MAX_CONCURRENT_REQUESTS_PER_NODE)); } public void registerDag(RegisterDagRequestProto request, String host, int port, @@ -108,7 +108,7 @@ public class LlapProtocolClientProxy } } - private class SubmitWorkCallable extends NodeCallableRequest { + private class SubmitWorkCallable extends AsyncCallableRequest { protected SubmitWorkCallable(LlapNodeId nodeId, SubmitWorkRequestProto submitWorkRequestProto, @@ -117,8 +117,8 @@ public class LlapProtocolClientProxy } @Override -public SubmitWorkResponseProto call() throws Exception { - return getProxy(nodeId, null).submitWork(null, request); +public void callInternal() throws Exception { + getProxy(nodeId, null).submitWork(null, request); } } diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java index 25e10f4..5041c66 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java @@ -39,16 +39,22 @@ import java.util.concurrent.locks.ReentrantLock; import javax.net.SocketFactory; +import com.google.protobuf.ServiceException; import org.apache.hadoop.conf.Configuration; // TODO: LlapNodeId is just a host+port pair; we could make this class more generic. 
+import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.ipc.AsyncCallLimitExceededException; +import org.apache.hadoop.ipc.Client; +import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.util.concurrent.AsyncGet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -100,11 +106,15 @@ public ab
[hive] 02/02: HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable (Rajesh Balamohan via Ashutosh Chauhan, David Mollitor)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit 51346a0935acfca410c4858c7d4367e27a075392 Author: Rajesh Balamohan AuthorDate: Mon Jul 20 15:19:41 2020 +0530 HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable (Rajesh Balamohan via Ashutosh Chauhan, David Mollitor) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java | 8 +++- .../org/apache/hadoop/hive/serde2/io/HiveCharWritable.java | 10 +- .../org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java index 5b7b3b4..c4bd6ff 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java @@ -27,12 +27,17 @@ import org.apache.hive.common.util.HiveStringUtils; public abstract class HiveBaseCharWritable { protected Text value = new Text(); + protected int charLength = -1; public HiveBaseCharWritable() { } public int getCharacterLength() { -return HiveStringUtils.getTextUtfLength(value); +if (charLength != -1) { + return charLength; +} +charLength = HiveStringUtils.getTextUtfLength(value); +return charLength; } /** @@ -45,6 +50,7 @@ public abstract class HiveBaseCharWritable { public void readFields(DataInput in) throws IOException { value.readFields(in); +charLength = -1; } public void write(DataOutput out) throws IOException { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java index 5cc10a8..ea3b8e5 100644 --- 
a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java @@ -53,6 +53,7 @@ public class HiveCharWritable extends HiveBaseCharWritable public void set(HiveCharWritable val) { value.set(val.value); +charLength = -1; } public void set(HiveCharWritable val, int maxLength) { @@ -78,6 +79,9 @@ public class HiveCharWritable extends HiveBaseCharWritable } public Text getStrippedValue() { +if (value.charAt(value.getLength() - 1) != ' ') { + return value; +} // A lot of these methods could be done more efficiently by operating on the Text value // directly, rather than converting to HiveChar. return new Text(getHiveChar().getStrippedValue()); @@ -88,7 +92,11 @@ public class HiveCharWritable extends HiveBaseCharWritable } public int getCharacterLength() { -return HiveStringUtils.getTextUtfLength(getStrippedValue()); +if (charLength != -1) { + return charLength; +} +charLength = HiveStringUtils.getTextUtfLength(getStrippedValue()); +return charLength; } public int compareTo(HiveCharWritable rhs) { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java index 796c533..c3812d6 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java @@ -45,6 +45,7 @@ public class HiveVarcharWritable extends HiveBaseCharWritable public void set(HiveVarcharWritable val) { value.set(val.value); +charLength = val.charLength; } public void set(HiveVarcharWritable val, int maxLength) { @@ -57,6 +58,7 @@ public class HiveVarcharWritable extends HiveBaseCharWritable public void set(String val, int maxLength) { value.set(HiveBaseChar.enforceMaxLength(val, maxLength)); +charLength = maxLength; } public HiveVarchar getHiveVarchar() {
[hive] 01/02: HIVE-23843: Improve key evictions in VectorGroupByOperator (Rajesh Balamohan via Ashutosh Chauhan, Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit 380be9afd1a364fe0ff83e61e17ba4ced12f29a0 Author: Rajesh Balamohan AuthorDate: Tue Jul 14 10:00:14 2020 +0530 HIVE-23843: Improve key evictions in VectorGroupByOperator (Rajesh Balamohan via Ashutosh Chauhan, Zoltan Haindrich) Signed-off-by: Ashutosh Chauhan --- .../ql/exec/vector/VectorAggregationBufferRow.java | 12 +- .../hive/ql/exec/vector/VectorGroupByOperator.java | 53 +++-- .../ql/exec/vector/TestVectorGroupByOperator.java | 125 ++--- 3 files changed, 168 insertions(+), 22 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java index 494db35..a7ef154 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java @@ -28,7 +28,8 @@ public class VectorAggregationBufferRow { private VectorAggregateExpression.AggregationBuffer[] aggregationBuffers; private int version; private int index; - + private int accessed = 0; + public VectorAggregationBufferRow( VectorAggregateExpression.AggregationBuffer[] aggregationBuffers) { this.aggregationBuffers = aggregationBuffers; @@ -80,5 +81,12 @@ public class VectorAggregationBufferRow { aggregationBuffers[i].reset(); } } - + + public int getAccessCount() { +return accessed; + } + + public void incrementAccessCount() { +accessed++; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 9f81e8e..85535f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ 
-151,6 +151,10 @@ public class VectorGroupByOperator extends Operator private float memoryThreshold; private boolean isLlap = false; + + // tracks overall access count in map agg buffer any given time. + private long totalAccessCount; + /** * Interface for processing mode: global, hash, unsorted streaming, or group batch */ @@ -251,7 +255,7 @@ public class VectorGroupByOperator extends Operator * This mode is very simple, there are no keys to consider, and only flushes one row at closing * The one row must flush even if no input was seen (NULLs) */ - private class ProcessingModeGlobalAggregate extends ProcessingModeBase { + final class ProcessingModeGlobalAggregate extends ProcessingModeBase { /** * In global processing mode there is only one set of aggregation buffers @@ -288,12 +292,13 @@ public class VectorGroupByOperator extends Operator /** * Hash Aggregate mode processing */ - private class ProcessingModeHashAggregate extends ProcessingModeBase { + final class ProcessingModeHashAggregate extends ProcessingModeBase { /** * The global key-aggregation hash map. */ -private Map mapKeysAggregationBuffers; +@VisibleForTesting +Map mapKeysAggregationBuffers; /** * Total per hashtable entry fixed memory (does not depend on key/agg values). 
@@ -334,7 +339,8 @@ public class VectorGroupByOperator extends Operator /** * A soft reference used to detect memory pressure */ -private SoftReference gcCanary = new SoftReference(new Object()); +@VisibleForTesting +SoftReference gcCanary = new SoftReference(new Object()); /** * Counts the number of time the gcCanary died and was resurrected @@ -387,10 +393,19 @@ public class VectorGroupByOperator extends Operator sumBatchSize = 0; mapKeysAggregationBuffers = new HashMap(); + if (groupingSets != null && groupingSets.length > 0) { +this.maxHtEntries = this.maxHtEntries / groupingSets.length; +LOG.info("New maxHtEntries: {}, groupingSets len: {}", maxHtEntries, groupingSets.length); + } computeMemoryLimits(); LOG.debug("using hash aggregation processing mode"); } +@VisibleForTesting +int getMaxHtEntries() { + return maxHtEntries; +} + @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { @@ -502,6 +517,10 @@ public class VectorGroupByOperator extends Operator mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; +} else { + // for access tracking + aggregationBuffer.incrementAccessCount
[hive] branch master updated (c279768 -> 51346a0)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from c279768 HIVE-23849: Hive skips the creation of ColumnAccessInfo when creating a view (Barnabas Maidics, reviewed by Peter Vary, Jesus Camacho Rodriguez) new 380be9a HIVE-23843: Improve key evictions in VectorGroupByOperator (Rajesh Balamohan via Ashutosh Chauhan, Zoltan Haindrich) new 51346a0 HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable (Rajesh Balamohan via Ashutosh Chauhan, David Mollitor) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../ql/exec/vector/VectorAggregationBufferRow.java | 12 +- .../hive/ql/exec/vector/VectorGroupByOperator.java | 53 +++-- .../ql/exec/vector/TestVectorGroupByOperator.java | 125 ++--- .../hive/serde2/io/HiveBaseCharWritable.java | 8 +- .../hadoop/hive/serde2/io/HiveCharWritable.java| 10 +- .../hadoop/hive/serde2/io/HiveVarcharWritable.java | 2 + 6 files changed, 186 insertions(+), 24 deletions(-)
[hive] branch master updated: HIVE-23733 : genIncludedColNames functionality for OrcInputFormat (Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d7ee83d HIVE-23733 : genIncludedColNames functionality for ORCIputFormat ( Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface d7ee83d is described below commit d7ee83d0e23d74f0258ab3678bced016d4043db3 Author: Panos Garefalakis AuthorDate: Sat Jun 20 21:59:16 2020 +0100 HIVE-23733 : genIncludedColNames functionality for ORCIputFormat ( Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/api/impl/LlapRecordReader.java| 6 ++ .../hive/llap/io/decode/ColumnVectorProducer.java | 1 + .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 25 ++ 3 files changed, 32 insertions(+) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index a257a06..55a142e 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -742,6 +742,12 @@ class LlapRecordReader implements RecordReader } @Override +public String[] getOriginalColumnNames(TypeDescription fileSchema) { + return OrcInputFormat.genIncludedColNames( + fileSchema, filePhysicalColumnIds, acidStructColumnId); +} + +@Override public String getQueryId() { return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID); } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java index e37379b..2a3d7fd 100644 --- 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java @@ -49,6 +49,7 @@ public interface ColumnVectorProducer { List getPhysicalColumnIds(); List getReaderLogicalColumnIds(); TypeDescription[] getBatchReaderTypes(TypeDescription fileSchema); +String[] getOriginalColumnNames(TypeDescription fileSchema); String getQueryId(); boolean isProbeDecodeEnabled(); byte getProbeMjSmallTablePos(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index de962cd..1380185 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -421,6 +421,31 @@ public class OrcInputFormat implements InputFormat, return result; } + // Mostly dup of genIncludedColumns + public static String[] genIncludedColNames(TypeDescription fileSchema, + List included, Integer recursiveStruct) { +String[] originalColNames = new String[included.size()]; +List children = fileSchema.getChildren(); +for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { + int indexInBatchCols = included.indexOf(columnNumber); + if (indexInBatchCols >= 0) { +// child Index and FiledIdx should be the same +originalColNames[indexInBatchCols] = fileSchema.getFieldNames().get(columnNumber); + } else if (recursiveStruct != null && recursiveStruct == columnNumber) { +// This assumes all struct cols immediately follow struct +List nestedChildren = children.get(columnNumber).getChildren(); +for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) { + int columnNumberNested = columnNumber + 1 + columnNumberDelta; + int nestedIxInBatchCols = included.indexOf(columnNumberNested); + if (nestedIxInBatchCols >= 0) { +originalColNames[nestedIxInBatchCols] = 
children.get(columnNumber).getFieldNames().get(columnNumberDelta); + } +} + } +} +return originalColNames; + } + private static void addColumnToIncludes(TypeDescription child, boolean[] result) { for(int col = child.getId(); col <= child.getMaximumId(); ++col) {
[hive] branch master updated: HIVE-16490. Hive should not use getKeyProvider from DFSClient directly (Uma Maheswara Rao G via David Mollitor).
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new cf75c77 HIVE-16490. Hive should not use getKeyProvider from DFSClient directly ( Uma Maheswara Rao G via David Mollitor). cf75c77 is described below commit cf75c776f44450e268d2bf4f4f67bbc0ccd9ef94 Author: Uma Maheswara Rao G AuthorDate: Sun Jul 19 00:12:20 2020 -0700 HIVE-16490. Hive should not use getKeyProvider from DFSClient directly ( Uma Maheswara Rao G via David Mollitor). Signed-off-by: Ashutosh Chauhan --- .../src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 440efe9..858a799 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -1223,11 +1223,9 @@ public class Hadoop23Shims extends HadoopShimsSecure { private final Configuration conf; public HdfsEncryptionShim(URI uri, Configuration conf) throws IOException { - DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(uri, conf); - this.conf = conf; - this.keyProvider = dfs.getClient().getKeyProvider(); this.hdfsAdmin = new HdfsAdmin(uri, conf); + this.keyProvider = this.hdfsAdmin.getKeyProvider(); } @Override
[hive] branch master updated: HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 47da936 HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan) 47da936 is described below commit 47da936a06e50ba11e7cee9cbdc32715077709cb Author: Panos Garefalakis AuthorDate: Sat Jun 20 22:12:07 2020 +0100 HIVE-23734: Untangle LlapRecordReader Includes construction (Panos G via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/api/impl/LlapRecordReader.java| 50 +++--- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index c148dd4..a257a06 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -648,8 +648,10 @@ class LlapRecordReader implements RecordReader // Note: columnIds below makes additional changes for ACID. Don't use this var directly. this.readerSchema = readerSchema; this.jobConf = jobConf; + this.includeAcidColumns = includeAcidColumns; + + // Assume including everything means the VRB will have everything. if (tableIncludedCols == null) { -// Assume including everything means the VRB will have everything. // TODO: this is rather brittle, esp. in view of schema evolution (in abstract, not as // currently implemented in Hive). The compile should supply the columns it expects // to see, which is not "all, of any schema". 
Is VRB row CVs the right mechanism @@ -659,43 +661,39 @@ class LlapRecordReader implements RecordReader tableIncludedCols.add(i); } } - LOG.debug("Logical table includes: {}", tableIncludedCols); + this.readerLogicalColumnIds = tableIncludedCols; + LOG.debug("Logical table includes: {}", readerLogicalColumnIds); + // Note: schema evolution currently does not support column index changes. // So, the indices should line up... to be fixed in SE v2? - List filePhysicalColumnIds = readerLogicalColumnIds; if (isAcidScan) { int rootCol = OrcInputFormat.getRootColumn(false); -filePhysicalColumnIds = new ArrayList<>(filePhysicalColumnIds.size() + rootCol); +this.filePhysicalColumnIds = new ArrayList<>(readerLogicalColumnIds.size() + rootCol); this.acidStructColumnId = rootCol - 1; // OrcRecordUpdater.ROW. This is somewhat fragile... -// Note: this guarantees that physical column IDs are in order. -for (int i = 0; i < rootCol; ++i) { - // We don't want to include the root struct in ACID case; it would cause the whole - // struct to get read without projection. - if (acidStructColumnId == i) continue; - if(!includeAcidColumns) { -/* - if not including acid columns, we still want to number the - physical columns as if acid columns are included becase - {@link #generateFileIncludes(TypeDescription)} takes the file - schema as input - (eg >) - */ -continue; +if (includeAcidColumns) { + // Up to acidStructColumnId: as we don't want to include the root struct in ACID case; + // it would cause the whole struct to get read without projection. + for (int i = 0; i < acidStructColumnId; ++i) { +// Note: this guarantees that physical column IDs are in order. 
+filePhysicalColumnIds.add(i); } - filePhysicalColumnIds.add(i); } +/** + * Even when NOT including acid columns, we still want to number the + * physical columns as if acid columns are included because + * {@link #generateFileIncludes(TypeDescription)} takes the file + * schema as input + * (eg >) + */ for (int tableColumnId : readerLogicalColumnIds) { - //but make sure to generate correct ids in type tree in-order - //walk order + // Make sure to generate correct ids in type tree in-order traversal + /* ok, so if filePhysicalColumnIds include acid column ids, we end up decoding the vectors*/ filePhysicalColumnIds.add(rootCol + tableColumnId); } -/*ok, so if filePhysicalColumnIds include acid column ids, we end up - decoding the vectors*/ + } else { +this.filePhysicalColumnIds = readerLogicalColumnIds; } - -
[hive] branch master updated: HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via Panos G, Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 2864d8d HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via Panos G, Ashutosh Chauhan) 2864d8d is described below commit 2864d8dbeafebfa3059e07fb34206906466f99f9 Author: Mustafa Iman AuthorDate: Fri Jul 17 15:07:36 2020 -0700 HIVE-23855: TestQueryShutdownHooks is flaky (Mustafa Iman via Panos G, Ashutosh Chauhan) Increased timeout for async query. Test were not isolated very well. Test async query did not clean up properly. State leaked to test sync causing it to fail. Cleanup is moved to @After so cleanup is always run. Signed-off-by: Ashutosh Chauhan --- .../hadoop/util/ShutdownHookManagerInspector.java | 15 - .../cli/operation/TestQueryShutdownHooks.java | 64 +- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java b/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java index d360475..2221f20 100644 --- a/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java +++ b/service/src/test/org/apache/hadoop/util/ShutdownHookManagerInspector.java @@ -20,9 +20,20 @@ package org.apache.hadoop.util; import java.util.List; +import static org.junit.Assert.assertEquals; + public class ShutdownHookManagerInspector { - public static List getShutdownHooksInOrder() { -return ShutdownHookManager.get().getShutdownHooksInOrder(); + public static int getShutdownHookCount() { +return ShutdownHookManager.get().getShutdownHooksInOrder().size(); + } + + public static void assertShutdownHookCount(int expected) { +List entries = ShutdownHookManager.get().getShutdownHooksInOrder(); +StringBuilder errorBuilder = new StringBuilder("Shutdown hooks:\n"); +for (ShutdownHookManager.HookEntry entry: entries) { + 
errorBuilder.append(entry.getHook()).append(" Priority:").append(entry.getPriority()).append("\n"); +} +assertEquals(errorBuilder.toString(), expected, entries.size()); } } diff --git a/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java b/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java index 0233e8b..0170c71 100644 --- a/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java +++ b/service/src/test/org/apache/hive/service/cli/operation/TestQueryShutdownHooks.java @@ -28,6 +28,7 @@ import org.apache.hive.service.cli.OperationStatus; import org.apache.hive.service.cli.SessionHandle; import org.apache.hive.service.cli.thrift.EmbeddedThriftBinaryCLIService; import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient; +import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -40,11 +41,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; -@org.junit.Ignore("HIVE-23855 TestQueryShutdownHooks is flaky") public class TestQueryShutdownHooks { + private static final long ASYNC_QUERY_TIMEOUT_MS = 60; private EmbeddedThriftBinaryCLIService service; private ThriftCLIServiceClient client; + private SessionHandle sessionHandle; + private final Map confOverlay = new HashMap<>(); @Before public void setUp() throws Exception { @@ -57,21 +60,23 @@ public class TestQueryShutdownHooks { hiveConf.setVar(ConfVars.HIVE_LOCK_MANAGER, "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager"); service.init(hiveConf); client = new ThriftCLIServiceClient(service); -SessionHandle tempSession = client.openSession("anonymous", "anonymous", new HashMap<>()); +sessionHandle = client.openSession("anonymous", "anonymous", new HashMap<>()); // any job causes creation of HadoopJobExecHelper's shutdown hook. 
It is once per JVM // We want it to be created before we count the hooks so it does not cause off by one error in our count -client.executeStatement(tempSession, "select reflect(\"java.lang.System\", \"currentTimeMillis\")", new HashMap<>()); -client.closeSession(tempSession); +client.executeStatement(sessionHandle, "select reflect(\"java.lang.System\", \"currentTimeMillis\")", new HashMap<>()); + } + + @After + public void cleanup() throws HiveSQLException { +if (sessionHandle != null) { + client.closeSession(sessionHandle); +} +service.stop(); } @Test public void testSync() throws Exception { -Map opConf = new HashMap(); - -SessionHandle sessHandle =
[hive] branch master updated: HIVE-23871: ObjectStore should properly handle MicroManaged Table properties (Panos G via Mustafa Iman, Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bdc7d27 HIVE-23871: ObjectStore should properly handle MicroManaged Table properties (Panos G via Mustafa Iman, Ashutosh Chauhan) bdc7d27 is described below commit bdc7d27ff3d0c99bf4372834559a5d34b86b6c57 Author: Panos Garefalakis AuthorDate: Fri Jul 17 16:11:54 2020 +0100 HIVE-23871: ObjectStore should properly handle MicroManaged Table properties (Panos G via Mustafa Iman, Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- data/files/table1_delim.txt| 5 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- .../clientpositive/load_micromanaged_delim.q | 29 .../results/clientpositive/llap/acid_stats5.q.out | 12 ++ .../clientpositive/llap/check_constraint.q.out | 6 + .../llap/create_transactional_insert_only.q.out| 2 + .../llap/load_micromanaged_delim.q.out | 192 + .../test/results/clientpositive/llap/mm_all.q.out | 6 + .../test/results/clientpositive/llap/mm_bhif.q.out | 44 ++--- .../results/clientpositive/llap/mm_default.q.out | 8 + .../test/results/clientpositive/llap/mm_exim.q.out | 4 + .../hadoop/hive/metastore/MetaStoreDirectSql.java | 15 +- .../apache/hadoop/hive/metastore/ObjectStore.java | 43 ++--- .../apache/hadoop/hive/metastore/txn/TxnUtils.java | 6 + 14 files changed, 315 insertions(+), 59 deletions(-) diff --git a/data/files/table1_delim.txt b/data/files/table1_delim.txt new file mode 100644 index 000..60a592d --- /dev/null +++ b/data/files/table1_delim.txt @@ -0,0 +1,5 @@ +1 Acura 4 +2 Toyota 3 +3 Tesla 5 +4 Honda 5 +11 Mazda 2 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1d013ae..3c1741f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7463,7 +7463,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable); checkAcidConstraints(); } else { -// Acid tables can't be list bucketed or have skewed cols +// Transactional tables can't be list bucketed or have skewed cols lbCtx = constructListBucketingCtx(destinationPartition.getSkewedColNames(), destinationPartition.getSkewedColValues(), destinationPartition.getSkewedColValueLocationMaps(), destinationPartition.isStoredAsSubDirectories()); diff --git a/ql/src/test/queries/clientpositive/load_micromanaged_delim.q b/ql/src/test/queries/clientpositive/load_micromanaged_delim.q new file mode 100644 index 000..00ba262 --- /dev/null +++ b/ql/src/test/queries/clientpositive/load_micromanaged_delim.q @@ -0,0 +1,29 @@ +set hive.support.concurrency=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +dfs -mkdir ${system:test.tmp.dir}/delim_table; +dfs -mkdir ${system:test.tmp.dir}/delim_table_ext; +dfs -mkdir ${system:test.tmp.dir}/delim_table_trans; +dfs -cp ${system:hive.root}/data/files/table1_delim.txt ${system:test.tmp.dir}/delim_table/; +dfs -cp ${system:hive.root}/data/files/table1_delim.txt ${system:test.tmp.dir}/delim_table_ext/; +dfs -cp ${system:hive.root}/data/files/table1_delim.txt ${system:test.tmp.dir}/delim_table_trans/; + +-- Checking that MicroManged and External tables have the same behaviour with delimited input files +-- External table +CREATE EXTERNAL TABLE delim_table_ext(id INT, name STRING, safety INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE LOCATION '${system:test.tmp.dir}/delim_table_ext/'; +describe formatted delim_table_ext; +SELECT * FROM delim_table_ext; + +-- MicroManaged insert_only table +CREATE TABLE delim_table_micro(id INT, name STRING, safety INT) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE TBLPROPERTIES('transactional'='true', "transactional_properties"="insert_only"); +LOAD DATA INPATH '${system:test.tmp.dir}/delim_table/table1_delim.txt' OVERWRITE INTO TABLE delim_table_micro; +describe formatted delim_table_micro; +SELECT * FROM delim_table_micro; + +-- Same as above with different syntax +CREATE TRANSACTIONAL TABLE delim_table_trans(id INT, name STRING, safety INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE; +LOAD DATA INPATH '${system:test.tmp.dir}/delim_table_trans/table1_delim.txt' OVERWRITE INTO TABLE delim_table_trans; +describe formatted delim_table_trans; +SELECT * FROM delim_table_trans; diff --git a/ql/src/test/results
[hive] branch master updated: HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new aa407da HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan aa407da is described below commit aa407da7b00aea585629f27c2b88d1faf17eae32 Author: David Mollitor AuthorDate: Mon Jun 15 14:56:49 2020 -0400 HIVE-23363: Upgrade DataNucleus dependency to 5.2 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- pom.xml| 8 +++--- .../hive/metastore/PersistenceManagerProvider.java | 30 +++--- .../src/main/resources/package.jdo | 10 standalone-metastore/pom.xml | 8 +++--- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/pom.xml b/pom.xml index 2a0c328..0513fe7 100644 --- a/pom.xml +++ b/pom.xml @@ -116,10 +116,10 @@ 1.12.0 1.8.2 1.21.0 -4.2.4 -4.1.17 -4.1.19 -3.2.0-m3 +5.2.2 +5.2.2 +3.2.0-m13 +5.2.2 1.2 1.7 3.2.2 diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java index f97808a..870532a 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PersistenceManagerProvider.java @@ -42,8 +42,8 @@ import org.datanucleus.NucleusContext; import org.datanucleus.PropertyNames; import org.datanucleus.api.jdo.JDOPersistenceManager; import org.datanucleus.api.jdo.JDOPersistenceManagerFactory; -import org.datanucleus.store.scostore.Store; -import org.datanucleus.util.WeakValueMap; +import org.datanucleus.util.ConcurrentReferenceHashMap; +import org.datanucleus.store.types.scostore.Store; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -367,26 +367,26 @@ public class PersistenceManagerProvider { } private static void clearClr(ClassLoaderResolver clr) throws Exception { -if (clr != null) { - if (clr instanceof ClassLoaderResolverImpl) { -ClassLoaderResolverImpl clri = (ClassLoaderResolverImpl) clr; -long resourcesCleared = clearFieldMap(clri, "resources"); -long loadedClassesCleared = clearFieldMap(clri, "loadedClasses"); -long unloadedClassesCleared = clearFieldMap(clri, "unloadedClasses"); -LOG.debug("Cleared ClassLoaderResolverImpl: {}, {}, {}", resourcesCleared, -loadedClassesCleared, unloadedClassesCleared); - } +if (clr instanceof ClassLoaderResolverImpl) { + ClassLoaderResolverImpl clri = (ClassLoaderResolverImpl) clr; + int resourcesCleared = clearFieldMap(clri, "resources"); + int loadedClassesCleared = clearFieldMap(clri, "loadedClasses"); + int unloadedClassesCleared = clearFieldMap(clri, "unloadedClasses"); + + LOG.debug( + "Cleared ClassLoaderResolverImpl: resources: {}, loaded classes: {}, unloaded classes: {}", + resourcesCleared, loadedClassesCleared, unloadedClassesCleared); } } - private static long clearFieldMap(ClassLoaderResolverImpl clri, String mapFieldName) + private static int clearFieldMap(ClassLoaderResolverImpl clri, String mapFieldName) throws Exception { Field mapField = ClassLoaderResolverImpl.class.getDeclaredField(mapFieldName); mapField.setAccessible(true); -Map map = (Map) mapField.get(clri); -long sz = map.size(); -mapField.set(clri, Collections.synchronizedMap(new WeakValueMap())); +Map map = (Map) mapField.get(clri); +final int sz = map.size(); +mapField.set(clri, new ConcurrentReferenceHashMap<>()); return sz; } diff --git a/standalone-metastore/metastore-server/src/main/resources/package.jdo b/standalone-metastore/metastore-server/src/main/resources/package.jdo index 0252dd0..d1f4b33 100644 --- a/standalone-metastore/metastore-server/src/main/resources/package.jdo +++ 
b/standalone-metastore/metastore-server/src/main/resources/package.jdo @@ -345,11 +345,11 @@ - + - - + + @@ -357,8 +357,8 @@ - - + + diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml index b0267aa..a7eb54b 100644 --- a/standalone-metastore/pom.xml
[hive] branch master updated: HIVE-23277 : HiveProtoLogger should carry out JSON conversion in its own thread ( Attila Magyar via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 58e532e HIVE-23277 : HiveProtoLogger should carry out JSON conversion in its own thread ( Attila Magyar via Rajesh Balamohan) 58e532e is described below commit 58e532e17fd5f8c0d6188756c1a9869b467dfaff Author: Attila Magyar AuthorDate: Wed Jul 8 15:01:49 2020 -0700 HIVE-23277 : HiveProtoLogger should carry out JSON conversion in its own thread ( Attila Magyar via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/ql/exec/ExplainTask.java| 20 - .../ql/hooks/HiveHookEventProtoPartialBuilder.java | 86 ++ .../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java | 67 +++-- .../ql/optimizer/physical/StageIDsRearranger.java | 8 +- .../TestHiveHookEventProtoPartialBuilder.java | 82 + .../hive/ql/hooks/TestHiveProtoLoggingHook.java| 1 + 6 files changed, 216 insertions(+), 48 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 750abcb..f2ed01a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -244,9 +244,25 @@ public class ExplainTask extends Task implements Serializable { work.getCboPlan(), work.getOptimizedSQL()); } + public JSONObject getJSONPlan(PrintStream out, ExplainWork work, String stageIdRearrange) + throws Exception { +return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(), +work.isFormatted(), work.getExtended(), work.isAppendTaskType(), work.getCboInfo(), +work.getCboPlan(), work.getOptimizedSQL(), stageIdRearrange); + } + + public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetchTask, +boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, +String cboPlan, String 
optimizedSQL) throws Exception { +return getJSONPlan( +out, tasks, fetchTask, jsonOutput, isExtended, +appendTaskType, cboInfo, cboPlan, optimizedSQL, +conf.getVar(ConfVars.HIVESTAGEIDREARRANGE)); + } + public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetchTask, boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, - String cboPlan, String optimizedSQL) throws Exception { + String cboPlan, String optimizedSQL, String stageIdRearrange) throws Exception { // If the user asked for a formatted output, dump the json output // in the output stream @@ -274,7 +290,7 @@ public class ExplainTask extends Task implements Serializable { } } -List ordered = StageIDsRearranger.getExplainOrder(conf, tasks); +List ordered = StageIDsRearranger.getExplainOrder(tasks, stageIdRearrange); if (fetchTask != null) { fetchTask.setParentTasks((List)StageIDsRearranger.getFetchSources(tasks)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java new file mode 100644 index 000..9b9b4e1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveHookEventProtoPartialBuilder.java @@ -0,0 +1,86 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package org.apache.hadoop.hive.ql.hooks; + +import java.util.Map; + +import javax.annotation.Nullable; + +import org.apache.hadoop.hive.ql.exec.ExplainTask; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook.OtherInfoType; +import org.apache.hadoop.hive.ql.hooks.proto.HiveHookEvents; +import org.apache.hadoop.hive.ql.plan.ExplainWork; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.Logg
[hive] branch master updated: HIVE-23665 Rewrite last_value to first_value to enable streaming results (Ramesh Kumar via Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 5c1a3d6 HIVE-23665 Rewrite last_value to first_value to enable streaming results (Ramesh Kumar via Jesus Camacho Rodriguez) 5c1a3d6 is described below commit 5c1a3d6dfe64d0daf2ea47047cfcb9dae62e2fa5 Author: Ramesh Thangarajan AuthorDate: Wed Jun 24 10:13:31 2020 -0700 HIVE-23665 Rewrite last_value to first_value to enable streaming results (Ramesh Kumar via Jesus Camacho Rodriguez) Signed-off-by: Ashutosh Chauhan --- .../rules/HiveWindowingLastValueRewrite.java | 119 +++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 4 +- .../vector_ptf_last_value_streaming.q | 9 + .../llap/vector_ptf_last_value_streaming.q.out | 192 + .../llap/vector_ptf_part_simple.q.out | 238 ++--- 5 files changed, 478 insertions(+), 84 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java new file mode 100644 index 000..5845553 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveWindowingLastValueRewrite.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexWindow; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; + +/** + * Rule to rewrite a window function containing a last value clause. 
+ */ +public class HiveWindowingLastValueRewrite extends RelOptRule { + + public static final HiveWindowingLastValueRewrite INSTANCE = new HiveWindowingLastValueRewrite(); + + private static final String FIRST_VALUE_FUNC = "first_value"; + private static final String LAST_VALUE_FUNC = "last_value"; + + + private HiveWindowingLastValueRewrite() { +super(operand(Project.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) { +Project project = call.rel(0); + +List newExprs = new ArrayList<>(); +LastValueRewriteRexShuttle lastValueRewrite = new LastValueRewriteRexShuttle( +project.getCluster().getRexBuilder()); +boolean modified = false; +for (RexNode expr : project.getChildExps()) { + RexNode newExpr = lastValueRewrite.apply(expr); + newExprs.add(newExpr); + modified |= (newExpr != expr); +} +if (modified) { + RelNode newProject = project.copy( + project.getTraitSet(), project.getInput(), newExprs, project.getRowType()); + call.transformTo(newProject); +} + } + + private static class LastValueRewriteRexShuttle extends RexShuttle { + +private final RexBuilder rexBuilder; + +private LastValueRewriteRexShuttle(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; +} + +public RexNode visitOver(RexOver over) { + if (over.op.getName().equals(LAST_VALUE_FUNC) && over.getWindow().getLowerBound().isUnbounded() +&& over.getWindow().getUpperBound().isUnbounded()) { +ImmutableList orderKeys = over.getWindow().orderKeys; +if
[hive] branch master updated: HIVE-23735 : Reducer misestimate for export command (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 54a2c0c HIVE-23735 : Reducer misestimate for export command (Rajesh Balamohan via Ashutosh Chauhan) 54a2c0c is described below commit 54a2c0c6ec7b47ae63e25c83a405c7e349f739ca Author: Rajesh Balamohan AuthorDate: Mon Jul 6 08:39:56 2020 -0700 HIVE-23735 : Reducer misestimate for export command (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index de746a8..72794e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6866,7 +6866,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { int numFiles = 1; int totalFiles = 1; -if (dest_tab.getNumBuckets() > 0) { +if (dest_tab.getNumBuckets() > 0 && !dest_tab.getBucketCols().isEmpty()) { enforceBucketing = true; if (updating(dest) || deleting(dest)) { partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true);
[hive] branch master updated: HIVE-17879 : Upgrade Datanucleus Maven Plugin
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 321b46f HIVE-17879 : Upgrade Datanucleus Maven Plugin 321b46f is described below commit 321b46fe1baf179719ec644c573a39abaf4646d4 Author: Ashutosh Chauhan AuthorDate: Sun Jun 14 18:45:04 2020 -0700 HIVE-17879 : Upgrade Datanucleus Maven Plugin --- pom.xml | 13 - standalone-metastore/metastore-server/pom.xml | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/pom.xml b/pom.xml index eaadad0..2a31dbd 100644 --- a/pom.xml +++ b/pom.xml @@ -99,7 +99,6 @@ 1.0b3 - 3.3.0-release -Xmx2048m 2.17 1.12 @@ -1170,18 +1169,6 @@ ${maven.surefire.plugin.version} - org.datanucleus - datanucleus-maven-plugin - ${datanucleus.maven.plugin.version} - - - org.datanucleus - datanucleus-core - ${datanucleus-core.version} - - - - org.apache.felix maven-bundle-plugin ${felix.version} diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml index df6be61..d1a3631 100644 --- a/standalone-metastore/metastore-server/pom.xml +++ b/standalone-metastore/metastore-server/pom.xml @@ -722,7 +722,7 @@ org.datanucleus datanucleus-maven-plugin -4.0.5 +5.2.1 JDO false
[hive] branch master updated: HIVE-22687 : Query hangs indefinitely if LLAP daemon registers after the query is submitted (Himanshu Mishra, Attila Magyar via Ashutosh Chauhan, Prasanth J)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e85c9e6 HIVE-22687 : Query hangs indefinitely if LLAP daemon registers after the query is submitted (Himanshu Mishra, Attila Magyar via Ashutosh Chauhan, Prasanth J) e85c9e6 is described below commit e85c9e6a2635e773c691e196bece759dfbe53878 Author: Ashutosh Chauhan AuthorDate: Sat Jun 13 14:21:06 2020 -0700 HIVE-22687 : Query hangs indefinitely if LLAP daemon registers after the query is submitted (Himanshu Mishra, Attila Magyar via Ashutosh Chauhan, Prasanth J) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java | 4 +++- .../src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java index 92c4771..cf4e7b8 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java @@ -153,9 +153,10 @@ public class LlapZookeeperRegistryImpl HiveConf.getVarWithoutType(conf, ConfVars.LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE)); populateConfigValues(capacityValues.entrySet()); -String uniqueId = registerServiceRecord(daemonZkRecord); +String uniqueId = UNIQUE_ID.toString(); long znodeCreationTimeout = 120; +initializeWithoutRegisteringInternal(); // Create a znode under the rootNamespace parent for this instance of the server try { slotZnode = new SlotZnode( @@ -171,6 +172,7 @@ public class LlapZookeeperRegistryImpl throw (e instanceof IOException) ? 
(IOException)e : new IOException(e); } +registerServiceRecord(daemonZkRecord, uniqueId); LOG.info("Registered node. Created a znode on ZooKeeper for LLAP instance: rpc: {}, " + "shuffle: {}, webui: {}, mgmt: {}, znodePath: {}", rpcEndpoint, getShuffleEndpoint(), getServicesEndpoint(), getMngEndpoint(), getRegistrationZnodePath()); diff --git a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java index 2b21baa..249fa49 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java +++ b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java @@ -339,7 +339,7 @@ public abstract class ZkRegistryBase { } - final void initializeWithoutRegisteringInternal() throws IOException { + final protected void initializeWithoutRegisteringInternal() throws IOException { // Create a znode under the rootNamespace parent for this instance of the server try { try {
[hive] branch master updated: HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS init to NN (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 03165e0 HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS init to NN (Rajesh Balamohan via Ashutosh Chauhan) 03165e0 is described below commit 03165e00eb52d77062a741b576e48971827dfea3 Author: Rajesh Balamohan AuthorDate: Sat Jun 13 14:08:43 2020 -0700 HIVE-23468 : LLAP: Optimise OrcEncodedDataReader to avoid FS init to NN (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java | 2 +- ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java| 7 +++ .../hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index c90ff52..35d066a 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -537,7 +537,7 @@ public class OrcEncodedDataReader extends CallableWithNdc path = split.getPath(); if (fileKey instanceof Long && HiveConf.getBoolVar( daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) { - path = HdfsUtils.getFileIdPath(fsSupplier.get(), path, (long)fileKey); + path = HdfsUtils.getFileIdPath(path, (long)fileKey); } LlapIoImpl.ORC_LOGGER.trace("Creating reader for {} ({})", path, split.getPath()); long startTime = counters.startTimeCounter(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java index 4de5c8c..f590eb6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java @@ -102,10 +102,9 @@ public class HdfsUtils { // as public utility method in HDFS to obtain the inode-based path. private static String HDFS_ID_PATH_PREFIX = "/.reserved/.inodes/"; - public static Path getFileIdPath( - FileSystem fileSystem, Path path, long fileId) { -return ((fileSystem instanceof DistributedFileSystem)) -? new Path(HDFS_ID_PATH_PREFIX + fileId) : path; + public static Path getFileIdPath(Path path, long fileId) { +// BI/ETL split strategies set fileId correctly when HDFS is used. +return (fileId > 0) ? new Path(HDFS_ID_PATH_PREFIX + fileId) : path; } public static boolean isDefaultFs(DistributedFileSystem fs) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index 3fd93ca..a3bbb7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -221,7 +221,7 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase FileSystem fs = file.getFileSystem(configuration); if (cacheKey instanceof Long && HiveConf.getBoolVar( cacheConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) { -file = HdfsUtils.getFileIdPath(fs, file, (long)cacheKey); +file = HdfsUtils.getFileIdPath(file, (long)cacheKey); } }
[hive] branch master updated: HIVE-23554 : Adding FilterContext as part of LLAP ColumnVectorBatch (propagated in ReadPipeline). Also moving the code that prepares VectorBatches in EncodedDataConsumer
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3065904 HIVE-23554 : Adding FilterContext as part of LLAP ColumnVectorBatch (propagated in ReadPipeline). Also moving the code that prepares VectorBatches in EncodedDataConsumer in a separate method -- this will be handy when dealing with row-filters later on. (Panos G via Ashutosh Chauhan) 3065904 is described below commit 30659041e470702e4a52f024eabe28d2d848d478 Author: Panos Garefalakis AuthorDate: Wed May 27 11:51:48 2020 +0100 HIVE-23554 : Adding FilterContext as part of LLAP ColumnVectorBatch (propagated in ReadPipeline). Also moving the code that prepares VectorBatches in EncodedDataConsumer in a separate method -- this will be handy when dealing with row-filters later on. (Panos G via Ashutosh Chauhan) Change-Id: I0177756e842e60f6850c966cfa44fe0d53df4a28 Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/api/impl/ColumnVectorBatch.java | 15 .../llap/io/decode/OrcEncodedDataConsumer.java | 27 +- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java index 19b0b55..52dc072 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ColumnVectorBatch.java @@ -20,12 +20,17 @@ package org.apache.hadoop.hive.llap.io.api.impl; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.io.filter.MutableFilterContext; + +import java.util.Arrays; /** * Unlike VRB, doesn't have some fields, and doesn't have all columns * (non-selected, partition cols, 
cols for downstream ops, etc.) + * It does, however, hold the FilterContext of the VRB. */ public class ColumnVectorBatch { + public MutableFilterContext filterContext; public ColumnVector[] cols; public int size; @@ -34,6 +39,7 @@ public class ColumnVectorBatch { } public ColumnVectorBatch(int columnCount, int batchSize) { +this.filterContext = new VectorizedRowBatch(0); this.cols = new ColumnVector[columnCount]; this.size = batchSize; } @@ -51,6 +57,15 @@ public class ColumnVectorBatch { return ""; } StringBuilder b = new StringBuilder(); +b.append("FilterContext used: "); +b.append(filterContext.isSelectedInUse()); +b.append(", size: "); +b.append(filterContext.getSelectedSize()); +b.append('\n'); +b.append("Selected: "); +b.append(filterContext.isSelectedInUse() ? Arrays.toString(filterContext.getSelected()) : "[]"); +b.append('\n'); + b.append("Column vector types: "); for (int k = 0; k < cols.length; k++) { ColumnVector cv = cols[k]; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index 79dba42..9459a4f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -152,17 +152,10 @@ public class OrcEncodedDataConsumer } ColumnVectorBatch cvb = cvbPool.take(); +cvb.filterContext.reset(); // assert cvb.cols.length == batch.getColumnIxs().length; // Must be constant per split. cvb.size = batchSize; for (int idx = 0; idx < columnReaders.length; ++idx) { - TreeReader reader = columnReaders[idx]; - if (cvb.cols[idx] == null) { -// Orc store rows inside a root struct (hive writes it this way). -// When we populate column vectors we skip over the root struct. 
-cvb.cols[idx] = createColumn(batchSchemas[idx], VectorizedRowBatch.DEFAULT_SIZE, useDecimal64ColumnVectors); - } - trace.logTreeReaderNextVector(idx); - /* * Currently, ORC's TreeReaderFactory class does this: * @@ -198,9 +191,8 @@ public class OrcEncodedDataConsumer * it doesn't get confused. * */ - ColumnVector cv = cvb.cols[idx]; - cv.reset(); - cv.ensureSize(batchSize, false); + TreeReader reader = columnReaders[idx]; + ColumnVector cv = prepareColumnVector(cvb, idx, batchSize); reader.nextVector(cv, null, batchSize); } @@ -218,6 +210,19 @@ public class OrcEncodedDataConsumer }
[hive] branch master updated: HIVE-23580 : deleteOnExit set is not cleaned up, causing memory pressure (Attila Magyar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new fecad5b HIVE-23580 : deleteOnExit set is not cleaned up, causing memory pressure (Attila Magyar via Ashutosh Chauhan) fecad5b is described below commit fecad5b0f72c535ed1c53f2cc62b0d6649b651ae Author: Attila Magyar AuthorDate: Sat Jun 13 13:47:38 2020 -0700 HIVE-23580 : deleteOnExit set is not cleaned up, causing memory pressure (Attila Magyar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/Context.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 318c207..a41c5c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -678,6 +678,8 @@ public class Context { Path p = entry.getValue(); if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, fsScratchDirs.values()) ) { LOG.debug("Skip deleting stagingDir: " + p); + FileSystem fs = p.getFileSystem(conf); + fs.cancelDeleteOnExit(p); continue; // staging dir is deleted when deleting the scratch dir } if(resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) {
[hive] branch master updated: HIVE-23666 : checkHashModeEfficiency is skipped when a groupby operator doesn't have a grouping set (Ramesh Kumar via Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 45f6efd HIVE-23666 : checkHashModeEfficiency is skipped when a groupby operator doesn't have a grouping set (Ramesh Kumar via Jesus Camacho Rodriguez) 45f6efd is described below commit 45f6efd994d6c1e34425821ac1878bdff0249500 Author: RAMESH KUMAR THANGARAJAN AuthorDate: Sat Jun 13 10:36:59 2020 -0700 HIVE-23666 : checkHashModeEfficiency is skipped when a groupby operator doesn't have a grouping set (Ramesh Kumar via Jesus Camacho Rodriguez) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/exec/vector/VectorGroupByOperator.java | 23 +++--- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index d4d18ef..9f81e8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -180,21 +180,20 @@ public class VectorGroupByOperator extends Operator if (!groupingSetsPresent) { doProcessBatch(batch, false, null); -return; - } - - // We drive the doProcessBatch logic with the same batch but different - // grouping set id and null variation. - // PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew... + } else { +// We drive the doProcessBatch logic with the same batch but different +// grouping set id and null variation. +// PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew... - final int size = groupingSets.length; - for (int i = 0; i < size; i++) { +final int size = groupingSets.length; +for (int i = 0; i < size; i++) { -// NOTE: We are overwriting the constant vector value... 
-groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]); -groupingSetsDummyVectorExpression.evaluate(batch); + // NOTE: We are overwriting the constant vector value... + groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]); + groupingSetsDummyVectorExpression.evaluate(batch); -doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]); + doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]); +} } if (this instanceof ProcessingModeHashAggregate) {
[hive] branch master updated: HIVE-23561: Fixing arrow serializer for Decimals with selected (Panos G via Mahesh Kumar)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 739cc43 HIVE-23561: Fixing arrow serializer for Decimals with selected (Panos G via Mahesh Kumar) 739cc43 is described below commit 739cc434505e4ee79e677cf39ff7fcee79527b2c Author: Panos Garefalakis AuthorDate: Thu May 28 14:22:49 2020 +0100 HIVE-23561: Fixing arrow serializer for Decimals with selected (Panos G via Mahesh Kumar) Change-Id: Ie92fe13f134c71d2510dd82a9cbee39fe90a2273 Signed-off-by: Ashutosh Chauhan --- .../hive/ql/io/arrow/ArrowColumnarBatchSerDe.java | 4 +- .../apache/hadoop/hive/ql/io/arrow/Serializer.java | 8 +- .../ql/io/arrow/TestArrowColumnarBatchSerDe.java | 160 - 3 files changed, 161 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java index 0408707..4896bc4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.arrow; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.UnionListWriter; @@ -97,7 +98,8 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe { StructObjectInspector rowObjectInspector; Configuration conf; - private Serializer serializer; + @VisibleForTesting + Serializer serializer; private Deserializer deserializer; @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java index d5a9b2c..5a79641 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.arrow; +import com.google.common.annotations.VisibleForTesting; import io.netty.buffer.ArrowBuf; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -44,8 +45,6 @@ import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.hadoop.conf.Configuration; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -105,7 +104,8 @@ public class Serializer { private final static byte[] EMPTY_BYTES = new byte[0]; // Hive columns - private final VectorizedRowBatch vectorizedRowBatch; + @VisibleForTesting + final VectorizedRowBatch vectorizedRowBatch; private final VectorAssignRow vectorAssignRow; private int batchSize; private BufferAllocator allocator; @@ -923,7 +923,7 @@ public class Serializer { final int scale = decimalVector.getScale(); decimalVector.set(i, ((DecimalColumnVector) hiveVector).vector[j].getHiveDecimal().bigDecimalValue().setScale(scale)); -final HiveDecimalWritable writable = ((DecimalColumnVector) hiveVector).vector[i]; +final HiveDecimalWritable writable = ((DecimalColumnVector) hiveVector).vector[j]; decimalHolder.precision = writable.precision(); decimalHolder.scale = scale; try (ArrowBuf arrowBuf = allocator.buffer(DecimalHolder.WIDTH)) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java index be15197..d5aaa9e 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java @@ -203,7 +203,14 @@ public class TestArrowColumnarBatchSerDe { private void initAndSerializeAndDeserialize(String[][] schema, Object[][] rows) throws SerDeException { ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe(); StructObjectInspector rowOI = initSerDe(serDe, schema); -serializeAndDeserialize(serDe, rows, rowOI); +serializeAndDeserialize(serDe, rows, rowOI, null); + } + + private void initAndSerializeAndDeserializeSelected(String[][] schema, Object[][] rows, int[] selected) + throws SerDeException { +ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe(); +StructObjectInspector rowOI = initSerDe(serDe, schema); +serializeAndDeserialize(serDe, rows, rowOI, selected
[hive] branch master updated: HIVE-23435 : Full outer join result is missing rows (Mustafa Iman via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 779c42f HIVE-23435 : Full outer join result is missing rows (Mustafa Iman via Ashutosh Chauhan) 779c42f is described below commit 779c42fa3c73f41785eb3266f3438ab9e2a75032 Author: Mustafa Iman AuthorDate: Mon Jun 1 10:26:14 2020 -0700 HIVE-23435 : Full outer join result is missing rows (Mustafa Iman via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/exec/CommonJoinOperator.java| 18 +++- .../hive/ql/exec/CommonMergeJoinOperator.java | 2 +- .../apache/hadoop/hive/ql/exec/JoinOperator.java | 1 + .../hadoop/hive/ql/exec/MapJoinOperator.java | 1 + .../hadoop/hive/ql/exec/SMBMapJoinOperator.java| 1 + .../ql/exec/vector/VectorMapJoinBaseOperator.java | 2 - .../ql/exec/vector/VectorSMBMapJoinOperator.java | 1 - .../clientpositive/vector_full_outer_join2.q | 28 + .../results/clientpositive/llap/join_1to1.q.out| 84 --- .../llap/vector_full_outer_join2.q.out | 113 + 10 files changed, 233 insertions(+), 18 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 2d76848..7a7c8a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -341,6 +341,8 @@ public abstract class CommonJoinOperator extends forwardCache = new Object[totalSz]; aliasFilterTags = new short[numAliases]; Arrays.fill(aliasFilterTags, (byte)0xff); +aliasFilterTagsNext = new short[numAliases]; +Arrays.fill(aliasFilterTagsNext, (byte) 0xff); filterTags = new short[numAliases]; skipVectors = new boolean[numAliases][]; @@ -478,6 +480,7 @@ public abstract class CommonJoinOperator extends * 100, 30 : N, N */ protected transient short[] 
aliasFilterTags; + protected transient short[] aliasFilterTagsNext; // all evaluation should be processed here for valid aliasFilterTags // @@ -491,11 +494,21 @@ public abstract class CommonJoinOperator extends short filterTag = JoinUtil.isFiltered(row, joinFilters[alias], joinFilterObjectInspectors[alias], filterMaps[alias]); nr.add(new ShortWritable(filterTag)); - aliasFilterTags[alias] &= filterTag; } return nr; } + protected void addToAliasFilterTags(byte alias, List object, boolean isNextGroup) { +boolean hasFilter = hasFilter(alias); +if (hasFilter) { + if (isNextGroup) { +aliasFilterTagsNext[alias] &= ((ShortWritable) (object.get(object.size() - 1))).get(); + } else { +aliasFilterTags[alias] &= ((ShortWritable) (object.get(object.size() - 1))).get(); + } +} + } + // fill forwardCache with skipvector // returns whether a record was forwarded private boolean createForwardJoinObject(boolean[] skip) throws HiveException { @@ -961,7 +974,8 @@ public abstract class CommonJoinOperator extends genJoinObject(); } } -Arrays.fill(aliasFilterTags, (byte)0xff); +System.arraycopy(aliasFilterTagsNext, 0, aliasFilterTags, 0, aliasFilterTagsNext.length); +Arrays.fill(aliasFilterTagsNext, (byte) 0xff); } protected void reportProgress() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java index 581577e..0d9dc46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; @@ -242,6 +241,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator implements Serial alias = (byte) tag; List nr = getFilteredValue(alias, row); + addToAliasFilterTags(alias, nr, false); if (handleSkewJoin) { skewJoinKeyContext.handleSkew(tag); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 07b1fba..489d09f 100644 --- a/ql/src/java/org/apache
[hive] branch master updated: HIVE-23488 : Optimise PartitionManagementTask::Msck::repair (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new a3a25eb HIVE-23488 : Optimise PartitionManagementTask::Msck::repair (Rajesh Balamohan via Ashutosh Chauhan) a3a25eb is described below commit a3a25ebb7e65d3348213a2e79f629eaffd9605c6 Author: Rajesh Balamohan AuthorDate: Wed May 27 11:14:52 2020 -0700 HIVE-23488 : Optimise PartitionManagementTask::Msck::repair (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/metadata/TestHiveMetaStoreChecker.java | 48 +++--- .../hive/metastore/HiveMetaStoreChecker.java | 26 ++-- .../org/apache/hadoop/hive/metastore/Msck.java | 2 +- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java index 520eb1b..198fb81 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java @@ -116,7 +116,7 @@ public class TestHiveMetaStoreChecker { @Test public void testTableCheck() throws HiveException, IOException, TException, MetastoreException { CheckResult result = new CheckResult(); -checker.checkMetastore(catName, dbName, null, null, result); +checker.checkMetastore(catName, dbName, null, null, null, result); // we haven't added anything so should return an all ok assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); @@ -125,7 +125,7 @@ public class TestHiveMetaStoreChecker { // check table only, should not exist in ms result = new CheckResult(); -checker.checkMetastore(catName, dbName, tableName, null, result); +checker.checkMetastore(catName, dbName, tableName, null, 
null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(tableName, result.getTablesNotInMs().iterator().next()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); @@ -148,7 +148,7 @@ public class TestHiveMetaStoreChecker { // now we've got a table, check that it works // first check all (1) tables result = new CheckResult(); -checker.checkMetastore(catName, dbName, null, null, result); +checker.checkMetastore(catName, dbName, null, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); @@ -156,7 +156,7 @@ public class TestHiveMetaStoreChecker { // then let's check the one we know about result = new CheckResult(); -checker.checkMetastore(catName, dbName, tableName, null, result); +checker.checkMetastore(catName, dbName, tableName, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); @@ -168,7 +168,7 @@ public class TestHiveMetaStoreChecker { // now this shouldn't find the path on the fs result = new CheckResult(); -checker.checkMetastore(catName, dbName, tableName, null, result); +checker.checkMetastore(catName, dbName, tableName, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(1, result.getTablesNotOnFs().size()); assertEquals(tableName, result.getTablesNotOnFs().iterator().next()); @@ -184,7 +184,7 @@ public class TestHiveMetaStoreChecker { // find the extra table result = new CheckResult(); -checker.checkMetastore(catName, dbName, null, null, result); +checker.checkMetastore(catName, dbName, null, null, null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(fakeTable.getName(), 
Lists.newArrayList(result.getTablesNotInMs()).get(0)); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); @@ -198,7 +198,7 @@ public class TestHiveMetaStoreChecker { // should return all ok result = new CheckResult(); -checker.checkMetastore(catName, dbName, null, null, result); +checker.checkMetastore(catName, dbName, null, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); @@ -222,7 +222,7 @@ public class TestHiveMetaStoreChecker { fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); CheckResult result = new CheckResult
[hive] branch master updated: HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 65e947e HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan) 65e947e is described below commit 65e947eec05304ed5b9441a374c240cd9a53d30a Author: Rajesh Balamohan AuthorDate: Tue May 26 22:33:33 2020 -0700 HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/ddl/misc/msck/MsckOperation.java | 2 +- .../hive/ql/exec/TestMsckCreatePartitionsInBatches.java | 2 +- .../hive/ql/exec/TestMsckDropPartitionsInBatches.java| 2 +- .../main/java/org/apache/hadoop/hive/metastore/Msck.java | 16 ++-- .../hadoop/hive/metastore/PartitionManagementTask.java | 7 --- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java index c05d699..3ffc58f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java @@ -52,7 +52,7 @@ public class MsckOperation extends DDLOperation { public int execute() throws HiveException, IOException, TException { try { Msck msck = new Msck(false, false); - msck.init(context.getDb().getConf()); + msck.init(Msck.getMsckConf(context.getDb().getConf())); String[] names = Utilities.getDbTableName(desc.getTableName()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java index 7821f40..869249a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java @@ -83,7 +83,7 @@ public class TestMsckCreatePartitionsInBatches { throw new HiveException(e); } msck = new Msck( false, false); -msck.init(hiveConf); +msck.init(Msck.getMsckConf(hiveConf)); } @Before diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java index 8be3112..e7318bf 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java @@ -80,7 +80,7 @@ public class TestMsckDropPartitionsInBatches { SessionState.start(hiveConf); db = new HiveMetaStoreClient(hiveConf); msck = new Msck( false, false); -msck.init(hiveConf); +msck.init(Msck.getMsckConf(hiveConf)); } @Before diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java index f4e109d..59a96e8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java @@ -84,15 +84,19 @@ public class Msck { public void init(Configuration conf) throws MetaException { if (msc == null) { - // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS - Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf)); - metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(), -MsckPartitionExpressionProxy.class.getCanonicalName()); - setConf(metastoreConf); - this.msc = new HiveMetaStoreClient(metastoreConf); + setConf(conf); + this.msc = new HiveMetaStoreClient(conf); } } + public static Configuration getMsckConf(Configuration conf) { +// the only reason we 
are using new conf here is to override EXPRESSION_PROXY_CLASS +Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf)); + metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(), +MsckPartitionExpressionProxy.class.getCanonicalName()); +return metastoreConf; + } + /** * MetastoreCheck, see if the data in the metastore matches what is on the * dfs. Current version checks for tables and partitions that are either diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java index e4488f4..612ac87 100644 --- a/standalone
[hive] branch master updated: HIVE-23281 : ObjectStore::convertToStorageDescriptor can be optimised to reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6aeb63a HIVE-23281 : ObjectStore::convertToStorageDescriptor can be optimised to reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan via Ashutosh Chauhan) 6aeb63a is described below commit 6aeb63a2becec1d2eeae8e144da1cbaf84546d72 Author: Ashutosh Chauhan AuthorDate: Sun May 24 23:14:55 2020 -0700 HIVE-23281 : ObjectStore::convertToStorageDescriptor can be optimised to reduce calls to DB for ACID tables (Ramesh Kumar, Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hadoop/hive/ql/metadata/Table.java | 2 + .../hive/ql/parse/ImportSemanticAnalyzer.java | 9 ++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 19 ++--- .../exim_09_nonpart_noncompat_serdeparam.q.out | 2 +- .../clientpositive/llap/acid_bucket_pruning.q.out | 2 - .../clientpositive/llap/acid_no_buckets.q.out | 4 - .../clientpositive/llap/acid_nullscan.q.out| 2 - .../results/clientpositive/llap/acid_stats2.q.out | 18 - .../results/clientpositive/llap/acid_stats5.q.out | 12 --- .../clientpositive/llap/acid_table_stats.q.out | 12 --- .../clientpositive/llap/autoColumnStats_4.q.out| 6 -- .../clientpositive/llap/check_constraint.q.out | 18 - .../llap/create_transactional_full_acid.q.out | 2 - .../llap/create_transactional_insert_only.q.out| 2 - .../clientpositive/llap/default_constraint.q.out | 24 -- .../insert_values_orig_table_use_metadata.q.out| 10 --- .../test/results/clientpositive/llap/mm_all.q.out | 6 -- .../test/results/clientpositive/llap/mm_bhif.q.out | 44 +++ .../results/clientpositive/llap/mm_default.q.out | 12 --- .../test/results/clientpositive/llap/mm_exim.q.out | 4 - .../llap/murmur_hash_migration2.q.out | 2 - .../clientpositive/llap/sqlmerge_stats.q.out | 10 --- 
.../clientpositive/llap/stats_nonpart.q.out| 4 - .../results/clientpositive/llap/stats_part.q.out | 10 --- .../results/clientpositive/llap/stats_part2.q.out | 30 .../clientpositive/llap/stats_sizebug.q.out| 4 - .../hadoop/hive/metastore/MetaStoreDirectSql.java | 43 +++ .../apache/hadoop/hive/metastore/ObjectStore.java | 87 -- 28 files changed, 127 insertions(+), 273 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 8805eee..61b9fb8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -209,6 +209,8 @@ public class Table implements Serializable { // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does // not support a table with no columns. sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); + //TODO setting serializaton format here is hacky. Only lazy simple serde needs it + // so should be set by serde only. Setting it here sets it unconditionally. sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); sd.setInputFormat(SequenceFileInputFormat.class.getName()); sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index c21c6f1..cd92247 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -934,15 +934,18 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer { .getSerdeParam(serdeConstants.SERIALIZATION_FORMAT); String importedSerdeFormat = tableDesc.getSerdeProps().get( serdeConstants.SERIALIZATION_FORMAT); - /* + + /* TODO : Remove this weirdity. 
See notes in Table.getEmptyTable() * If Imported SerdeFormat is null, then set it to "1" just as * metadata.Table.getEmptyTable */ importedSerdeFormat = importedSerdeFormat == null ? "1" : importedSerdeFormat; - if (!ObjectUtils.equals(existingSerdeFormat, importedSerdeFormat)) { + if (!TxnUtils.isTransactionalTable(table.getParameters()) && + !ObjectUtils.equals(existingSerdeFormat, importedSerdeFormat)) { throw new SemanticException( ErrorMsg.INCOMPATIBLE_SCHEMA -.getMsg(" Table Serde format does not match")); +.getMsg(" Table Serde format does not match. Imported :" ++ &quo
[hive] branch master updated: HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f712def HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan) f712def is described below commit f712def65b716ba6646828ed8f8be4464abbedc8 Author: Panos Garefalakis AuthorDate: Mon May 18 16:51:15 2020 +0100 HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan) Change-Id: Ic1efd6dcffc71adfa1ac3059ceacbd3f30e6ef7e Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/decode/GenericColumnVectorProducer.java | 3 +-- .../hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java| 5 + .../hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java | 10 +++--- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java index 1617692..1c7e537 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java @@ -85,8 +85,7 @@ public class GenericColumnVectorProducer implements ColumnVectorProducer { SchemaEvolutionFactory sef, InputFormat sourceInputFormat, Deserializer sourceSerDe, Reporter reporter, JobConf job, Map parts) throws IOException { cacheMetrics.incrCacheReadRequests(); -OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer( -consumer, includes, false, counters, ioMetrics); +OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics); SerDeFileMetadata fm; try { fm = new SerDeFileMetadata(sourceSerDe); diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java index 17c4821..50abdfd 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java @@ -56,7 +56,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer { private final LowLevelCache lowLevelCache; private final BufferUsageManager bufferManager; private final Configuration conf; - private boolean _skipCorrupt; // TODO: get rid of this private LlapDaemonCacheMetrics cacheMetrics; private LlapDaemonIOMetrics ioMetrics; // TODO: if using in multiple places, e.g. SerDe cache, pass this in. @@ -73,7 +72,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer { this.lowLevelCache = lowLevelCache; this.bufferManager = bufferManager; this.conf = conf; -this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf); this.cacheMetrics = cacheMetrics; this.ioMetrics = ioMetrics; this.tracePool = tracePool; @@ -90,8 +88,7 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer { InputFormat unused0, Deserializer unused1, Reporter reporter, JobConf job, Map parts) throws IOException { cacheMetrics.incrCacheReadRequests(); -OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer( -consumer, includes, _skipCorrupt, counters, ioMetrics); +OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics); OrcEncodedDataReader reader = new OrcEncodedDataReader(lowLevelCache, bufferManager, metadataCache, conf, job, split, includes, sarg, edc, counters, sef, tracePool, parts); edc.init(reader, reader, reader.getTrace()); diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index b697a0d..79dba42 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -70,20 +70,16 @@ public class OrcEncodedDataConsumer private ConsumerFileMetadata fileMetadata; // We assume one request is only for one file. private CompressionCodec codec; private List stripes; - private final boolean skipCorrupt; // TODO: get rid of this private SchemaEvolution evolution; private IoTrace trace; private final Includes includes; private TypeDescription[] batchSchemas; private boolean useDecimal64ColumnVectors; - public OrcEncodedDataConsumer( -Consumer consumer, Includes includes, boolean skipCorrupt
[hive] branch master updated: HIVE-21971 : HS2 leaks classloader due to `ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bd84b5c HIVE-21971 : HS2 leaks classloader due to `ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF (Rajesh Balamohan via Ashutosh Chauhan) bd84b5c is described below commit bd84b5cfdc0423463331935f85cbbed50b364e4b Author: Rajesh Balamohan AuthorDate: Tue May 26 16:38:59 2020 -0700 HIVE-21971 : HS2 leaks classloader due to `ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/session/SessionState.java | 22 ++ .../hadoop/hive/ql/session/TestSessionState.java | 51 ++ 2 files changed, 73 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 55bd27e..20f352e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.lang.management.ManagementFactory; +import java.lang.reflect.Method; import java.net.URI; import java.net.URISyntaxException; import java.security.AccessController; @@ -106,6 +107,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1808,6 +1810,26 @@ public class SessionState implements ISessionAuthState{ Hive.closeCurrent(); } progressMonitor = null; +// Hadoop's ReflectionUtils caches constructors for the classes it 
instantiated. +// In UDFs, this can result in classloaders not getting GCed for a temporary function, +// resulting in a PermGen leak when used extensively from HiveServer2 +// There are lots of places where hadoop's ReflectionUtils is still used. Until all of them are +// cleared up, we would have to retain this to avoid mem leak. +clearReflectionUtilsCache(); + } + + private void clearReflectionUtilsCache() { +Method clearCacheMethod; +try { + clearCacheMethod = ReflectionUtils.class.getDeclaredMethod("clearCache"); + if (clearCacheMethod != null) { +clearCacheMethod.setAccessible(true); +clearCacheMethod.invoke(null); +LOG.debug("Cleared Hadoop ReflectionUtils CONSTRUCTOR_CACHE"); + } +} catch (Exception e) { + LOG.info("Failed to clear up Hadoop ReflectionUtils CONSTRUCTOR_CACHE", e); +} } private void unCacheDataNucleusClassLoaders() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java index 0fa1c81..4c374e8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java @@ -24,9 +24,12 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.Arrays; import java.util.Collection; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileSystem; @@ -34,6 +37,12 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -221,6 +230,48 @@ public class TestSessionState { } } + static class DummyUDF extends GenericUDF { + +@Override public ObjectInspector initialize(ObjectInspector[] arguments) +throws UDFArgumentException { + return PrimitiveObjectInspectorFactory.javaStringObjectInspector; +} + +@Override public Object evaluate(DeferredObject[] arguments) throws HiveException { + return "dummy"; +} + +@Override public String getDisplayString(S
[hive] branch master updated: HIVE-23447 : Avoid sending configs to tasks and AM which are only relevant for HS2
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 4e70a13 HIVE-23447 : Avoid sending configs to tasks and AM which are only relevant for HS2 4e70a13 is described below commit 4e70a1397504929df8fee227b57e4171e5688edc Author: Ashutosh Chauhan AuthorDate: Mon May 11 22:05:13 2020 -0700 HIVE-23447 : Avoid sending configs to tasks and AM which are only relevant for HS2 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 97220c0..d21aa45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -1396,6 +1396,9 @@ public class DagUtils { // TODO: convert this to a predicate too hiveConf.stripHiddenConfigurations(conf); + +// Remove hive configs which are used only in HS2 and not needed for execution + conf.unset(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST.varname); return conf; }
[hive] branch master updated: HIVE-23536 : Provide an option to skip stats generation for major compaction (Peter Vary via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 947b7a4 HIVE-23536 : Provide an option to skip stats generation for major compaction (Peter Vary via Ashutosh Chauhan) 947b7a4 is described below commit 947b7a44896fa57bc4e2ddaa6014cc4cb2c7002e Author: Peter Vary AuthorDate: Mon May 25 16:13:32 2020 -0700 HIVE-23536 : Provide an option to skip stats generation for major compaction (Peter Vary via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +++ .../hive/ql/txn/compactor/TestCompactor.java | 52 ++ .../hadoop/hive/ql/txn/compactor/CompactorMR.java | 5 ++- .../hadoop/hive/ql/txn/compactor/Worker.java | 8 ++-- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a00d907..8094d28 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2908,6 +2908,12 @@ public class HiveConf extends Configuration { HIVE_COMPACTOR_WAIT_TIMEOUT("hive.compactor.wait.timeout", 30L, "Time out in " + "milliseconds for blocking compaction. It's value has to be higher than 2000 milliseconds. 
"), + +HIVE_MR_COMPACTOR_GATHER_STATS("hive.mr.compactor.gather.stats", true, "If set to true MAJOR compaction " + +"will gather stats if there are stats already associated with the table/partition.\n" + +"Turn this off to save some resources and the stats are not used anyway.\n" + +"Works only for MR based compaction, CRUD based compaction uses hive.stats.autogather."), + /** * @deprecated Use MetastoreConf.COMPACTOR_INITIATOR_FAILED_THRESHOLD */ diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java index c687f14..32fe535 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java @@ -129,6 +129,7 @@ public class TestCompactor { hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); +hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.cleanDb(hiveConf); @@ -1468,6 +1469,57 @@ public class TestCompactor { } } + @Test + public void testCompactorGatherStats() throws Exception { +String dbName = "default"; +String tableName = "stats_comp_test"; +List colNames = Arrays.asList("a"); +executeStatementOnDriver("drop table if exists " + dbName + "." + tableName, driver); +executeStatementOnDriver("create table " + dbName + "." + tableName + +" (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver); +executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(1)", driver); +executeStatementOnDriver("insert into " + dbName + "." 
+ tableName + " values(1)", driver); + +TxnStore txnHandler = TxnUtils.getTxnStore(conf); +txnHandler.compact(new CompactionRequest(dbName, tableName, CompactionType.MAJOR)); +runWorker(conf); + +// Make sure we do not have statistics for this table yet +// Compaction generates stats only if there is any +List colStats = msClient.getTableColumnStatistics(dbName, +tableName, colNames, Constants.HIVE_ENGINE); +assertEquals("No stats should be there for the table", 0, colStats.size()); + +executeStatementOnDriver("analyze table " + dbName + "." + tableName + " compute statistics for columns", driver); +executeStatementOnDriver("insert into " + dbName + "." + tableName + " values(2)", driver); + +// Make sure we have old statistics for the table +colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, Constants.HIVE_ENGINE); +assertEquals("Stats should be there", 1, colStats.size()); +assertEquals("Value should contain old data", 1, colStats.get(0).getStatsData().getLongStats().getHighValue()); +assertEquals("V
[hive] branch master updated: HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f76df73 HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 (David Mollitor via Ashutosh Chauhan) f76df73 is described below commit f76df736d5461c0b6466432522498ca85bd4b240 Author: David Mollitor AuthorDate: Mon May 25 16:04:23 2020 -0700 HIVE-23535 : Bump Minimum Required Version of Maven to 3.0.5 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- pom.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pom.xml b/pom.xml index 60f5f13..b4b41ea 100644 --- a/pom.xml +++ b/pom.xml @@ -26,9 +26,6 @@ Hive https://hive.apache.org - -2.2.1 - storage-api
[hive] branch master updated: HIVE-23480: use the JsonPropertyOrder annotation to ensure the ordering of the serialized properties. (Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new ef7a9de HIVE-23480: use the JsonPropertyOrder annotation to ensure the ordering of the serialized properties. (Panos G via Ashutosh Chauhan) ef7a9de is described below commit ef7a9de93523051bfa16d753a3528a9a8457ac57 Author: Panos Garefalakis AuthorDate: Mon May 18 16:32:19 2020 +0100 HIVE-23480: use the JsonPropertyOrder annotation to ensure the ordering of the serialized properties. (Panos G via Ashutosh Chauhan) Change-Id: I2f2b3f1d9eec1e26b5b6e445efe6f0106f4ea15d Signed-off-by: Ashutosh Chauhan --- .../src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java| 2 ++ 1 file changed, 2 insertions(+) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index eed1428..cf88474 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -32,6 +32,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonPropertyOrder; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; @@ -167,6 +168,7 @@ public class StatsSetupConst { COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS, NUM_ERASURE_CODED_FILES); + @JsonPropertyOrder({"basicStats", "columnStats"}) private static class ColumnStatsAccurate { private static ObjectReader objectReader; 
private static ObjectWriter objectWriter;
[hive] branch master updated: HIVE-23529 : CTAS is broken for uniontype when row_deserialize (Mustafa Iman via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3015ff2 HIVE-23529 : CTAS is broken for uniontype when row_deserialize (Mustafa Iman via Ashutosh Chauhan) 3015ff2 is described below commit 3015ff26ad0761c6332c6935ec83436d46bde561 Author: Mustafa Iman AuthorDate: Sun May 24 20:54:24 2020 -0700 HIVE-23529 : CTAS is broken for uniontype when row_deserialize (Mustafa Iman via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- data/files/data_with_union.txt | 1 + .../test/resources/testconfiguration.properties| 1 + .../clientpositive/row_deserialize_with_union.q| 17 .../llap/row_deserialize_with_union.q.out | 45 ++ .../objectinspector/ObjectInspectorUtils.java | 5 ++- .../TestStandardObjectInspectors.java | 12 +++--- 6 files changed, 73 insertions(+), 8 deletions(-) diff --git a/data/files/data_with_union.txt b/data/files/data_with_union.txt new file mode 100644 index 000..dd1e7f9 --- /dev/null +++ b/data/files/data_with_union.txt @@ -0,0 +1 @@ +0102111foo diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 7a91eef..e1ba435 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -745,6 +745,7 @@ spark.query.files=\ rcfile_bigdata.q,\ reduce_deduplicate_exclude_join.q,\ router_join_ppr.q,\ + row_deserialize_with_union.q,\ runtime_skewjoin_mapjoin_spark.q,\ sample1.q,\ sample10.q,\ diff --git a/ql/src/test/queries/clientpositive/row_deserialize_with_union.q b/ql/src/test/queries/clientpositive/row_deserialize_with_union.q new file mode 100644 index 000..cb7d2aa --- /dev/null +++ b/ql/src/test/queries/clientpositive/row_deserialize_with_union.q @@ -0,0 +1,17 @@ +SET hive.vectorized.execution.enabled=true; +set 
hive.vectorized.use.row.serde.deserialize=true; +set hive.vectorized.use.vector.serde.deserialize=false; +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/data_with_union/; +dfs -copyFromLocal ../../data/files/data_with_union.txt ${system:test.tmp.dir}/data_with_union/data_with_union.txt; + +CREATE EXTERNAL TABLE data_with_union( + unionfield uniontype, + arrayfield array, + mapfield map, + structfield struct<`sf1`:int, `sf2`:string>) +stored as textfile +location '${system:test.tmp.dir}/data_with_union'; + +create table data_with_union_2 as select * from data_with_union; + +select * from data_with_union_2; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out b/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out new file mode 100644 index 000..cc0ba5c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/row_deserialize_with_union.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: CREATE EXTERNAL TABLE data_with_union( + unionfield uniontype, + arrayfield array, + mapfield map, + structfield struct<`sf1`:int, `sf2`:string>) +stored as textfile + A masked pattern was here +PREHOOK: type: CREATETABLE + A masked pattern was here +PREHOOK: Output: database:default +PREHOOK: Output: default@data_with_union +POSTHOOK: query: CREATE EXTERNAL TABLE data_with_union( + unionfield uniontype, + arrayfield array, + mapfield map, + structfield struct<`sf1`:int, `sf2`:string>) +stored as textfile + A masked pattern was here +POSTHOOK: type: CREATETABLE + A masked pattern was here +POSTHOOK: Output: database:default +POSTHOOK: Output: default@data_with_union +PREHOOK: query: create table data_with_union_2 as select * from data_with_union +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@data_with_union +PREHOOK: Output: database:default +PREHOOK: Output: default@data_with_union_2 +POSTHOOK: query: create table data_with_union_2 as select * from data_with_union +POSTHOOK: type: 
CREATETABLE_AS_SELECT +POSTHOOK: Input: default@data_with_union +POSTHOOK: Output: database:default +POSTHOOK: Output: default@data_with_union_2 +POSTHOOK: Lineage: data_with_union_2.arrayfield SIMPLE [(data_with_union)data_with_union.FieldSchema(name:arrayfield, type:array, comment:null), ] +POSTHOOK: Lineage: data_with_union_2.mapfield SIMPLE [(data_with_union)data_with_union.FieldSchema(name:mapfield, type:map, comment:null), ] +POSTHOOK: Lineage: data_with_union_2.structfield SIMPLE [(data_with_union)data_with_union.FieldSchema(name:structfield, type:struct, comment:null), ] +POSTHOOK: Lineage: data_with_union_2.unionfield SIMPLE [(data_with_union)data_with_union.FieldSchema(name:unionfield, type:uniontype, comment:null), ] +PREHOOK: query: select * from data_with_unio
[hive] branch master updated: HIVE-23494 : Upgrade Apache parent POM to version 23 (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 270ca80 HIVE-23494 : Upgrade Apache parent POM to version 23 (David Mollitor via Ashutosh Chauhan) 270ca80 is described below commit 270ca800353458ebce6eb262781bd39b15f5e349 Author: David Mollitor AuthorDate: Sun May 24 20:38:48 2020 -0700 HIVE-23494 : Upgrade Apache parent POM to version 23 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- pom.xml | 2 +- standalone-metastore/metastore-common/pom.xml | 1 - standalone-metastore/metastore-server/pom.xml | 3 --- .../metastore-tools/metastore-benchmarks/pom.xml| 2 -- standalone-metastore/metastore-tools/pom.xml| 17 ++--- .../metastore-tools/tools-common/pom.xml| 2 -- standalone-metastore/pom.xml| 18 -- storage-api/pom.xml | 21 +++-- testutils/pom.xml | 2 -- upgrade-acid/pom.xml| 8 ++-- 10 files changed, 16 insertions(+), 60 deletions(-) diff --git a/pom.xml b/pom.xml index 7515b4c..60f5f13 100644 --- a/pom.xml +++ b/pom.xml @@ -17,7 +17,7 @@ org.apache apache -21 +23 org.apache.hive hive diff --git a/standalone-metastore/metastore-common/pom.xml b/standalone-metastore/metastore-common/pom.xml index e58c748..1938dce 100644 --- a/standalone-metastore/metastore-common/pom.xml +++ b/standalone-metastore/metastore-common/pom.xml @@ -415,7 +415,6 @@ org.apache.maven.plugins maven-antrun-plugin - ${maven.antrun.plugin.version} ant-contrib diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml index ea5f905..adc98d4 100644 --- a/standalone-metastore/metastore-server/pom.xml +++ b/standalone-metastore/metastore-server/pom.xml @@ -411,7 +411,6 @@ org.apache.maven.plugins maven-antrun-plugin - ${maven.antrun.plugin.version} ant-contrib @@ -574,7 +573,6 @@ org.apache.maven.plugins maven-failsafe-plugin -2.20.1 @@ 
-634,7 +632,6 @@ org.apache.rat apache-rat-plugin -0.10 binary-package-licenses/** diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml index 06a4d8d..612ff6f 100644 --- a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml @@ -176,8 +176,6 @@ org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 javac-with-errorprone true diff --git a/standalone-metastore/metastore-tools/pom.xml b/standalone-metastore/metastore-tools/pom.xml index f831a2e..b482c15 100644 --- a/standalone-metastore/metastore-tools/pom.xml +++ b/standalone-metastore/metastore-tools/pom.xml @@ -15,16 +15,15 @@ http://maven.apache.org/POM/4.0.0; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> + 4.0.0 hive-standalone-metastore -org.apache.hive 4.0.0-SNAPSHOT +org.apache.hive - 4.0.0 hive-metastore-tools Hive Metastore Tools - pom @@ -34,9 +33,6 @@ 4.0.0-SNAPSHOT -2.20.1 -UTF-8 -UTF-8 2.8 2.3.1 3.1.0 @@ -149,15 +145,6 @@ - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.7.0 - - - diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml b/standalone-metastore/metastore-tools/tools-common/pom.xml index 44f02b7..7403b13 100644 --- a/standalone-metastore/metastore-tools/tools-common/pom.xml +++ b/standalone-metastore/metastore-tools/tools-common/pom.xml @@ -74,8 +74,6 @@ org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 javac-with-errorprone true diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml index ee2701e..577497b 100644 --- a/standalone-metastore/pom.xml +++ b/standalone-metastore/pom.xml @@ -16,30 +16,30 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://mav
[hive] branch master updated: HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 04b311d HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan) 04b311d is described below commit 04b311d2ce52fb6ab6cf6fe4edb91cd0de970946 Author: RAMESH KUMAR THANGARAJAN AuthorDate: Wed May 20 22:21:07 2020 -0700 HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- data/files/arrayofIntdata/base_001/54_0| 1 + .../test/resources/testconfiguration.properties| 1 + .../hive/ql/exec/vector/VectorDeserializeRow.java | 6 +- .../clientpositive/vector_deserialize_row.q| 33 + .../llap/vector_deserialize_row.q.out | 145 + 5 files changed, 182 insertions(+), 4 deletions(-) diff --git a/data/files/arrayofIntdata/base_001/54_0 b/data/files/arrayofIntdata/base_001/54_0 new file mode 100644 index 000..a7a5893 --- /dev/null +++ b/data/files/arrayofIntdata/base_001/54_0 @@ -0,0 +1 @@ +test 1 5208187416695208186759785208123282775208126369095208128960765208133938905208135089945208135646875208135960535208135314935208136316075208136109305208136556885208136690205208137412015208137269385208137339875208137443045208137204395208137666885208137720445208137787775208137264825208137990955208138331935208138811555208139113415208139319435208137474495208138853255208139267335208139179105208137428265208139428805208139564045208139611825208139439395208 [...] 
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index f48e433..e7c3e43 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -946,6 +946,7 @@ minillaplocal.query.files=\ vector_decimal64_case_when_nvl.q,\ vector_decimal64_case_when_nvl_cbo.q,\ vector_decimal64_multi_vertex.q,\ + vector_deserialize_row.q,\ vector_full_outer_join.q,\ vector_fullouter_mapjoin_1_fast.q,\ vector_fullouter_mapjoin_1_optimized.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 97166ec..6453069 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -846,8 +846,7 @@ public final class VectorDeserializeRow { // Ensure child size. final int childCapacity = listColVector.child.isNull.length; - final int childCount = listColVector.childCount; - if (childCapacity < childCount / 0.75) { + if (childCapacity < offset / 0.75) { listColVector.child.ensureSize(childCapacity * 2, true); } @@ -877,8 +876,7 @@ public final class VectorDeserializeRow { // Ensure child size. 
final int childCapacity = mapColVector.keys.isNull.length; - final int childCount = mapColVector.childCount; - if (childCapacity < childCount / 0.75) { + if (childCapacity < offset / 0.75) { mapColVector.keys.ensureSize(childCapacity * 2, true); mapColVector.values.ensureSize(childCapacity * 2, true); } diff --git a/ql/src/test/queries/clientpositive/vector_deserialize_row.q b/ql/src/test/queries/clientpositive/vector_deserialize_row.q new file mode 100644 index 000..38c8454 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_deserialize_row.q @@ -0,0 +1,33 @@ +CREATE external TABLE IF NOT EXISTS sessions +( +session_id string, +uid bigint, +uids array, +search_ids array, +total_views int, +datestamp date +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +STORED AS TEXTFILE +LOCATION '../../data/files/arrayofIntdata'; + +CREATE TABLE IF NOT EXISTS sessions_orc +( +session_id string, +uid bigint, +uids array, +search_ids array, +total_views int, +datestamp date +); + +describe formatted sessions_orc; + +INSERT OVERWRITE TABLE sessions_orc +SELECT * FROM sessions; + +select count(1) from sessions_orc; +select count(1) from sessions; +drop table sessions; +drop table sessions_orc; + diff --git a/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out new file mode 100644 index 000..6b0e010 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: CREATE external TABLE IF NOT EXISTS sessions +( +session_id string, +uid bigint, +uids array, +search_ids array, +tot
[hive] branch master updated: HIVE-23476 : LLAP: Preallocate arenas for mmap case as well (Prasanth J via Gopal V)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 28bdeeb HIVE-23476 : LLAP: Preallocate arenas for mmap case as well (Prasanth J via Gopal V) 28bdeeb is described below commit 28bdeeb12e786c4830fdc1b138c7113dc1baa095 Author: Prasanth Jayachandran AuthorDate: Sun May 17 15:38:59 2020 -0700 HIVE-23476 : LLAP: Preallocate arenas for mmap case as well (Prasanth J via Gopal V) Signed-off-by: Ashutosh Chauhan --- .../src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java index 341da25..6934dba 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java @@ -215,7 +215,7 @@ public final class BuddyAllocator for (int i = 0; i < maxArenas; ++i) { arenas[i] = new Arena(); } -int initCount = doPreallocate && !isMapped ? maxArenas : 1; +int initCount = doPreallocate ? maxArenas : 1; for (int i = 0; i < initCount; ++i) { arenas[i].init(i); metrics.incrAllocatedArena();
[hive] branch master updated: HIVE-23443 : LLAP speculative task pre-emption seems to be not working (Prasanth J via Gopal V, Panos G)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e85731c HIVE-23443 : LLAP speculative task pre-emption seems to be not working (Prasanth J via Gopal V, Panos G) e85731c is described below commit e85731c42b6485412deefccf85f17e3ae9e0f403 Author: Prasanth Jayachandran AuthorDate: Sun May 17 09:01:21 2020 -0700 HIVE-23443 : LLAP speculative task pre-emption seems to be not working (Prasanth J via Gopal V, Panos G) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/daemon/impl/TaskExecutorService.java | 22 +++- .../llap/daemon/impl/TestTaskExecutorService.java | 134 + 2 files changed, 152 insertions(+), 4 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java index d8b517d..1d6e852 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java @@ -867,7 +867,8 @@ public class TaskExecutorService extends AbstractService return sc; } - private void finishableStateUpdated(TaskWrapper taskWrapper, boolean newFinishableState) { + @VisibleForTesting + void finishableStateUpdated(TaskWrapper taskWrapper, boolean newFinishableState) { synchronized (lock) { LOG.debug("Fragment {} guaranteed state changed to {}; finishable {}, in wait queue {}, " + "in preemption queue {}", taskWrapper.getRequestId(), taskWrapper.isGuaranteed(), @@ -884,10 +885,20 @@ public class TaskExecutorService extends AbstractService taskWrapper.updateCanFinishForPriority(newFinishableState); forceReinsertIntoQueue(taskWrapper, isRemoved); } else { -taskWrapper.updateCanFinishForPriority(newFinishableState); -if (!newFinishableState && 
!taskWrapper.isInPreemptionQueue()) { - // No need to check guaranteed here; if it was false we would already be in the queue. +// if speculative task, any finishable state change should re-order the queue as speculative tasks are always +// not-guaranteed (re-order helps put non-finishable's ahead of finishable) +if (!taskWrapper.isGuaranteed()) { + removeFromPreemptionQueue(taskWrapper); + taskWrapper.updateCanFinishForPriority(newFinishableState); addToPreemptionQueue(taskWrapper); +} else { + // if guaranteed task, if the finishable state changed to non-finishable and if the task doesn't exist + // pre-emption queue, then add it so that it becomes candidate to kill + taskWrapper.updateCanFinishForPriority(newFinishableState); + if (!newFinishableState && !taskWrapper.isInPreemptionQueue()) { +// No need to check guaranteed here; if it was false we would already be in the queue. +addToPreemptionQueue(taskWrapper); + } } } @@ -896,6 +907,9 @@ public class TaskExecutorService extends AbstractService } private void addToPreemptionQueue(TaskWrapper taskWrapper) { +if (taskWrapper.isInPreemptionQueue()) { + return; +} synchronized (lock) { insertIntoPreemptionQueueOrFailUnlocked(taskWrapper); taskWrapper.setIsInPreemptableQueue(true); diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java index ce9fce9..ff61fdd 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TestTaskExecutorService.java @@ -236,6 +236,140 @@ public class TestTaskExecutorService { } } + @Test(timeout = 1) + public void testPreemptionQueueOnFinishableStateUpdates() throws InterruptedException { + +long r1WorkTime = 1000L; +long r2WorkTime = 2000L; +long r3WorkTime = 2000L; +// all tasks start with non-finishable state +MockRequest r1 = 
createMockRequest(1, 2, 100, 200, false, r1WorkTime, false); +MockRequest r2 = createMockRequest(2, 1, 100, 200, false, r2WorkTime, false); +MockRequest r3 = createMockRequest(3, 3, 50, 200, false, r3WorkTime, false); + + +TaskExecutorServiceForTest taskExecutorService = + new TaskExecutorServiceForTest(4, 2, ShortestJobFirstComparator.class.getName(), true, mockMetrics); +taskExecutorService.init(new Configuration()); +taskExecutorService.start(); + +try { + String fragmentId1 = r1.getRequestId(); + Scheduler.SubmissionState submissionState
[hive] branch master updated: HIVE-23376 : Avoid repeated SHA computation in GenericUDTFGetSplits for hive-exec jar (Ramesh Kumar via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b53a62f HIVE-23376 : Avoid repeated SHA computation in GenericUDTFGetSplits for hive-exec jar (Ramesh Kumar via Rajesh Balamohan) b53a62f is described below commit b53a62ff3b94efd2c9da7b0d805f076e6b1da21e Author: RAMESH KUMAR THANGARAJAN AuthorDate: Sat May 16 20:20:35 2020 -0700 HIVE-23376 : Avoid repeated SHA computation in GenericUDTFGetSplits for hive-exec jar (Ramesh Kumar via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index 7682e1f..9568096 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -128,6 +128,7 @@ import com.google.common.base.Preconditions; @UDFType(deterministic = false) public class GenericUDTFGetSplits extends GenericUDTF { private static final Logger LOG = LoggerFactory.getLogger(GenericUDTFGetSplits.class); + private static String sha = null; protected transient StringObjectInspector stringOI; protected transient IntObjectInspector intOI; @@ -703,7 +704,9 @@ public class GenericUDTFGetSplits extends GenericUDTF { Path destDirPath = destDirStatus.getPath(); Path localFile = new Path(localJarPath); -String sha = getSha(localFile, conf); +if (sha == null || !destDirPath.toString().contains(sha)) { + sha = getSha(localFile, conf); +} String destFileName = localFile.getName();
[hive] branch master updated: HIVE-23446 : LLAP: Reduce IPC connection misses to AM for short queries (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new eeffb0e HIVE-23446 : LLAP: Reduce IPC connection misses to AM for short queries (Rajesh Balamohan via Ashutosh Chauhan) eeffb0e is described below commit eeffb0e4e7feab7cea0dba9e7a2b63808b2023f7 Author: Rajesh Balamohan AuthorDate: Sat May 16 20:05:27 2020 -0700 HIVE-23446 : LLAP: Reduce IPC connection misses to AM for short queries (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/daemon/impl/ContainerRunnerImpl.java | 82 +- .../hadoop/hive/llap/daemon/impl/QueryInfo.java| 25 +++ .../hadoop/hive/llap/daemon/impl/QueryTracker.java | 5 +- .../llap/daemon/impl/TaskExecutorTestHelpers.java | 2 +- 4 files changed, 92 insertions(+), 22 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index 9c73747..a4de3d9 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -15,7 +15,6 @@ package org.apache.hadoop.hive.llap.daemon.impl; import java.io.IOException; -import java.net.InetAddress; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.Arrays; @@ -23,10 +22,19 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import 
java.util.function.Supplier; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalListener; +import com.google.common.cache.RemovalNotification; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.UgiFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -103,6 +111,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu private static final Logger LOG = LoggerFactory.getLogger(ContainerRunnerImpl.class); public static final String THREAD_NAME_FORMAT_PREFIX = "ContainerExecutor "; + private UgiPool ugiPool; private final AMReporter amReporter; private final QueryTracker queryTracker; private final Scheduler executorService; @@ -131,6 +140,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu super("ContainerRunnerImpl"); Preconditions.checkState(numExecutors > 0, "Invalid number of executors: " + numExecutors + ". Must be > 0"); +this.ugiPool = new UgiPool(numExecutors); this.localAddress = localAddress; this.localShufflePort = localShufflePort; this.amReporter = amReporter; @@ -270,7 +280,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu queryIdentifier, qIdProto.getApplicationIdString(), dagId, vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier, vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), - vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId); + vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, amNodeId, ugiPool); // May need to setup localDir for re-localization, which is usually setup as Environment.PWD. 
// Used for re-localization, to add the user specified configuration (conf_pb_binary_stream) @@ -593,4 +603,72 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu return executorService.getNumActiveForReporting(); } + static class UgiPool { +// Pool of UGI for a given appTokenIdentifier (AM). Expires after 3 hours of last access +private final Cache> ugiPool = +CacheBuilder +.newBuilder().removalListener(new RemovalListener>() { + @Override + public void onRemoval( + RemovalNotification> notification) { +LOG.debug("Removing " + notification.getValue() + " from pool.Pool size: " + ugiPool.size()); + } +}).expireAfterAccess(60 * 3, TimeUnit.MINUTES).build(); + +private final int numExecutors; + +public UgiPool(int numExecutors) { + this.numExecutors = numExecutors; +} + +/** + * Get UGI for a given AM and appToken. It is
[hive] branch master updated: HIVE-23449 : LLAP: Reduce mkdir and config creations in submitWork hotpath (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 781b7fc HIVE-23449 : LLAP: Reduce mkdir and config creations in submitWork hotpath (Rajesh Balamohan via Ashutosh Chauhan) 781b7fc is described below commit 781b7fc3e450f5a15e1afa2096189884b772b115 Author: Rajesh Balamohan AuthorDate: Sat May 16 09:12:49 2020 -0700 HIVE-23449 : LLAP: Reduce mkdir and config creations in submitWork hotpath (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java | 13 + .../apache/hadoop/hive/llap/daemon/impl/QueryTracker.java | 4 ++-- .../hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java| 13 - .../hadoop/hive/llap/shufflehandler/ShuffleHandler.java | 4 .../hive/llap/daemon/impl/TaskExecutorTestHelpers.java | 3 ++- .../hive/llap/daemon/impl/TestContainerRunnerImpl.java | 6 -- 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index 6a13b55..9c73747 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.UgiFactory; @@ -271,23 +272,19 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(), vertex.getUser(), vertex, 
jobToken, fragmentIdString, tokenInfo, amNodeId); - String[] localDirs = fragmentInfo.getLocalDirs(); - Preconditions.checkNotNull(localDirs); - if (LOG.isDebugEnabled()) { -LOG.debug("Dirs are: " + Arrays.toString(localDirs)); - } // May need to setup localDir for re-localization, which is usually setup as Environment.PWD. // Used for re-localization, to add the user specified configuration (conf_pb_binary_stream) - Configuration callableConf = new Configuration(getConfig()); + // Lazy create conf object, as it gets expensive in this codepath. + Supplier callableConf = () -> new Configuration(getConfig()); UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi(); boolean isGuaranteed = request.hasIsGuaranteed() && request.getIsGuaranteed(); // enable the printing of (per daemon) LLAP task queue/run times via LLAP_TASK_TIME_SUMMARY ConfVars tezSummary = ConfVars.TEZ_EXEC_SUMMARY; ConfVars llapTasks = ConfVars.LLAP_TASK_TIME_SUMMARY; - boolean addTaskTimes = callableConf.getBoolean(tezSummary.varname, tezSummary.defaultBoolVal) - && callableConf.getBoolean(llapTasks.varname, llapTasks.defaultBoolVal); + boolean addTaskTimes = getConfig().getBoolean(tezSummary.varname, tezSummary.defaultBoolVal) + && getConfig().getBoolean(llapTasks.varname, llapTasks.defaultBoolVal); final String llapHost; if (UserGroupInformation.isSecurityEnabled()) { diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java index eae8e08..bf4eea0 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java @@ -211,9 +211,9 @@ public class QueryTracker extends AbstractService { LOG.debug("Registering request for {} with the ShuffleHandler", queryIdentifier); } if (!vertex.getIsExternalSubmission()) { +String[] localDirs = 
(ShuffleHandler.get().isDirWatcherEnabled()) ? queryInfo.getLocalDirs() : null; ShuffleHandler.get() -.registerDag(appIdString, dagIdentifier, appToken, -user, queryInfo.getLocalDirs()); +.registerDag(appIdString, dagIdentifier, appToken, user, localDirs); } return queryInfo.registerFragment( diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java index 3619252..bc26dc0 100644 --- a/llap-server/s
[hive] branch master updated: HIVE-23375: Introduce TezCounter to track MJ HashTable Load time
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 31f20c8 HIVE-23375: Introduce TezCounter to track MJ HashTable Load time 31f20c8 is described below commit 31f20c8a29cd75bbf244ce9da5b5dfef37900280 Author: Panos Garefalakis AuthorDate: Wed May 6 12:00:35 2020 +0100 HIVE-23375: Introduce TezCounter to track MJ HashTable Load time Change-Id: I277696aa46b8a6343f53c36f773ee53575f513ac Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java | 4 .../org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java | 11 +++ .../vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java | 11 +++ .../hadoop/hive/ql/hooks/PostExecTezSummaryPrinter.java | 6 +- 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java index 02cdae0..6681522 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java @@ -29,6 +29,10 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; */ public interface HashTableLoader { + enum HashTableLoaderCounters { +HASHTABLE_LOAD_TIME_MS + }; + void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, MapJoinOperator joinOp); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java index 151d1b3..9cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java @@ -26,7 +26,10 @@ import java.util.Map; import org.apache.hadoop.hive.llap.LlapDaemonInfo; import 
org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; +import org.apache.tez.common.counters.TezCounter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -66,6 +69,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable private MapJoinDesc desc; private TezContext tezContext; private String cacheKey; + private TezCounter htLoadCounter; @Override public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, @@ -74,6 +78,10 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable this.hconf = hconf; this.desc = joinOp.getConf(); this.cacheKey = joinOp.getCacheKey(); +String counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP); +String vertexName = hconf.get(Operator.CONTEXT_NAME_KEY, ""); +String counterName = Utilities.getVertexCounterName(HashTableLoaderCounters.HASHTABLE_LOAD_TIME_MS.name(), vertexName); +this.htLoadCounter = tezContext.getTezProcessorContext().getCounters().findCounter(counterGroup, counterName); } @Override @@ -238,6 +246,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable cacheKey, tableContainer.getClass().getSimpleName(), pos); tableContainer.setSerde(keyCtx, valCtx); +long startTime = System.currentTimeMillis(); while (kvReader.next()) { tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue()); numEntries++; @@ -258,6 +267,8 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable } } } +long delta = System.currentTimeMillis() - startTime; +htLoadCounter.increment(delta); tableContainer.seal(); mapJoinTables[pos] = tableContainer; if (doMemCheck) { diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java index 8d9c546..a011027 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java @@ -23,7 +23,10 @@ import java.util.Map; import org.apache.hadoop.hive.llap.LlapDaemonInfo; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhau
[hive] branch master updated: HIVE-23423 : Check of disabling hash aggregation ignores grouping set ( Gopal V via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 9f40d7c HIVE-23423 : Check of disabling hash aggregation ignores grouping set ( Gopal V via Ashutosh Chauhan) 9f40d7c is described below commit 9f40d7cc1d889aa3079f3f494cf810fabe326e44 Author: Gopal V AuthorDate: Wed May 13 21:42:29 2020 -0700 HIVE-23423 : Check of disabling hash aggregation ignores grouping set ( Gopal V via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/exec/vector/VectorGroupByOperator.java | 21 ++- .../ql/exec/vector/TestVectorGroupByOperator.java | 207 +++-- 2 files changed, 209 insertions(+), 19 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index f104c13..d4d18ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -645,9 +645,26 @@ public class VectorGroupByOperator extends Operator LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d", numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr))); } -if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) { +/* + * The grouping sets expand the hash sizes by producing intermediate keys. 3 grouping sets + * of (),(col1),(col1,col2), will turn 10 rows into 30 rows. If the col1 has an nDV of 2 and + * col2 has nDV of 5, then this turns into a maximum of 1+3+(2*5) or 14 keys into the + * hashtable. + * + * So you get 10 rows in and 14 rows out, which is a reduction of ~2x vs Streaming mode, + * but it is an increase if the grouping-set is not accounted for. 
+ * + * For performance, it is definitely better to send 14 rows out to shuffle and not 30. + * + * Particularly if the same nDVs are repeated for a thousand rows, this would send a + * thousand rows via streaming to a single reducer which owns the empty grouping set, + * instead of sending 1 from the hash. + * + */ +final int groupingExpansion = (groupingSets != null) ? groupingSets.length : 1; +final long intermediateKeyCount = sumBatchSize * groupingExpansion; +if (numEntriesHashTable > intermediateKeyCount * minReductionHashAggr) { flush(true); - changeToStreamingMode(); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index e8586fc..12df385 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -111,9 +112,10 @@ public class TestVectorGroupByOperator { String column, TypeInfo typeInfo) { -ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo); -ArrayList params = new ArrayList(); +TypeInfo[] typeInfos = new TypeInfo[] {typeInfo}; +ArrayList params = new ArrayList(1); +ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo); params.add(inputColumn); AggregationDesc agg = new AggregationDesc(); @@ -121,10 +123,7 @@ public class TestVectorGroupByOperator { agg.setMode(mode); agg.setParameters(params); -TypeInfo[] typeInfos = 
new TypeInfo[] { typeInfo }; - final GenericUDAFEvaluator evaluator; -PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); try { switch (aggregate) { case "count": @@ -232,14 +231,13 @@ public class TestVectorGroupByOperator { return new Pair(desc, vectorDesc); } - private static Pair buildKeyGroupByDesc( VectorizationContext ctx, String aggregate, String column, TypeInfo dataTypeInfo, - String key, - TypeInfo keyTypeInfo) { + String[] keys, + TypeInfo[] keyTypeInfos) { Pair pair = buildGroupByDescType(ctx, aggregate, GenericU
[hive] branch master updated: HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) twice for the same file ( Rajesh Balamohan via Ashutosh Chauhan )
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b63c35a HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) twice for the same file ( Rajesh Balamohan via Ashutosh Chauhan ) b63c35a is described below commit b63c35a10dedc390e7be832cc7ebcf1bac030f43 Author: Rajesh Balamohan AuthorDate: Wed May 13 15:44:58 2020 -0700 HIVE-23451 : FileSinkOperator calls deleteOnExit (hdfs call) twice for the same file ( Rajesh Balamohan via Ashutosh Chauhan ) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/exec/FileSinkOperator.java | 5 - .../clientpositive/llap/orc_llap_counters.q.out| 84 - .../clientpositive/llap/orc_llap_counters1.q.out | 4 +- .../clientpositive/llap/orc_ppd_basic.q.out| 102 ++--- .../llap/orc_ppd_schema_evol_3a.q.out | 100 ++-- 5 files changed, 145 insertions(+), 150 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 082f1cb..1b84ba2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -791,11 +791,6 @@ public class FileSinkOperator extends TerminalOperator implements filesIdx++; } assert filesIdx == numFiles; - - // in recent hadoop versions, use deleteOnExit to clean tmp files. 
- if (isNativeTable() && fs != null && fsp != null && !conf.isMmTable() && !conf.isDirectInsert()) { -autoDelete = fs.deleteOnExit(fsp.outPaths[0]); - } } catch (Exception e) { throw new HiveException(e); } diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index f8190af..a5fb0da 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -257,7 +257,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 16680 HDFS_BYTES_WRITTEN: 104 - HDFS_READ_OPS: 6 + HDFS_READ_OPS: 5 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -296,7 +296,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -319,7 +319,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 1047 HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 4 + HDFS_READ_OPS: 3 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -360,7 +360,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -399,7 +399,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -438,7 +438,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -477,7 +477,7 @@ PREHOOK: Output: hdfs://### 
HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -516,7 +516,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -555,7 +555,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -594,7 +594,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_WRITTEN: 104 - HDFS_READ_OPS: 3 + HDFS_READ_OPS: 2 HDFS_LARGE_READ_OPS: 0 HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: @@ -633,7 +633,7 @@ PREHOOK: Output: hdfs://### HDFS PATH ### Stage-1 FILE SYSTEM COUNTERS: HDFS_BYTES_READ: 0 HDFS_BYTES_W
[hive] branch master updated: HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh Chauhan via Gopal V)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 57c1593 HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh Chauhan via Gopal V) 57c1593 is described below commit 57c15936d7a69e215c986d62aa959e70cb352da4 Author: Ashutosh Chauhan AuthorDate: Mon May 11 21:33:48 2020 -0700 HIVE-23445 : Remove mapreduce.workflow.* configs (Ashutosh Chauhan via Gopal V) --- .../java/org/apache/hadoop/hive/ql/Compiler.java | 3 --- .../java/org/apache/hadoop/hive/ql/Executor.java | 2 -- .../org/apache/hadoop/hive/ql/exec/DagUtils.java | 4 +--- .../org/apache/hadoop/hive/ql/exec/Utilities.java | 22 -- 4 files changed, 1 insertion(+), 30 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java b/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java index aa42fd5..b5e093d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Compiler.java @@ -339,9 +339,6 @@ public class Compiler { plan.setOptimizedCBOPlan(context.getCalcitePlan()); plan.setOptimizedQueryString(context.getOptimizedSql()); -driverContext.getConf().set("mapreduce.workflow.id", "hive_" + driverContext.getQueryId()); -driverContext.getConf().set("mapreduce.workflow.name", driverContext.getQueryString()); - // initialize FetchTask right here if (plan.getFetchTask() != null) { plan.getFetchTask().initialize(driverContext.getQueryState(), plan, null, context); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java index e9909a9..7af3cfb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Executor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Executor.java @@ -346,8 +346,6 @@ public class Executor { if (noName) { driverContext.getConf().set(MRJobConfig.JOB_NAME, jobName + " (" + task.getId() + 
")"); } - driverContext.getConf().set(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME, task.getId()); - Utilities.setWorkflowAdjacencies(driverContext.getConf(), driverContext.getPlan()); taskQueue.incCurJobNo(1); CONSOLE.printInfo("Launching Job " + taskQueue.getCurJobNo() + " out of " + jobCount); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java index 480d679..48d8dba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DagUtils.java @@ -26,14 +26,12 @@ import com.google.common.base.Strings; public class DagUtils { - public static final String MAPREDUCE_WORKFLOW_NODE_NAME = "mapreduce.workflow.node.name"; - public static String getQueryName(Configuration conf) { String name = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYNAME); if (Strings.isNullOrEmpty(name)) { return conf.get(MRJobConfig.JOB_NAME); } else { - return name + " (" + conf.get(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME) + ")"; + return name; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index ace24be..0e4ce78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -543,28 +543,6 @@ public final class Utilities { } } - public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { -try { - Graph stageGraph = plan.getQueryPlan().getStageGraph(); - if (stageGraph == null) { -return; - } - List adjList = stageGraph.getAdjacencyList(); - if (adjList == null) { -return; - } - for (Adjacency adj : adjList) { -List children = adj.getChildren(); -if (CollectionUtils.isEmpty(children)) { - return; -} -conf.setStrings("mapreduce.workflow.adjacency." 
+ adj.getNode(), -children.toArray(new String[0])); - } -} catch (IOException e) { -} - } - public static List getFieldSchemaString(List fl) { if (fl == null) { return null;
[hive] branch master updated: HIVE-23429 : LLAP: Optimize retrieving queryId details in LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 7e77f25 HIVE-23429 : LLAP: Optimize retrieving queryId details in LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan) 7e77f25 is described below commit 7e77f25f1361d0fff8b97e6fc45052bc280e5f7b Author: Rajesh Balamohan AuthorDate: Mon May 11 07:24:04 2020 -0700 HIVE-23429 : LLAP: Optimize retrieving queryId details in LlapTaskCommunicator (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/tezplugins/LlapTaskCommunicator.java | 18 ++ .../org/apache/hadoop/hive/ql/exec/tez/TezTask.java| 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index b168f76..36a2d6b 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -94,12 +94,14 @@ import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.api.event.VertexStateUpdate; import org.apache.tez.dag.app.TezTaskCommunicatorImpl; +import org.apache.tez.dag.app.dag.DAG; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.api.TaskFailureType; import org.apache.tez.runtime.api.impl.TaskSpec; import org.apache.tez.runtime.api.impl.TezHeartbeatRequest; import org.apache.tez.runtime.api.impl.TezHeartbeatResponse; import org.apache.tez.serviceplugins.api.ContainerEndReason; +import org.apache.tez.serviceplugins.api.DagInfo; import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults; import 
org.apache.tez.serviceplugins.api.TaskAttemptEndReason; import org.apache.tez.serviceplugins.api.TaskCommunicatorContext; @@ -398,11 +400,9 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl { credentialsChanged, priority); int dagId = taskSpec.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId(); if (currentQueryIdentifierProto == null || (dagId != currentQueryIdentifierProto.getDagIndex())) { - // TODO HiveQueryId extraction by parsing the Processor payload is ugly. This can be improved - // once TEZ-2672 is fixed. - String hiveQueryId; + String hiveQueryId = extractQueryIdFromContext(); try { -hiveQueryId = extractQueryId(taskSpec); +hiveQueryId = (hiveQueryId == null) ? extractQueryId(taskSpec) : hiveQueryId; } catch (IOException e) { throw new RuntimeException("Failed to extract query id from task spec: " + taskSpec, e); } @@ -820,12 +820,22 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl { // is likely already happening. } + // Needed for GenericUDTFGetSplits, where TaskSpecs are generated private String extractQueryId(TaskSpec taskSpec) throws IOException { UserPayload processorPayload = taskSpec.getProcessorDescriptor().getUserPayload(); Configuration conf = TezUtils.createConfFromUserPayload(processorPayload); return HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); } + private String extractQueryIdFromContext() { +//TODO: Remove following instance of check, When TEZ-2672 exposes getConf from DagInfo +DagInfo dagInfo = getContext().getCurrentDagInfo(); +if (dagInfo instanceof DAG) { + return ((DAG)dagInfo).getConf().get(ConfVars.HIVEQUERYID.varname); +} +return null; + } + private SubmitWorkRequestProto constructSubmitWorkRequest(ContainerId containerId, TaskSpec taskSpec, FragmentRuntimeInfo fragmentRuntimeInfo, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index 854bc89..b1bf2f8 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -424,6 +424,9 @@ public class TezTask extends Task { .put("description", ctx.getCmd()); String dagInfo = json.toString(); +String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); +dag.setConf(HiveConf.ConfVars.HIVEQUERYID.varname, queryId); + if (LOG.isDebugEnabled()) { LOG.debug("DagInfo: " + dagInfo); }
[hive] branch master updated: HIVE-22320 : Cluster and fs type settings can be replaced with a single minicluster setting in CliConfigs (Laszlo Bodor via Miklos Gergely)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b0dc673 HIVE-22320 : Cluster and fs type settings can be replaced with a single minicluster setting in CliConfigs (Laszlo Bodor via Miklos Gergely) b0dc673 is described below commit b0dc6734cdc0945ac9b3ebff800a63bccc4a7e61 Author: Laszlo Bodor AuthorDate: Sun May 10 21:16:35 2020 -0700 HIVE-22320 : Cluster and fs type settings can be replaced with a single minicluster setting in CliConfigs (Laszlo Bodor via Miklos Gergely) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/cli/control/AbstractCliConfig.java| 1 + .../java/org/apache/hadoop/hive/cli/control/CliConfigs.java | 12 ++-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java index 712af82..353a4aa 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java @@ -368,6 +368,7 @@ public abstract class AbstractCliConfig { if (clusterType == null) { throw new RuntimeException("clustertype cant be null"); } +this.setFsType(clusterType.getDefaultFsType()); } protected FsType getFsType() { diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 1ecd0d1..473a9f8 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -147,7 +147,6 @@ public class CliConfigs { setHiveConfDir("data/conf/tez"); setClusterType(MiniClusterType.TEZ); 
-setFsType(QTestMiniClusters.FsType.HDFS); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); } @@ -196,7 +195,6 @@ public class CliConfigs { setCleanupScript("q_test_cleanup_druid.sql"); setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.DRUID); -setFsType(QTestMiniClusters.FsType.HDFS); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); } @@ -216,7 +214,6 @@ public class CliConfigs { setCleanupScript("q_test_cleanup_druid.sql"); setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.DRUID_KAFKA); -setFsType(QTestMiniClusters.FsType.HDFS); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); } @@ -233,7 +230,6 @@ public class CliConfigs { setLogDir("itests/qtest/target/tmp/log"); setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.KAFKA); -setFsType(QTestMiniClusters.FsType.HDFS); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); } @@ -265,7 +261,6 @@ public class CliConfigs { setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.LLAP_LOCAL); -setFsType(QTestMiniClusters.FsType.LOCAL); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); } @@ -288,7 +283,7 @@ public class CliConfigs { setClusterType(MiniClusterType.MR); -setFsType(QTestMiniClusters.FsType.ENCRYPTED_HDFS); +setFsType(QTestMiniClusters.FsType.ENCRYPTED_HDFS); // override default FsType.HDFS if (getClusterType() == MiniClusterType.TEZ) { setHiveConfDir("data/conf/tez"); } else { @@ -723,7 +718,7 @@ public class CliConfigs { setCleanupScript("q_test_cleanup_src.sql"); setClusterType(MiniClusterType.MR); -setFsType(QTestMiniClusters.FsType.ERASURE_CODED_HDFS); +setFsType(QTestMiniClusters.FsType.ERASURE_CODED_HDFS); // override default FsType.HDFS setHiveConfDir(getClusterType()); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); @@ -766,7 +761,6 @@ public class 
CliConfigs { setCleanupScript("q_test_cleanup_druid.sql"); setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.DRUID_LOCAL); -setFsType(QTestMiniClusters.FsType.LOCAL); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e);
[hive] branch master updated: HIVE-22343 : Fix incorrect spelling of 'artifectId' in pom.xml ( ice bai via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6491151 HIVE-22343 : Fix incorrect spelling of 'artifectId' in pom.xml ( ice bai via Ashutosh Chauhan) 6491151 is described below commit 64911519f1eefe700bed0e9368743487c0df751e Author: Ashutosh Chauhan AuthorDate: Sun May 10 18:07:59 2020 -0700 HIVE-22343 : Fix incorrect spelling of 'artifectId' in pom.xml ( ice bai via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- beeline/pom.xml | 2 +- cli/pom.xml | 2 +- common/pom.xml| 2 +- contrib/pom.xml | 2 +- druid-handler/pom.xml | 2 +- hbase-handler/pom.xml | 2 +- hcatalog/core/pom.xml | 2 +- hcatalog/hcatalog-pig-adapter/pom.xml | 2 +- hcatalog/server-extensions/pom.xml| 2 +- hcatalog/streaming/pom.xml| 4 ++-- hcatalog/webhcat/java-client/pom.xml | 2 +- itests/hcatalog-unit/pom.xml | 2 +- itests/hive-unit-hadoop2/pom.xml | 2 +- itests/qtest-accumulo/pom.xml | 2 +- itests/qtest-spark/pom.xml| 2 +- itests/qtest/pom.xml | 2 +- itests/test-serde/pom.xml | 2 +- jdbc/pom.xml | 2 +- metastore/pom.xml | 4 ++-- packaging/pom.xml | 2 +- pom.xml | 8 ql/pom.xml| 4 ++-- serde/pom.xml | 2 +- service/pom.xml | 2 +- shims/0.23/pom.xml| 2 +- shims/aggregator/pom.xml | 2 +- shims/common/pom.xml | 2 +- shims/scheduler/pom.xml | 2 +- standalone-metastore/metastore-common/pom.xml | 2 +- streaming/pom.xml | 2 +- testutils/pom.xml | 2 +- testutils/ptest2/pom.xml | 6 +++--- upgrade-acid/pre-upgrade/pom.xml | 2 +- vector-code-gen/pom.xml | 2 +- 34 files changed, 42 insertions(+), 42 deletions(-) diff --git a/beeline/pom.xml b/beeline/pom.xml index bb627f2..5ba92e3 100644 --- a/beeline/pom.xml +++ b/beeline/pom.xml @@ -33,7 +33,7 @@ - + org.apache.hive diff --git a/cli/pom.xml b/cli/pom.xml index eca0282..7dca2dd 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -32,7 +32,7 @@ - + org.apache.hive diff 
--git a/common/pom.xml b/common/pom.xml index 74586e5..5cae80c 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -32,7 +32,7 @@ - + org.apache.hive diff --git a/contrib/pom.xml b/contrib/pom.xml index 4e28cb5..577130e 100644 --- a/contrib/pom.xml +++ b/contrib/pom.xml @@ -32,7 +32,7 @@ - + org.apache.hive diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml index e6ca298..4b99cc3 100644 --- a/druid-handler/pom.xml +++ b/druid-handler/pom.xml @@ -33,7 +33,7 @@ - + com.fasterxml.jackson.dataformat diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml index 4c3c8ef..2528697 100644 --- a/hbase-handler/pom.xml +++ b/hbase-handler/pom.xml @@ -32,7 +32,7 @@ - + org.apache.hive diff --git a/hcatalog/core/pom.xml b/hcatalog/core/pom.xml index 920547e..9227199 100644 --- a/hcatalog/core/pom.xml +++ b/hcatalog/core/pom.xml @@ -38,7 +38,7 @@ - + org.apache.hive diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml b/hcatalog/hcatalog-pig-adapter/pom.xml index 7c321be..aba63af 100644 --- a/hcatalog/hcatalog-pig-adapter/pom.xml +++ b/hcatalog/hcatalog-pig-adapter/pom.xml @@ -38,7 +38,7 @@ - + org.apache.hive.hcatalog diff --git a/hcatalog/server-extensions/pom.xml b/hcatalog/server-extensions/pom.xml index a6807fb..73a462b 100644 --- a/hcatalog/server-extensions/pom.xml +++ b/hcatalog/server-extensions/pom.xml @@ -38,7 +38,7 @@ - + org.apache.hive.hcatalog diff --git a/hcatalog/streaming/pom.xml b/hcatalog/streaming/pom.xml index af9dc59..99af4e6 100644 --- a/hcatalog/streaming/pom.xml +++ b/hcatalog/streaming/pom.xml @@ -33,7 +33,7 @@ - + org.apache.hive @@ -137,7 +137,7 @@ - + org.apache.maven.plugins maven-jar-plugin diff --git a/hcatalog/webhcat/java-client/pom.xml b/hcatalog
[hive] branch master updated: HIVE-22933 : Allow connecting kerberos-enabled Hive to connect to a non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 75f7762 HIVE-22933 : Allow connecting kerberos-enabled Hive to connect to a non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra) 75f7762 is described below commit 75f77621b52218f4781a08441d4c3f7be572e299 Author: Nishant Bangarwa AuthorDate: Sun May 10 16:46:38 2020 -0700 HIVE-22933 : Allow connecting kerberos-enabled Hive to connect to a non-kerberos druid cluster ( Nishant Bangarwa via Slim Bouguerra) Signed-off-by: Ashutosh Chauhan --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 ++ .../java/org/apache/hadoop/hive/druid/DruidStorageHandler.java| 8 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 60ae06a..4f2ea9a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3033,6 +3033,8 @@ public class HiveConf extends Configuration { "Wait time in ms default to 30 seconds." ), HIVE_DRUID_BITMAP_FACTORY_TYPE("hive.druid.bitmap.type", "roaring", new PatternSet("roaring", "concise"), "Coding algorithm use to encode the bitmaps"), +HIVE_DRUID_KERBEROS_ENABLE("hive.druid.kerberos.enable", true, +"Enable/Disable Kerberos authentication explicitly while connecting to a druid cluster."), // For HBase storage handler HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true, "Whether writes to HBase should be forced to the write-ahead log. 
\n" + diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java index beaf249..36b2cdf 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java @@ -86,6 +86,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.StorageHandlerInfo; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -760,7 +761,8 @@ import static org.apache.hadoop.hive.druid.DruidStorageHandlerUtils.JSON_MAPPER; } @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { -if (UserGroupInformation.isSecurityEnabled()) { +final boolean kerberosEnabled = HiveConf.getBoolVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_KERBEROS_ENABLE); +if (kerberosEnabled && UserGroupInformation.isSecurityEnabled()) { // AM can not do Kerberos Auth so will do the input split generation in the HS2 LOG.debug("Setting {} to {} to enable split generation on HS2", HiveConf.ConfVars.HIVE_AM_SPLIT_GENERATION.toString(), @@ -928,7 +930,9 @@ import static org.apache.hadoop.hive.druid.DruidStorageHandlerUtils.JSON_MAPPER; .withNumConnections(numConnection) .withReadTimeout(new Period(readTimeout).toStandardDuration()) .build(), lifecycle); -if (UserGroupInformation.isSecurityEnabled()) { +final boolean kerberosEnabled = +HiveConf.getBoolVar(SessionState.getSessionConf(), HiveConf.ConfVars.HIVE_DRUID_KERBEROS_ENABLE); +if (kerberosEnabled && UserGroupInformation.isSecurityEnabled()) { LOG.info("building Kerberos Http Client"); return new 
KerberosHttpClient(httpClient); }
[hive] branch master updated: HIVE-22419 : Improve Messages Emitted From HiveMetaStoreClient (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 19e67cd HIVE-22419 : Improve Messages Emitted From HiveMetaStoreClient (David Mollitor via Ashutosh Chauhan) 19e67cd is described below commit 19e67cd54e2f980be901bed4e6d21ce002ebc9e1 Author: David Mollitor AuthorDate: Sat May 9 23:17:50 2020 -0700 HIVE-22419 : Improve Messages Emitted From HiveMetaStoreClient (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/metastore/HiveMetaStoreClient.java | 54 -- .../hadoop/hive/metastore/utils/LogUtils.java | 9 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 1ec5839..64d3833 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -70,7 +70,6 @@ import org.apache.hadoop.hive.metastore.utils.FilterUtils; import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.SecurityUtils; -import org.apache.hadoop.hive.metastore.utils.LogUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; @@ -440,8 +439,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { String newVar = MetastoreConf.getAsString(conf, oneVar); if (oldVar == null || (oneVar.isCaseSensitive() ? 
!oldVar.equals(newVar) : !oldVar.equalsIgnoreCase(newVar))) { -LOG.info("Mestastore configuration " + oneVar.toString() + -" changed from " + oldVar + " to " + newVar); +LOG.info("Mestastore configuration {} changed from {} to {}", +oneVar, oldVar, newVar); compatible = false; } } @@ -457,8 +456,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { public void reconnect() throws MetaException { if (localMetaStore) { // For direct DB connections we don't yet support reestablishing connections. - throw new MetaException("For direct MetaStore DB connections, we don't support retries" + - " at the client level."); + throw new MetaException("Retries for direct MetaStore DB connections " + + "are not supported by this client"); } else { close(); @@ -593,10 +592,13 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { // Create an SSL socket and connect transport = SecurityUtils.getSSLSocket(store.getHost(), store.getPort(), clientSocketTimeout, trustStorePath, trustStorePassword); - LOG.debug("Opened an SSL connection to metastore, current connections: " + connCount.incrementAndGet()); + final int newCount = connCount.incrementAndGet(); + LOG.debug( + "Opened an SSL connection to metastore, current connections: {}", + newCount); if (LOG.isTraceEnabled()) { -LOG.trace("", new LogUtils.StackTraceLogger("METASTORE SSL CONNECTION TRACE - open - " + -System.identityHashCode(this))); +LOG.trace("METASTORE SSL CONNECTION TRACE - open [{}]", +System.identityHashCode(this), new Exception()); } } catch (IOException e) { throw new IllegalArgumentException(e); @@ -632,7 +634,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { transport = MetaStorePlainSaslHelper.getPlainTransport(userName, passwd, transport); } catch (IOException sasle) { // IOException covers SaslException - LOG.error("Couldn't create client transport", sasle); + LOG.error("Could not create client transport", sasle); throw new 
MetaException(sasle.toString()); } } else if (useSasl) { @@ -665,7 +667,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { transport, MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL)); } } catch (I
[hive] branch master updated: HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 28f6463 HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David Mollitor via Ashutosh Chauhan) 28f6463 is described below commit 28f6463ed5bc899d7fd554e4e977034411347b27 Author: David Mollitor AuthorDate: Sat May 9 23:00:38 2020 -0700 HIVE-22423 : Improve Logging In HadoopThriftAuthBridge (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../metastore/security/HadoopThriftAuthBridge.java | 59 +++--- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java index 299bdd2..53ed214 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/security/HadoopThriftAuthBridge.java @@ -97,10 +97,12 @@ public abstract class HadoopThriftAuthBridge { throw new IllegalStateException("Unable to get current login user: " + e, e); } if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + LOG.debug("Not setting UGI conf as passed-in authMethod of {} = current", + authMethod); return new Client(); } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + LOG.debug("Setting UGI conf as passed-in authMethod of {} != current", + authMethod); Configuration conf = new Configuration(); conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); 
UserGroupInformation.setConfiguration(conf); @@ -150,10 +152,12 @@ public abstract class HadoopThriftAuthBridge { throw new IllegalStateException("Unable to get current user: " + e, e); } if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + LOG.debug("Not setting UGI conf as passed-in authMethod of {} = current", + authMethod); return ugi; } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + LOG.debug("Setting UGI conf as passed-in authMethod of {} != current", + authMethod); Configuration conf = new Configuration(); conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); UserGroupInformation.setConfiguration(conf); @@ -177,7 +181,7 @@ public abstract class HadoopThriftAuthBridge { throw new IllegalArgumentException("Invalid attribute value for " + HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); } -LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); +LOG.debug("Current authMethod = {}", ugi.getAuthenticationMethod()); return ugi.getAuthenticationMethod().equals(authMethod); } @@ -283,22 +287,16 @@ public abstract class HadoopThriftAuthBridge { } } if (nc != null) { - if (LOG.isDebugEnabled()) { -LOG.debug("SASL client callback: setting username: " + userName); - } + LOG.debug("SASL client callback: setting username: {}", userName); nc.setName(userName); } if (pc != null) { - if (LOG.isDebugEnabled()) { -LOG.debug("SASL client callback: setting userPassword"); - } + LOG.debug("SASL client callback: setting userPassword"); pc.setPassword(userPassword); } if (rc != null) { - if (LOG.isDebugEnabled()) { -LOG.debug("SASL client callback: setting realm: " -+ rc.getDefaultText()); - } + LOG.debug("SASL client callback: setting realm: {}", + rc.getDefaultText()); rc.setText(rc.getDefaultText()); } } @@ -343,14 +341,15 @@ public abstract class HadoopThriftAuthBridge { } if (clientConf == null || 
clientConf.isEmpty()) { // Don't bust existing setups. -LOG.warn("Client-facing principal not set. Using server-side setting: " + principalConf); +LOG.warn("Client-facing principal not set. Using server-side se
[hive] branch master updated: HIVE-22437 : LLAP Metadata cache NPE on locking metadata. (Slim Bouguerra via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6664208 HIVE-22437 : LLAP Metadata cache NPE on locking metadata. (Slim Bouguerra via Ashutosh Chauhan) 6664208 is described below commit 6664208992bd8ecc788ee0dcd0ddd8e6b87582b0 Author: Slim Bouguerra AuthorDate: Sat May 9 22:55:47 2020 -0700 HIVE-22437 : LLAP Metadata cache NPE on locking metadata. (Slim Bouguerra via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/metadata/MetadataCache.java | 16 +++--- .../hive/llap/cache/TestOrcMetadataCache.java | 25 ++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java index 10bd736..ce1e236 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/MetadataCache.java @@ -63,7 +63,7 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { this.policy = policy; this.metrics = metrics; this.estimateErrors = useEstimateCache -? new ConcurrentHashMap() : null; +? 
new ConcurrentHashMap<>() : null; } public void putIncompleteCbs(Object fileKey, DiskRange[] ranges, long baseOffset, AtomicBoolean isStopped) { @@ -235,7 +235,7 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { if (maxAlloc < length) { largeBuffers = new LlapMetadataBuffer[length / maxAlloc]; for (int i = 0; i < largeBuffers.length; ++i) { -largeBuffers[i] = new LlapMetadataBuffer(fileKey, tag); +largeBuffers[i] = new LlapMetadataBuffer<>(fileKey, tag); } allocator.allocateMultiple(largeBuffers, maxAlloc, null, isStopped); for (int i = 0; i < largeBuffers.length; ++i) { @@ -256,7 +256,7 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { LlapMetadataBuffer[] cacheData = new LlapMetadataBuffer[largeBuffers.length + 1]; System.arraycopy(largeBuffers, 0, cacheData, 0, largeBuffers.length); cacheData[largeBuffers.length] = smallBuffer[0]; -return new LlapMetadataBuffers(cacheData); +return new LlapMetadataBuffers<>(cacheData); } } } @@ -340,16 +340,16 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { if (result != null) return result; if (tailBuffer.remaining() <= allocator.getMaxAllocation()) { // The common case by far. 
- return wrapSmallBb(new LlapMetadataBuffer(key, tag), tailBuffer, isStopped); + return wrapSmallBb(new LlapMetadataBuffer<>(key, tag), tailBuffer, isStopped); } else { int allocCount = determineAllocCount(tailBuffer); @SuppressWarnings("unchecked") LlapMetadataBuffer[] results = new LlapMetadataBuffer[allocCount]; for (int i = 0; i < allocCount; ++i) { -results[i] = new LlapMetadataBuffer(key, tag); +results[i] = new LlapMetadataBuffer<>(key, tag); } wrapLargeBb(results, tailBuffer, isStopped); - return new LlapMetadataBuffers(results); + return new LlapMetadataBuffers<>(results); } } @@ -406,7 +406,7 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { for (int i = 0; i < bufferArray.length; ++i) { if (lockOneBuffer(bufferArray[i], doNotifyPolicy)) continue; for (int j = 0; j < i; ++j) { -unlockSingleBuffer(buffer, true); +unlockSingleBuffer(bufferArray[j], true); } discardMultiBuffer(buffers); return false; @@ -497,7 +497,7 @@ public class MetadataCache implements LlapIoDebugDump, FileMetadataCache { } } - public static interface LlapBufferOrBuffers extends MemoryBufferOrBuffers { + public interface LlapBufferOrBuffers extends MemoryBufferOrBuffers { LlapAllocatorBuffer getSingleLlapBuffer(); LlapAllocatorBuffer[] getMultipleLlapBuffers(); } diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java index d8632b0..1534864 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers import org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapMetadataBuffer; import
[hive] branch master updated: HIVE-22491 : Use Collections emptyList (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 0bf24c8 HIVE-22491 : Use Collections emptyList (David Mollitor via Ashutosh Chauhan) 0bf24c8 is described below commit 0bf24c805467db2c04098fac5029914ce0b0985c Author: David Mollitor AuthorDate: Sat May 9 22:23:33 2020 -0700 HIVE-22491 : Use Collections emptyList (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java | 3 ++- .../main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java | 3 ++- .../org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java | 3 +-- .../org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java | 3 ++- .../java/org/apache/hadoop/hive/kudu/KuduPredicateHandler.java| 2 +- .../org/apache/hadoop/hive/ql/exec/SerializationUtilities.java| 4 ++-- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java | 2 +- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java | 7 +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java | 3 ++- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 8 +++- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java | 3 ++- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java | 3 ++- .../hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java | 7 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java | 2 +- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java| 5 +++-- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java | 4 ++-- serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java | 6 +++--- .../java/org/apache/hadoop/hive/metastore/utils/FileUtils.java| 2 +- .../main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java | 6 ++ .../apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java | 7 --- 
.../main/java/org/apache/hadoop/hive/metastore/ObjectStore.java | 6 +++--- .../apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java | 3 ++- 22 files changed, 48 insertions(+), 44 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java index 7641610..10ff548 100644 --- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.common; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -66,7 +67,7 @@ public class HiveStatsUtils { // does not exist. But getFileStatus() throw IOException. To mimic the // similar behavior we will return empty array on exception. For external // tables, the path of the table will not exists during table creation -return new ArrayList<>(0); +return Collections.emptyList(); } return result; } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java index b73b6fe..2f92733 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java @@ -21,6 +21,7 @@ package org.apache.hive.hcatalog.api; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -1007,7 +1008,7 @@ public class HCatClientHMSImpl extends HCatClient { } }); } else { -return new ArrayList(); +return Collections.emptyList(); } } catch (TException e) { throw new ConnectionFailureException("TException while getting 
notifications", e); diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java index 4ebc34b..ecf3ef5 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java @@ -19,7 +19,6 @@ package org.apache.hive.hcatalog.templeton.tool; import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Date; @@ -138,7 +137,7
[hive] branch master updated: HIVE-22531 : Fix a couple whitespace errors in error messages (Jacob Tolar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d12a308 HIVE-22531 : Fix a couple whitespace errors in error messages (Jacob Tolar via Ashutosh Chauhan) d12a308 is described below commit d12a30804f17b774cb75c6d0ec32dbc67512975a Author: Jacob Tolar AuthorDate: Sat May 9 22:03:14 2020 -0700 HIVE-22531 : Fix a couple whitespace errors in error messages (Jacob Tolar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java | 4 ++-- .../apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java| 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java index bb2356a..a776155 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/tables/ShowTablesOperation.java @@ -72,7 +72,7 @@ public class ShowTablesOperation extends DDLOperation { try (DataOutputStream os = DDLUtils.getOutputStream(new Path(desc.getResFile()), context)) { context.getFormatter().showTables(os, tableNames); } catch (Exception e) { - throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database" + desc.getDbName()); + throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database " + desc.getDbName()); } } @@ -91,7 +91,7 @@ public class ShowTablesOperation extends DDLOperation { try (DataOutputStream os = DDLUtils.getOutputStream(new Path(desc.getResFile()), context)) { context.getFormatter().showTablesExtended(os, tableObjects); } catch (Exception e) { - throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database" + desc.getDbName()); + 
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database " + desc.getDbName()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index f958720..b8b227c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -1222,7 +1222,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I if (partition == null) { throw new NoSuchObjectException("Partition with partition values " + (pvals != null ? Arrays.toString(pvals.toArray()) : "null") + - " for table " + tableName + " in database " + dbName + "and for user " + + " for table " + tableName + " in database " + dbName + " and for user " + userName + " and group names " + (groupNames != null ? Arrays.toString(groupNames.toArray()) : "null") + " is not found."); }
[hive] branch master updated: HIVE-22569 : PartitionPruner use Collections Class (David Mollitor via Vineet Garg)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 4dccfc2 HIVE-22569 : PartitionPruner use Collections Class (David Mollitor via Vineet Garg) 4dccfc2 is described below commit 4dccfc289e5922e03a91175bd57ab32475d23357 Author: David Mollitor AuthorDate: Sat May 9 21:48:22 2020 -0700 HIVE-22569 : PartitionPruner use Collections Class (David Mollitor via Vineet Garg) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/optimizer/calcite/RelOptHiveTable.java | 7 ++-- .../hive/ql/optimizer/ppr/PartitionPruner.java | 5 +-- .../apache/hadoop/hive/ql/parse/ParseContext.java | 2 +- .../hadoop/hive/ql/parse/PrunedPartitionList.java | 37 +++--- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 1f6e1bc..5b0021f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -515,10 +515,11 @@ public class RelOptHiveTable implements RelOptTable { computePartitionList(hiveConf, null, new HashSet()); } -ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey()); +String partitionListKey = partitionList.getKey().orElse(null); +ColumnStatsList colStatsCached = colStatsCache.get(partitionListKey); if (colStatsCached == null) { colStatsCached = new ColumnStatsList(); - colStatsCache.put(partitionList.getKey(), colStatsCached); + colStatsCache.put(partitionListKey, colStatsCached); } // 2. Obtain Col Stats for Non Partition Cols @@ -751,7 +752,7 @@ public class RelOptHiveTable implements RelOptTable { } public String getPartitionListKey() { -return partitionList != null ? 
partitionList.getKey() : null; +return partitionList != null ? partitionList.getKey().orElse(null) : null; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 673d858..709b221 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; import java.util.AbstractSequentialList; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -207,7 +208,7 @@ public class PartitionPruner extends Transform { if (compactExpr == null || isBooleanExpr(compactExpr)) { if (isFalseExpr(compactExpr)) { return new PrunedPartitionList(tab, key + compactExpr.getExprString(true), -new LinkedHashSet(0), new ArrayList(0), false); +Collections.emptySet(), Collections.emptyList(), false); } // For null and true values, return every partition return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap); @@ -242,7 +243,7 @@ public class PartitionPruner extends Transform { } catch (HiveException e) { throw new SemanticException(e); } -ppList = new PrunedPartitionList(tab, key, parts, null, unknownPartitions); +ppList = new PrunedPartitionList(tab, key, parts, Collections.emptyList(), unknownPartitions); if (partsCache != null) { partsCache.put(key, ppList); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index bef0217..da443f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -431,7 +431,7 @@ public class ParseContext { * @return col stats */ public ColumnStatsList getColStatsCached(PrunedPartitionList partList) { 
-return ctx.getOpContext().getColStatsCache().get(partList.getKey()); +return ctx.getOpContext().getColStatsCache().get(partList.getKey().orElse(null)); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java index 4068735..398dbf5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.ql.parse; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import
[hive] branch master updated: HIVE-23083 : Enable fast serialization in xprod edge (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f7e5de1 HIVE-23083 : Enable fast serialization in xprod edge (Rajesh Balamohan via Ashutosh Chauhan) f7e5de1 is described below commit f7e5de1e9896dca984e72072a97472d6553ca278 Author: Rajesh Balamohan AuthorDate: Sat May 9 20:18:22 2020 -0700 HIVE-23083 : Enable fast serialization in xprod edge (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 78b5e32..3e8ba08 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -565,6 +565,8 @@ public class DagUtils { UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, ValueHashPartitioner.class.getName()) .setFromConfiguration(conf) + .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null) + .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null) .build(); return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor); case SIMPLE_EDGE:
[hive] branch master updated: HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong Huang via Madhan Neetiraj, Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3f1358d HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong Huang via Madhan Neetiraj, Ashutosh Chauhan) 3f1358d is described below commit 3f1358d57d3a43894b912169a2d66cd6c77ce1d4 Author: Quanlong Huang AuthorDate: Sat May 9 18:43:26 2020 -0700 HIVE-22699 : Mask UDFs should mask numeric value 0 (Quanlong Huang via Madhan Neetiraj, Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java| 12 .../hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java | 12 .../hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java | 12 .../hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java | 12 .../hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java | 12 ql/src/test/queries/clientpositive/udf_mask.q| 4 ql/src/test/queries/clientpositive/udf_mask_first_n.q| 3 +++ ql/src/test/queries/clientpositive/udf_mask_last_n.q | 3 +++ ql/src/test/queries/clientpositive/udf_mask_show_first_n.q | 3 +++ ql/src/test/queries/clientpositive/udf_mask_show_last_n.q| 3 +++ ql/src/test/results/clientpositive/udf_mask.q.out| 10 +- ql/src/test/results/clientpositive/udf_mask_first_n.q.out| 8 +++- ql/src/test/results/clientpositive/udf_mask_last_n.q.out | 8 +++- .../test/results/clientpositive/udf_mask_show_first_n.q.out | 8 +++- .../test/results/clientpositive/udf_mask_show_last_n.q.out | 8 +++- 15 files changed, 113 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java index 27c3bf8..812b028 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java @@ -121,6 +121,9 @@ class 
MaskTransformer extends AbstractTransformer { @Override Byte transform(final Byte value) { +if (value == 0) { + return (byte) maskedNumber; +} byte val = value; if(value < 0) { @@ -145,6 +148,9 @@ class MaskTransformer extends AbstractTransformer { @Override Short transform(final Short value) { +if (value == 0) { + return (short) maskedNumber; +} short val = value; if(value < 0) { @@ -169,6 +175,9 @@ class MaskTransformer extends AbstractTransformer { @Override Integer transform(final Integer value) { +if (value == 0) { + return maskedNumber; +} int val = value; if(value < 0) { @@ -193,6 +202,9 @@ class MaskTransformer extends AbstractTransformer { @Override Long transform(final Long value) { +if (value == 0) { + return (long) maskedNumber; +} long val = value; if(value < 0) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java index 76ee292..c1ea157 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java @@ -81,6 +81,9 @@ class MaskFirstNTransformer extends MaskTransformer { @Override Byte transform(final Byte value) { +if (value == 0) { + return charCount > 0 ? (byte) maskedNumber : 0; +} byte val = value; if(value < 0) { @@ -118,6 +121,9 @@ class MaskFirstNTransformer extends MaskTransformer { @Override Short transform(final Short value) { +if (value == 0) { + return charCount > 0 ? (short) maskedNumber : 0; +} short val = value; if(value < 0) { @@ -155,6 +161,9 @@ class MaskFirstNTransformer extends MaskTransformer { @Override Integer transform(final Integer value) { +if (value == 0) { + return charCount > 0 ? maskedNumber : 0; +} int val = value; if(value < 0) { @@ -192,6 +201,9 @@ class MaskFirstNTransformer extends MaskTransformer { @Override Long transform(final Long value) { +if (value == 0) { + return charCount > 0 ? 
maskedNumber : 0L; +} long val = value; if(value < 0) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java index c0c5c61..684c049 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java @@ -81,6 +81,9 @@ class MaskLastNTransformer extends MaskTransforme
[hive] branch master updated: HIVE-22769 : Incorrect query results and query failure during split generation for compressed text files (Panos G via Ashutosh Chauhan, Mustafa Iman) Change-Id: Ifb68bd9
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new c4d5dc6 HIVE-22769 : Incorrect query results and query failure during split generation for compressed text files (Panos G via Ashutosh Chauhan , Mustafa Iman) Change-Id: Ifb68bd9e3b103424aed2b9d6871b536a5437a095 Signed-off-by: Ashutosh Chauhan c4d5dc6 is described below commit c4d5dc68843b3764b22fc8bccf211962abf6549d Author: Panos Garefalakis AuthorDate: Tue Apr 28 14:24:25 2020 +0100 HIVE-22769 : Incorrect query results and query failure during split generation for compressed text files (Panos G via Ashutosh Chauhan , Mustafa Iman) Change-Id: Ifb68bd9e3b103424aed2b9d6871b536a5437a095 Signed-off-by: Ashutosh Chauhan --- data/files/compressed_4line_file1.csv | 3 + data/files/compressed_4line_file1.csv.bz2 | Bin 0 -> 55 bytes data/files/compressed_4line_file2.csv | 3 + data/files/compressed_4line_file2.csv.bz2 | Bin 0 -> 60 bytes .../test/resources/testconfiguration.properties| 5 +- .../hive/llap/io/encoded/LineRrOffsetReader.java | 20 +- .../hive/llap/io/encoded/PassThruOffsetReader.java | 65 ++- .../llap/io/encoded/SerDeEncodedDataReader.java| 19 +- .../hive/ql/io/HiveContextAwareRecordReader.java | 2 + .../apache/hadoop/hive/ql/io/HiveInputFormat.java | 21 ++- .../hadoop/hive/ql/io/SkippingTextInputFormat.java | 14 +- .../{LineBufferTest.java => TestLineBuffer.java} | 2 +- .../hive/ql/io/TestSkippingTextInputFormat.java| 205 + .../compressed_skip_header_footer_aggr.q | 28 +++ ...ter_aggregation.q => skip_header_footer_aggr.q} | 0 .../{skiphf_aggr2.q => skip_header_footer_proj.q} | 0 .../llap/compressed_skip_header_footer_aggr.q.out | 64 +++ ...egation.q.out => skip_header_footer_aggr.q.out} | 0 ...f_aggr2.q.out => skip_header_footer_proj.q.out} | 16 +- 19 files changed, 435 insertions(+), 32 deletions(-) diff --git 
a/data/files/compressed_4line_file1.csv b/data/files/compressed_4line_file1.csv new file mode 100644 index 000..efe52db --- /dev/null +++ b/data/files/compressed_4line_file1.csv @@ -0,0 +1,3 @@ +1,2019-12-31 +2,2019-12-31 +3,2019-12-31 diff --git a/data/files/compressed_4line_file1.csv.bz2 b/data/files/compressed_4line_file1.csv.bz2 new file mode 100644 index 000..ada697d Binary files /dev/null and b/data/files/compressed_4line_file1.csv.bz2 differ diff --git a/data/files/compressed_4line_file2.csv b/data/files/compressed_4line_file2.csv new file mode 100644 index 000..629a850 --- /dev/null +++ b/data/files/compressed_4line_file2.csv @@ -0,0 +1,3 @@ +1,2019-12-31 00 +2,2019-12-31 01 +3,2019-12-31 02 diff --git a/data/files/compressed_4line_file2.csv.bz2 b/data/files/compressed_4line_file2.csv.bz2 new file mode 100644 index 000..4b5353a Binary files /dev/null and b/data/files/compressed_4line_file2.csv.bz2 differ diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index de14c81..39e78d6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -406,7 +406,9 @@ minillap.query.files=acid_bucket_pruning.q,\ reduce_deduplicate_distinct.q, \ remote_script.q,\ file_with_header_footer.q,\ - file_with_header_footer_aggregation.q,\ + skip_header_footer_aggr.q,\ + skip_header_footer_proj.q,\ + compressed_skip_header_footer_aggr.q,\ external_table_purge.q,\ external_table_with_space_in_location_path.q,\ import_exported_table.q,\ @@ -1041,7 +1043,6 @@ minillaplocal.query.files=\ smb_mapjoin_15.q,\ vectorized_nested_mapjoin.q,\ skiphf_aggr.q,\ - skiphf_aggr2.q,\ multi_insert_lateral_view.q,\ smb_mapjoin_4.q,\ cbo_udf_udaf.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java index 3fc1fa2..a0f949b 100644 
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/LineRrOffsetReader.java @@ -24,6 +24,7 @@ import java.lang.reflect.Method; import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.ReaderWithOffsets; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.LineRecordReader; final class LineRrOffsetReader extends PassThruOffsetReader { @@ -43,24 +44,29 @@ final class LineRrOffsetReader extends PassThruOffsetReader { isCompressedMethod = isCompressedMethodTmp; } - static Rea
[hive] branch master updated: HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive on Tez (Toshihiko Uchida via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 48c5ff1 HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive on Tez ( Toshihiko Uchida via Ashutosh Chauhan) 48c5ff1 is described below commit 48c5ff1dc34582cc1d597b984e9e8122c017c99e Author: Ashutosh Chauhan AuthorDate: Sat May 9 18:01:36 2020 -0700 HIVE-22967 : Support hive.reloadable.aux.jars.path for Hive on Tez ( Toshihiko Uchida via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index c5b7171..78b5e32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -51,6 +51,7 @@ import java.util.zip.ZipOutputStream; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.tez.mapreduce.common.MRInputSplitDistributor; import org.apache.tez.mapreduce.hadoop.InputSplitInfo; import org.apache.tez.mapreduce.output.MROutput; @@ -1074,7 +1075,9 @@ public class DagUtils { String addedFiles = Utilities.getLocalResourceFiles(conf, SessionState.ResourceType.FILE); String addedJars = Utilities.getLocalResourceFiles(conf, SessionState.ResourceType.JAR); String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS); -String allFiles = auxJars + "," + addedJars + "," + addedFiles; +String reloadableAuxJars = SessionState.get() == null ? 
null : SessionState.get().getReloadableAuxJars(); +String allFiles = +HiveStringUtils.joinIgnoringEmpty(new String[]{auxJars, reloadableAuxJars, addedJars, addedFiles}, ','); return allFiles.split(","); } @@ -1091,10 +1094,12 @@ public class DagUtils { HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars); } String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS); +String reloadableAuxJars = SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars(); // need to localize the additional jars and files // we need the directory on hdfs to which we shall put all these files -String allFiles = auxJars + "," + addedJars + "," + addedFiles; +String allFiles = +HiveStringUtils.joinIgnoringEmpty(new String[]{auxJars, reloadableAuxJars, addedJars, addedFiles}, ','); return allFiles.split(","); }
[hive] branch master updated: HIVE-23184 : Upgrade druid to 0.17.1 (Nishant Bangarwa via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 9a6ec1e HIVE-23184 : Upgrade druid to 0.17.1 ( Nishant Bangarwa via Ashutosh Chauhan) 9a6ec1e is described below commit 9a6ec1e351d59b76419a21d1f2c8781e306b02d0 Author: Nishant Bangarwa AuthorDate: Mon Apr 13 22:29:26 2020 +0530 HIVE-23184 : Upgrade druid to 0.17.1 ( Nishant Bangarwa via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- data/scripts/kafka_init_data.csv | 2 +- druid-handler/pom.xml | 6 + .../apache/hadoop/hive/druid/DruidKafkaUtils.java | 43 ++- .../hadoop/hive/druid/DruidStorageHandler.java | 61 ++-- .../hive/druid/DruidStorageHandlerUtils.java | 62 ++--- .../hadoop/hive/druid/io/DruidOutputFormat.java| 22 +- .../hive/druid/io/DruidQueryBasedInputFormat.java | 56 +--- .../hadoop/hive/druid/io/DruidRecordWriter.java| 10 +- .../druid/json/KafkaIndexTaskTuningConfig.java | 128 + .../hive/druid/json/KafkaSupervisorSpec.java | 20 +- .../druid/json/KafkaSupervisorTuningConfig.java| 208 +- .../hadoop/hive/druid/json/KafkaTuningConfig.java | 307 .../json/SeekableStreamIndexTaskTuningConfig.java | 308 + .../json/SeekableStreamSupervisorTuningConfig.java | 59 .../hive/druid/serde/DruidQueryRecordReader.java | 3 +- .../druid/serde/DruidSelectQueryRecordReader.java | 92 -- .../hadoop/hive/druid/TestDruidStorageHandler.java | 8 +- .../druid/TestHiveDruidQueryBasedInputFormat.java | 44 --- .../hadoop/hive/druid/serde/TestDruidSerDe.java| 244 .../hadoop/hive/ql/io/TestDruidRecordWriter.java | 21 +- itests/qtest-druid/pom.xml | 22 ++ pom.xml| 2 +- .../druid/druidkafkamini_delimited.q.out | 2 +- .../druidmini_semijoin_reduction_all_types.q.out | 24 +- 24 files changed, 753 insertions(+), 1001 deletions(-) diff --git a/data/scripts/kafka_init_data.csv b/data/scripts/kafka_init_data.csv index 5dc094e..d818144 100644 --- 
a/data/scripts/kafka_init_data.csv +++ b/data/scripts/kafka_init_data.csv @@ -1,4 +1,4 @@ -"2013-08-31T01:02:33Z", "Gypsy Danger","en","nuclear","true","true","false","false","article","North America","United States","Bay Area","San Francisco",57,200,-143 +"2013-08-31T01:02:33Z","Gypsy Danger","en","nuclear","true","true","false","false","article","North America","United States","Bay Area","San Francisco",57,200,-143 "2013-08-31T03:32:45Z","Striker Eureka","en","speed","false","true","true","false","wikipedia","Australia","Australia","Cantebury","Syndey",459,129,330 "2013-08-31T07:11:21Z","Cherno Alpha","ru","masterYi","false","true","true","false","article","Asia","Russia","Oblast","Moscow",123,12,111 "2013-08-31T11:58:39Z","Crimson Typhoon","zh","triplets","true","false","true","false","wikipedia","Asia","China","Shanxi","Taiyuan",905,5,900 diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml index c7a2d4c..e6ca298 100644 --- a/druid-handler/pom.xml +++ b/druid-handler/pom.xml @@ -293,6 +293,12 @@ + + org.apache.logging.log4j + log4j-api + ${log4j2.version} + test + diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java index b56d48a..fb6ce30 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidKafkaUtils.java @@ -29,8 +29,8 @@ import org.apache.druid.data.input.impl.JSONParseSpec; import org.apache.druid.data.input.impl.StringInputRowParser; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.http.client.Request; -import org.apache.druid.java.util.http.client.response.FullResponseHandler; -import org.apache.druid.java.util.http.client.response.FullResponseHolder; +import org.apache.druid.java.util.http.client.response.StringFullResponseHandler; +import
[hive] branch master updated: HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 809a859 HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via Ashutosh Chauhan) 809a859 is described below commit 809a859f98b520f2989db6aff7bd5451bb546596 Author: David Mollitor AuthorDate: Sat May 9 10:24:02 2020 -0700 HIVE-23350 : Upgrade DBCP To DBCP 2.7.0 (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- itests/hive-unit/pom.xml | 6 -- jdbc-handler/pom.xml | 1 - metastore/pom.xml | 10 pom.xml| 10 ++-- standalone-metastore/metastore-common/pom.xml | 4 -- standalone-metastore/metastore-server/pom.xml | 4 +- .../datasource/DbCPDataSourceProvider.java | 67 +++--- .../datasource/TestDataSourceProviderFactory.java | 3 +- .../schematool/TestSchemaToolForMetastore.java | 2 +- standalone-metastore/pom.xml | 12 ++-- 10 files changed, 52 insertions(+), 67 deletions(-) diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml index 2219002..026c465 100644 --- a/itests/hive-unit/pom.xml +++ b/itests/hive-unit/pom.xml @@ -431,12 +431,6 @@ ${plexus.version} test - - commons-dbcp - commons-dbcp - ${commons-dbcp.version} - test - diff --git a/jdbc-handler/pom.xml b/jdbc-handler/pom.xml index b2d9ec8..da137cd 100644 --- a/jdbc-handler/pom.xml +++ b/jdbc-handler/pom.xml @@ -89,7 +89,6 @@ org.apache.commons commons-dbcp2 - ${commons-dbcp2.version} diff --git a/metastore/pom.xml b/metastore/pom.xml index cca56a5..f1d5b66 100644 --- a/metastore/pom.xml +++ b/metastore/pom.xml @@ -87,16 +87,6 @@ ${derby.version} - commons-pool - commons-pool - ${commons-pool.version} - - - commons-dbcp - commons-dbcp - ${commons-dbcp.version} - - org.antlr antlr-runtime ${antlr.version} diff --git a/pom.xml b/pom.xml index 2e1e9bb..1432bcf 100644 --- a/pom.xml +++ b/pom.xml @@ -141,10 +141,7 @@ 1.1 2.6 3.9 -1.5.4 -1.4 -2.6.1 
-2.6.0 +2.7.0 1.8 10.14.1.0 3.1.0 @@ -351,6 +348,11 @@ ${commons-io.version} +org.apache.commons +commons-dbcp2 +${commons-dbcp2.version} + + io.netty netty-all ${netty.version} diff --git a/standalone-metastore/metastore-common/pom.xml b/standalone-metastore/metastore-common/pom.xml index ab26915..e8748c7 100644 --- a/standalone-metastore/metastore-common/pom.xml +++ b/standalone-metastore/metastore-common/pom.xml @@ -69,10 +69,6 @@ HikariCP - commons-dbcp - commons-dbcp - - io.dropwizard.metrics metrics-core diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml index 7355e93..021c25a 100644 --- a/standalone-metastore/metastore-server/pom.xml +++ b/standalone-metastore/metastore-server/pom.xml @@ -71,8 +71,8 @@ HikariCP - commons-dbcp - commons-dbcp + org.apache.commons + commons-dbcp2 io.dropwizard.metrics diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java index c687a25..1a5a1d2 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java @@ -17,24 +17,26 @@ */ package org.apache.hadoop.hive.metastore.datasource; -import org.apache.commons.dbcp.BasicDataSource; -import org.apache.commons.dbcp.ConnectionFactory; -import org.apache.commons.dbcp.DataSourceConnectionFactory; -import org.apache.commons.dbcp.PoolableConnectionFactory; -import org.apache.commons.dbcp.PoolingDataSource; -import org.apache.commons.pool.impl.GenericObjectPool; +import static org.apache.hadoop.hive.metastore.DatabaseProduct.MYSQL; +import static org.apache.hadoop.hive.metastore.DatabaseProduct.determineDatabaseProduct; + +import 
java.sql.SQLException; + +import javax.sql.DataSource; + +import org.apache.commons.dbcp2.BasicDataSource; +import org.apache.commons.dbcp2.ConnectionFactory; +import org.apache.commons.dbcp2.DataSourceConnectionFactory; +import org.apache.commons.dbcp2.PoolableConnectionFactory; +import
[hive] branch master updated: HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 8c88676 HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan) 8c88676 is described below commit 8c88676a91115be56eee1a2fca5c9c2c3ee0402d Author: Panagiotis Garefalakis AuthorDate: Sat May 9 10:17:42 2020 -0700 HIVE-23393 : LLapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java | 11 +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index ac1aca8..e184655 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hive.llap.io.api.impl; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3AInputPolicy; import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport; import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat; import org.apache.hadoop.hive.conf.HiveConf; @@ -62,6 +65,9 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hive.common.util.HiveStringUtils; +import static org.apache.hadoop.hive.common.FileUtils.isS3a; +import static org.apache.hadoop.hive.ql.io.HiveInputFormat.isRandomAccessInputFormat; + public class LlapInputFormat implements InputFormat, VectorizedInputFormatInterface, 
SelfDescribingInputFormatInterface, AvoidSplitCombination { @@ -100,6 +106,11 @@ public class LlapInputFormat implements InputFormat * @param inputFormat * @return */ - private static boolean isRandomAccessInputFormat(InputFormat inputFormat) { + public static boolean isRandomAccessInputFormat(InputFormat inputFormat) { if (inputFormat instanceof OrcInputFormat || inputFormat instanceof VectorizedParquetInputFormat) { return true; @@ -449,7 +449,7 @@ public class HiveInputFormat FileSystem splitFileSystem = splitPath.getFileSystem(job); if (isS3a(splitFileSystem) && isRandomAccessInputFormat(inputFormat)) { - LOG.debug("Changing S3A input policy to RANDOM for split {}", splitPath); + LOG.debug("Changing S3A input policy to RANDOM"); ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random); }
[hive] branch master updated: HIVE-23424 : Remove Dependency on Log4J from hive-shims-common (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new fd82258 HIVE-23424 : Remove Dependency on Log4J from hive-shims-common (David Mollitor via Ashutosh Chauhan) fd82258 is described below commit fd82258d0e1c043579b4378e2bedf73bd5bc74fc Author: David Mollitor AuthorDate: Sat May 9 09:43:53 2020 -0700 HIVE-23424 : Remove Dependency on Log4J from hive-shims-common (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- shims/common/pom.xml | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/shims/common/pom.xml b/shims/common/pom.xml index e2a4711..34e3b43 100644 --- a/shims/common/pom.xml +++ b/shims/common/pom.xml @@ -36,11 +36,6 @@ - org.apache.logging.log4j - log4j-slf4j-impl - ${log4j2.version} - - com.google.guava guava ${guava.version} @@ -86,6 +81,10 @@ + org.slf4j + slf4j-api + + junit junit test
[hive] branch master updated: HIVE-23282 : Reduce number of DB calls in ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1caa309 HIVE-23282 : Reduce number of DB calls in ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan) 1caa309 is described below commit 1caa3092789a010d87d8ea6a3edf63c6a6e0feb0 Author: Attila Magyar AuthorDate: Wed May 6 08:44:09 2020 -0700 HIVE-23282 : Reduce number of DB calls in ObjectStore::getPartitionsByExprInternal (Attila Magyar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../ql/metadata/SessionHiveMetaStoreClient.java| 3 +- .../hadoop/hive/metastore/MetaStoreDirectSql.java | 94 .../apache/hadoop/hive/metastore/ObjectStore.java | 267 ++--- .../hive/metastore/parser/ExpressionTree.java | 73 +++--- 4 files changed, 310 insertions(+), 127 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 4f58cd9..f958720 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -1543,9 +1543,10 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I private String generateJDOFilter(org.apache.hadoop.hive.metastore.api.Table table, ExpressionTree exprTree) throws MetaException { +assert table != null; ExpressionTree.FilterBuilder filterBuilder = new ExpressionTree.FilterBuilder(true); Map params = new HashMap<>(); -exprTree.generateJDOFilterFragment(conf, table, params, filterBuilder); +exprTree.generateJDOFilterFragment(conf, params, filterBuilder, table.getPartitionKeys()); StringBuilder stringBuilder = new StringBuilder(filterBuilder.getFilter()); // replace leading && stringBuilder.replace(0, 4, ""); diff 
--git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index d155887..b69277e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -43,7 +43,6 @@ import javax.jdo.Query; import javax.jdo.Transaction; import javax.jdo.datastore.JDOConnection; -import com.google.common.collect.ImmutableMap; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.AggregateStatsCache.AggrColStats; @@ -93,13 +92,14 @@ import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hive.common.util.BloomFilter; import org.datanucleus.store.rdbms.query.ForwardQueryResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; /** @@ -530,13 +530,10 @@ class MetaStoreDirectSql { * @param max The maximum number of partitions to return. * @return List of partitions. 
*/ - public List getPartitionsViaSqlFilter( + public List getPartitionsViaSqlFilter(String catName, String dbName, String tableName, SqlFilterForPushdown filter, Integer max) throws MetaException { -Boolean isViewTable = isViewTable(filter.table); -String catName = filter.table.isSetCatName() ? filter.table.getCatName() : -DEFAULT_CATALOG_NAME; List partitionIds = getPartitionIdsViaSqlFilter(catName, -filter.table.getDbName(), filter.table.getTableName(), filter.filter, filter.params, +dbName, tableName, filter.filter, filter.params, filter.joins, max); if (partitionIds.isEmpty()) { return Collections.emptyList(); // no partitions, bail early. @@ -544,8 +541,8 @@ class MetaStoreDirectSql { return Batchable.runBatched(batchSize, partitionIds, new Batchable() { @Override public List run(List input) throws MetaException { -return getPartitionsFromPartitionIds(catName, filter.table.getDbName(), -filter.table.getTableName(), isViewTable, input
[hive] branch master updated: HIVE-23321 : Tolerate in tests that metastore is not removing rows from the skewed_string_list_values table (Zoltan Haindrich via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b283bbd HIVE-23321 : Tolerate in tests that metastore is not removing rows from the skewed_string_list_values table (Zoltan Haindrich via Ashutosh Chauhan) b283bbd is described below commit b283bbda0aa93f10e3a4418bef2534cbb510b5e5 Author: Zoltan Haindrich AuthorDate: Sun May 3 10:15:58 2020 -0700 HIVE-23321 : Tolerate in tests that metastore is not removing rows from the skewed_string_list_values table (Zoltan Haindrich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/test/queries/clientpositive/sysdb.q | 5 +++-- ql/src/test/results/clientpositive/llap/sysdb.q.out | 10 -- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ql/src/test/queries/clientpositive/sysdb.q b/ql/src/test/queries/clientpositive/sysdb.q index 2463ce6..c2201a9 100644 --- a/ql/src/test/queries/clientpositive/sysdb.q +++ b/ql/src/test/queries/clientpositive/sysdb.q @@ -81,9 +81,10 @@ select skewed_col_name from skewed_col_names order by skewed_col_name limit 5; select count(*) from skewed_col_value_loc_map; -select count(*) from skewed_string_list; +-- HIVE-23289: there are fallout in these tables from previous tests +select * from skewed_string_list limit 0; -select count(*) from skewed_string_list_values; +select * from skewed_string_list_values limit 0; select count(*) from skewed_values; diff --git a/ql/src/test/results/clientpositive/llap/sysdb.q.out b/ql/src/test/results/clientpositive/llap/sysdb.q.out index 619c0d2..bffbedb 100644 --- a/ql/src/test/results/clientpositive/llap/sysdb.q.out +++ b/ql/src/test/results/clientpositive/llap/sysdb.q.out @@ -1228,24 +1228,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: sys@skewed_col_value_loc_map A masked pattern was here 0 -PREHOOK: query: select count(*) from skewed_string_list 
+PREHOOK: query: select * from skewed_string_list limit 0 PREHOOK: type: QUERY PREHOOK: Input: sys@skewed_string_list A masked pattern was here -POSTHOOK: query: select count(*) from skewed_string_list +POSTHOOK: query: select * from skewed_string_list limit 0 POSTHOOK: type: QUERY POSTHOOK: Input: sys@skewed_string_list A masked pattern was here -3 -PREHOOK: query: select count(*) from skewed_string_list_values +PREHOOK: query: select * from skewed_string_list_values limit 0 PREHOOK: type: QUERY PREHOOK: Input: sys@skewed_string_list_values A masked pattern was here -POSTHOOK: query: select count(*) from skewed_string_list_values +POSTHOOK: query: select * from skewed_string_list_values limit 0 POSTHOOK: type: QUERY POSTHOOK: Input: sys@skewed_string_list_values A masked pattern was here -3 PREHOOK: query: select count(*) from skewed_values PREHOOK: type: QUERY PREHOOK: Input: sys@skewed_values
[hive] branch master updated: HIVE-23252 : Change spark related tests to be optional (Zoltan Haindrich via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f85ccf2 HIVE-23252 : Change spark related tests to be optional (Zoltan Haindrich via Ashutosh Chauhan) f85ccf2 is described below commit f85ccf2eb4077ce1a0eb13063cd540737fea3458 Author: Zoltan Haindrich AuthorDate: Sun May 3 10:06:45 2020 -0700 HIVE-23252 : Change spark related tests to be optional (Zoltan Haindrich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- itests/hive-unit/pom.xml | 5 - itests/pom.xml | 5 - pom.xml | 7 +-- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml index 103975f..2219002 100644 --- a/itests/hive-unit/pom.xml +++ b/itests/hive-unit/pom.xml @@ -442,11 +442,6 @@ spark-test - - - !skipSparkTests - - org.apache.spark diff --git a/itests/pom.xml b/itests/pom.xml index 3dc95bf..d4fb252 100644 --- a/itests/pom.xml +++ b/itests/pom.xml @@ -476,11 +476,6 @@ spark-test - - - !skipSparkTests - - qtest-spark diff --git a/pom.xml b/pom.xml index 29cde34..2e1e9bb 100644 --- a/pom.xml +++ b/pom.xml @@ -99,7 +99,7 @@ - + **/TestSparkStatistics*,**/TestSparkSessionTimeout*,**/TestJdbcWithMiniHS2ErasureCoding*,**/TestLocalHiveSparkClient* 1.0b3 @@ -1656,11 +1656,6 @@ spark-test - - - !skipSparkTests - - **/ql/exec/spark/session/TestSparkSessionManagerImpl.java,**/TestMultiSessionsHS2WithLocalClusterSpark.java,**/TestJdbcWithLocalClusterSpark.java
[hive] branch master updated: HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1f4bc02 HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan) 1f4bc02 is described below commit 1f4bc024d202ff69ff512d4ba50ad9719cbe2854 Author: Owen O'Malley AuthorDate: Sun May 3 01:05:47 2020 -0700 HIVE-23215 : Make FilterContext and MutableFilterContext interfaces (Owen O'malley, Panos G via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/ql/io/filter/TestFilterContext.java | 38 +++- .../hive/ql/exec/vector/VectorizedRowBatch.java| 63 +++- .../hadoop/hive/ql/io/filter/FilterContext.java| 28 ++--- .../hive/ql/io/filter/MutableFilterContext.java| 68 +++--- 4 files changed, 84 insertions(+), 113 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java index 0bda620..c59cc09 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/filter/TestFilterContext.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.filter; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.junit.Assert; import org.junit.Test; @@ -43,7 +44,7 @@ public class TestFilterContext { @Test public void testInitFilterContext(){ -MutableFilterContext mutableFilterContext = new MutableFilterContext(); +MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); int[] selected = makeValidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); @@ -57,7 +58,7 @@ public class TestFilterContext { @Test public void testResetFilterContext(){ -MutableFilterContext mutableFilterContext = new MutableFilterContext(); +MutableFilterContext 
mutableFilterContext = new VectorizedRowBatch(0); int[] selected = makeValidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); @@ -67,55 +68,32 @@ public class TestFilterContext { Assert.assertEquals(512, filterContext.getSelectedSize()); Assert.assertEquals(512, filterContext.getSelected().length); -filterContext.resetFilterContext(); +filterContext.reset(); Assert.assertEquals(false, filterContext.isSelectedInUse()); Assert.assertEquals(0, filterContext.getSelectedSize()); -Assert.assertEquals(null, filterContext.getSelected()); } @Test(expected=AssertionError.class) public void testInitInvalidFilterContext(){ -MutableFilterContext mutableFilterContext = new MutableFilterContext(); +MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); int[] selected = makeInvalidSelected(); mutableFilterContext.setFilterContext(true, selected, selected.length); } - - @Test - public void testCopyFilterContext(){ -MutableFilterContext mutableFilterContext = new MutableFilterContext(); -int[] selected = makeValidSelected(); - -mutableFilterContext.setFilterContext(true, selected, selected.length); - -MutableFilterContext mutableFilterContextToCopy = new MutableFilterContext(); -mutableFilterContextToCopy.setFilterContext(true, new int[] {100}, 1); - -mutableFilterContext.copyFilterContextFrom(mutableFilterContextToCopy); -FilterContext filterContext = mutableFilterContext.immutable(); - -Assert.assertEquals(true, filterContext.isSelectedInUse()); -Assert.assertEquals(1, filterContext.getSelectedSize()); -Assert.assertEquals(100, filterContext.getSelected()[0]); -// make sure we kept the remaining array space -Assert.assertEquals(512, filterContext.getSelected().length); - } - - @Test public void testBorrowSelected(){ -MutableFilterContext mutableFilterContext = new MutableFilterContext(); +MutableFilterContext mutableFilterContext = new VectorizedRowBatch(0); mutableFilterContext.setFilterContext(true, new int[] {100, 200}, 2); 
-int[] borrowedSelected = mutableFilterContext.borrowSelected(1); +int[] borrowedSelected = mutableFilterContext.updateSelected(1); // make sure we borrowed the existing array Assert.assertEquals(2, borrowedSelected.length); Assert.assertEquals(100, borrowedSelected[0]); Assert.assertEquals(200, borrowedSelected[1]); -borrowedSelected = mutableFilterContext.borrowSelected(3); +borrowedSelected = mutableFilterContext.updateSelected(3); Assert.assertEquals(3, borrowedSelected.length); Assert.assertEquals(0, borrowedSelected[0]); Assert.assertEquals(0, borrowedSelected[1]); diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec
[hive] branch master updated: HIVE-23322 : Update some tests to better tolerate a more busy environment (Zoltan Haindrich via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e48a682 HIVE-23322 : Update some tests to better tolerate a more busy environment (Zoltan Haindrich via Ashutosh Chauhan) e48a682 is described below commit e48a6827dc78c38b282b590e95152cdabdd79d85 Author: Zoltan Haindrich AuthorDate: Sun May 3 01:00:06 2020 -0700 HIVE-23322 : Update some tests to better tolerate a more busy environment (Zoltan Haindrich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/common/metrics/metrics2/TestCodahaleMetrics.java | 7 ++- .../hive/llap/tezplugins/TestLlapTaskCommunicator.java| 4 ++-- .../hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java | 15 +-- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java index 1c49d95..9c4e475 100644 --- a/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java +++ b/common/src/test/org/apache/hadoop/hive/common/metrics/metrics2/TestCodahaleMetrics.java @@ -154,7 +154,12 @@ public class TestCodahaleMetrics { public void testFileReporting() throws Exception { int runs = 5; String counterName = "count2"; -for (int i = 0; i < runs; i++) { + +// on the first write the metrics writer should initialize stuff +MetricsFactory.getInstance().incrementCounter(counterName); +sleep(5 * REPORT_INTERVAL_MS); + +for (int i = 1; i <= runs; i++) { MetricsFactory.getInstance().incrementCounter(counterName); sleep(REPORT_INTERVAL_MS + REPORT_INTERVAL_MS / 2); Assert.assertEquals(i + 1, getCounterValue(counterName)); diff --git a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java 
b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java index 9b4ac27..3bbbdf3 100644 --- a/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java +++ b/llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskCommunicator.java @@ -66,7 +66,7 @@ import org.mockito.stubbing.Answer; public class TestLlapTaskCommunicator { - @Test (timeout = 5000) + @Test (timeout = 3) public void testEntityTracker1() { LlapTaskCommunicator.EntityTracker entityTracker = new LlapTaskCommunicator.EntityTracker(); @@ -121,7 +121,7 @@ public class TestLlapTaskCommunicator { } - @Test(timeout = 5000) + @Test(timeout = 3) public void testFinishableStateUpdateFailure() throws Exception { LlapTaskCommunicatorWrapperForTest wrapper = null; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index 0d3..fbf2b8b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.session.SessionState; import org.junit.Assert; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; @@ -41,11 +40,6 @@ public class TestSQL11ReservedKeyWordsNegative { private static HiveConf conf = new HiveConf(SemanticAnalyzer.class); private static ParseDriver pd = new ParseDriver(); - @BeforeClass - public static void initialize() { -SessionState.start(conf); - } - private static ASTNode parse(String query) throws ParseException { ASTNode nd = null; try { @@ -57,6 +51,11 @@ public class TestSQL11ReservedKeyWordsNegative { } public static class TestSQL11ReservedKeyWordsNegativeMisc { +@BeforeClass +public 
static void initialize() { + SessionState.start(conf); +} + @Test public void testSQL11ReservedKeyWords_KILL() { try { @@ -72,6 +71,10 @@ public class TestSQL11ReservedKeyWordsNegative { @RunWith(Parameterized.class) public static class TestSQL11ReservedKeyWordsNegativeParametrized { +@BeforeClass +public static void initialize() { + SessionState.start(conf); +} @Parameters(name = "{0}") public static Collection data() {
[hive] branch master updated: HIVE-23240 : loadDynamicPartition complains about static partitions even when they are provided in the description (Reza Safi via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d3ee2ae HIVE-23240 : loadDynamicPartition complains about static partitions even when they are provided in the description (Reza Safi via Ashutosh Chauhan) d3ee2ae is described below commit d3ee2ae414686de90f13e5db1d857ec51e783bad Author: Ashutosh Chauhan AuthorDate: Sun May 3 00:55:38 2020 -0700 HIVE-23240 : loadDynamicPartition complains about static partitions even when they are provided in the description (Reza Safi via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 7bb90d3..bd1fae1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2879,7 +2879,12 @@ private void constructOneLBLocationMap(FileStatus fSta, // calculate full path spec for each valid partition path validPartitions.forEach(partPath -> { Map fullPartSpec = Maps.newLinkedHashMap(partSpec); - if (!Warehouse.makeSpecFromName(fullPartSpec, partPath, new HashSet<>(partSpec.keySet( { + String staticParts = Warehouse.makeDynamicPartName(partSpec); + Path computedPath = partPath; + if (!staticParts.isEmpty() ) { +computedPath = new Path(new Path(partPath.getParent(), staticParts), partPath.getName()); + } + if (!Warehouse.makeSpecFromName(fullPartSpec, computedPath, new HashSet<>(partSpec.keySet( { Utilities.FILE_OP_LOGGER.warn("Ignoring invalid DP directory " + partPath); } else { PartitionDetails details = new PartitionDetails();
[hive] branch master updated: HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo is static object locked (Ashutosh Chauhan via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 2b177db HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo is static object locked (Ashutosh Chauhan via Rajesh Balamohan) 2b177db is described below commit 2b177db2fd71ccd602247fae87362801a9095f1a Author: Ashutosh Chauhan AuthorDate: Sat Apr 25 19:00:13 2020 -0700 HIVE-22737 : Concurrency: FunctionRegistry::getFunctionInfo is static object locked (Ashutosh Chauhan via Rajesh Balamohan) --- .../org/apache/hadoop/hive/ql/exec/Registry.java | 25 ++ .../results/clientpositive/llap/udf_substr.q.out | 2 +- .../clientpositive/llap/udf_substring.q.out| 2 +- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java index 40e9e97..6ceea2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java @@ -78,7 +78,7 @@ public class Registry { /** * The mapping from expression function names to expression classes. */ - private final Map mFunctions = new LinkedHashMap(); + private final Map mFunctions = new ConcurrentHashMap(); private final Set> builtIns = Collections.synchronizedSet(new HashSet>()); /** * Persistent map contains refcounts that are only modified in synchronized methods for now, @@ -91,6 +91,7 @@ public class Registry { /** * The epic lock for the registry. This was added to replace the synchronized methods with * minimum disruption; the locking should really be made more granular here. + * This lock is protecting mFunctions, builtIns and persistent maps. 
*/ private final ReentrantLock lock = new ReentrantLock(); @@ -331,11 +332,9 @@ public class Registry { * @return */ public FunctionInfo getFunctionInfo(String functionName) throws SemanticException { -lock.lock(); -try { functionName = functionName.toLowerCase(); if (FunctionUtils.isQualifiedFunctionName(functionName)) { -FunctionInfo functionInfo = getQualifiedFunctionInfoUnderLock(functionName); +FunctionInfo functionInfo = getQualifiedFunctionInfo(functionName); addToCurrentFunctions(functionName, functionInfo); return functionInfo; } @@ -348,14 +347,10 @@ public class Registry { if (functionInfo == null) { functionName = FunctionUtils.qualifyFunctionName( functionName, SessionState.get().getCurrentDatabase().toLowerCase()); -functionInfo = getQualifiedFunctionInfoUnderLock(functionName); +functionInfo = getQualifiedFunctionInfo(functionName); } addToCurrentFunctions(functionName, functionInfo); return functionInfo; -} finally { - lock.unlock(); -} - } private void addToCurrentFunctions(String functionName, FunctionInfo functionInfo) { @@ -633,7 +628,7 @@ public class Registry { return null; } - private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName) throws SemanticException { + private FunctionInfo getQualifiedFunctionInfo(String qualifiedName) throws SemanticException { FunctionInfo info = mFunctions.get(qualifiedName); if (info != null && info.isBlockedFunction()) { throw new SemanticException ("UDF " + qualifiedName + " is not allowed"); @@ -658,15 +653,7 @@ public class Registry { if (conf == null || !HiveConf.getBoolVar(conf, ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) { return null; } -// This is a little bit weird. We'll do the MS call outside of the lock. Our caller calls us -// under lock, so we'd preserve the lock state for them; their finally block will release the -// lock correctly. See the comment on the lock field - the locking needs to be reworked. 
-lock.unlock(); -try { - return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf); -} finally { - lock.lock(); -} +return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf); } // should be called after session registry is checked diff --git a/ql/src/test/results/clientpositive/llap/udf_substr.q.out b/ql/src/test/results/clientpositive/llap/udf_substr.q.out index 00fa606..7c1a0f1 100644 --- a/ql/src/test/results/clientpositive/llap/udf_substr.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_substr.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr POSTHOOK: type: DESCFUNCTION substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of le
[hive] branch master updated: HIVE-23345: Enable Parquet timestamps types (INT64 and INT96) conversion to Hive BIGINT type Adding test cases (Panos G via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 659e28d HIVE-23345: Enable Parquet timestamps types (INT64 and INT96) conversion to Hive BIGINT type Adding test cases (Panos G via Ashutosh Chauhan) 659e28d is described below commit 659e28de0b609d114e20e6294348abb74f49f6e0 Author: Panos Garefalakis AuthorDate: Fri May 1 14:24:39 2020 +0100 HIVE-23345: Enable Parquet timestamps types (INT64 and INT96) conversion to Hive BIGINT type Adding test cases (Panos G via Ashutosh Chauhan) Change-Id: I8666a95cc7ff7495a86b960c2ea173cd875bfa4f Signed-off-by: Ashutosh Chauhan --- .../test/resources/testconfiguration.properties| 1 + .../hive/ql/io/parquet/convert/ETypeConverter.java | 40 +-- .../ql/io/parquet/convert/TestETypeConverter.java | 29 +++ .../clientpositive/parquet_timestampt_to_bigint.q | 25 + .../llap/parquet_timestampt_to_bigint.q.out| 60 ++ 5 files changed, 152 insertions(+), 3 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b639718..5468728 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -695,6 +695,7 @@ minillaplocal.query.files=\ parquet_legacy_mixed_date.q,\ parquet_legacy_mixed_timestamp.q,\ parquet_proleptic_mixed_date.q,\ + parquet_timestampt_to_bigint.q,\ partition_ctas.q,\ partition_multilevels.q,\ partition_shared_scan.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index 6082321..8e436bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ 
-14,6 +14,8 @@ package org.apache.hadoop.hive.ql.io.parquet.convert; import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Map; import java.util.Optional; @@ -43,6 +45,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.parquet.Preconditions; import org.apache.parquet.column.Dictionary; import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.PrimitiveConverter; @@ -662,9 +665,25 @@ public enum ETypeConverter { }; } }, - ETIMESTAMP_CONVERTER(TimestampWritableV2.class) { + EINT96_TIMESTAMP_CONVERTER(TimestampWritableV2.class) { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { + if (hiveTypeInfo != null) { +String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); +switch (typeName) { + case serdeConstants.BIGINT_TYPE_NAME: +return new BinaryConverter(type, parent, index) { + @Override + protected LongWritable convert(Binary binary) { +Preconditions.checkArgument(binary.length() == 12, "Must be 12 bytes"); +ByteBuffer buf = binary.toByteBuffer(); +buf.order(ByteOrder.LITTLE_ENDIAN); +long longVal = buf.getLong(); +return new LongWritable(longVal); + } +}; +} + } return new BinaryConverter(type, parent, index) { @Override protected TimestampWritableV2 convert(Binary binary) { @@ -690,6 +709,22 @@ public enum ETypeConverter { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { + if (hiveTypeInfo != null) { +String typeName = TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName()); +switch (typeName) { + case serdeConstants.BIGINT_TYPE_NAME: +return new BinaryConverter(type, parent, index) { + @Override + protected LongWritable convert(Binary binary) { +Preconditions.checkArgument(binary.length() == 8, 
"Must be 8 bytes"); +ByteBuffer buf = binary.toByteBuffer(); +buf.order(ByteOrder.LITTLE_ENDIAN); +long longVal = buf.getLong(); +return new LongWritable(longVal); + } +}; +} + } return new PrimitiveConverter() { @Override public void addLong(final long value) { @@ -735,8 +770,7 @@ public enum ETypeConverter { public static PrimitiveConverter getNewConverter(fina
[hive] branch master updated: HIVE-23356 : Hash aggregation is always disabled while processing querys with grouping sets expressions. (Qiang Kang via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 0fd99df HIVE-23356 : Hash aggregation is always disabled while processing querys with grouping sets expressions. (Qiang Kang via Ashutosh Chauhan) 0fd99df is described below commit 0fd99df99dc07540d8818d179bcdcb2972f09752 Author: Qiang Kang AuthorDate: Sat May 2 12:12:06 2020 -0700 HIVE-23356 : Hash aggregation is always disabled while processing querys with grouping sets expressions. (Qiang Kang via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hadoop/hive/ql/exec/GroupByOperator.java | 17 +-- .../apache/hadoop/hive/ql/exec/TestOperators.java | 131 + 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 7220f33..b94e3fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -739,20 +739,21 @@ public class GroupByOperator extends Operator implements IConfigure // if hash aggregation is not behaving properly, disable it if (numRowsInput == numRowsCompareHashAggr) { numRowsCompareHashAggr += groupbyMapAggrInterval; +long numRowsProcessed = groupingSetsPresent ? 
numRowsInput * groupingSets.size() : numRowsInput; // map-side aggregation should reduce the entries by at-least half -if (numRowsHashTbl > numRowsInput * minReductionHashAggr) { +if (numRowsHashTbl > numRowsProcessed * minReductionHashAggr) { LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl - + " #total = " + numRowsInput + " reduction = " + 1.0 - * (numRowsHashTbl / numRowsInput) + " minReduction = " - + minReductionHashAggr); + + " #numRowsInput = " + numRowsInput + " reduction = " + 1.0 * (numRowsHashTbl / numRowsProcessed) + + " minReduction = " + minReductionHashAggr + " groupingSetsPresent = " + groupingSetsPresent + + " numRowsProcessed = " + numRowsProcessed); flushHashTable(true); hashAggr = false; } else { if (LOG.isTraceEnabled()) { -LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl -+ " #total = " + numRowsInput + " reduction = " + 1.0 -* (numRowsHashTbl / numRowsInput) + " minReduction = " -+ minReductionHashAggr); +LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl + " #numRowsInput = " + numRowsInput ++ " reduction = " + 1.0 * (numRowsHashTbl / numRowsProcessed) + " minReduction = " ++ minReductionHashAggr + " groupingSetsPresent = " + groupingSetsPresent + " numRowsProcessed = " ++ numRowsProcessed); } } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index 8a0606b..3c0a7eb 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -32,14 +32,18 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.io.IOContextMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; 
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -49,6 +53,7 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.sess
[hive] branch master updated: HIVE-23305 : NullPointerException in LlapTaskSchedulerService addNode due to race condition ( Attila Magyar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f48c267 HIVE-23305 : NullPointerException in LlapTaskSchedulerService addNode due to race condition ( Attila Magyar via Ashutosh Chauhan) f48c267 is described below commit f48c26781d8a1a51c6a00d2e557ba6da46b28856 Author: Attila Magyar AuthorDate: Thu Apr 30 10:27:56 2020 -0700 HIVE-23305 : NullPointerException in LlapTaskSchedulerService addNode due to race condition ( Attila Magyar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index 565afdc..9cb8bc9 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -813,8 +813,8 @@ public class LlapTaskSchedulerService extends TaskScheduler { Futures.addCallback(schedulerFuture, new LoggingFutureCallback("SchedulerThread", LOG)); registry.start(); - registry.registerStateChangeListener(new NodeStateChangeListener()); activeInstances = registry.getInstances(); + registry.registerStateChangeListener(new NodeStateChangeListener()); for (LlapServiceInstance inst : activeInstances.getAll()) { registerAndAddNode(new NodeInfo(inst, nodeBlacklistConf, clock, numSchedulableTasksPerNode, metrics), inst);
[hive] branch master updated: HIVE-23261 : Check whether encryption is enabled in the cluster before moving files (Ramesh Kumar via Rajesh Balamohan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e13486b HIVE-23261 : Check whether encryption is enabled in the cluster before moving files (Ramesh Kumar via Rajesh Balamohan) e13486b is described below commit e13486be06a5160b41a5310388dd9c704361bead Author: RAMESH KUMAR THANGARAJAN AuthorDate: Wed Apr 29 11:29:28 2020 -0700 HIVE-23261 : Check whether encryption is enabled in the cluster before moving files (Ramesh Kumar via Rajesh Balamohan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 8d194c3..7bb90d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -89,6 +89,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; @@ -4121,7 +4122,7 @@ private void constructOneLBLocationMap(FileStatus fSta, Arrays.sort(files); for (final FileStatus srcFile : files) { final Path srcP = srcFile.getPath(); -final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs, configuredOwner, isManaged); +final boolean needToCopy = needToCopy(conf, srcP, destf, srcFs, destFs, configuredOwner, isManaged); final boolean isRenameAllowed = !needToCopy && !isSrcLocal; @@ -4436,7 +4437,7 @@ private void constructOneLBLocationMap(FileStatus fSta, 
destFs.copyFromLocalFile(srcf, destf); return true; } else { -if (needToCopy(srcf, destf, srcFs, destFs, configuredOwner, isManaged)) { +if (needToCopy(conf, srcf, destf, srcFs, destFs, configuredOwner, isManaged)) { //copy if across file system or encryption zones. LOG.debug("Copying source " + srcf + " to " + destf + " because HDFS encryption zones are different."); return FileUtils.copy(srcf.getFileSystem(conf), srcf, destf.getFileSystem(conf), destf, @@ -4561,7 +4562,7 @@ private void constructOneLBLocationMap(FileStatus fSta, * TODO- consider if need to do this for different file authority. * @throws HiveException */ - static private boolean needToCopy(Path srcf, Path destf, FileSystem srcFs, + static private boolean needToCopy(final HiveConf conf, Path srcf, Path destf, FileSystem srcFs, FileSystem destFs, String configuredOwner, boolean isManaged) throws HiveException { //Check if different FileSystems if (!FileUtils.equalsFileSystem(srcFs, destFs)) { @@ -4602,6 +4603,10 @@ private void constructOneLBLocationMap(FileStatus fSta, } } +// if Encryption not enabled, no copy needed +if (!DFSUtilClient.isHDFSEncryptionEnabled(conf)) { + return false; +} //Check if different encryption zones HadoopShims.HdfsEncryptionShim srcHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(srcFs); HadoopShims.HdfsEncryptionShim destHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(destFs);
[hive] branch master updated: HIVE-23294 : Remove sync bottleneck in TezConfigurationFactory (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new c4e7376 HIVE-23294 : Remove sync bottleneck in TezConfigurationFactory (Rajesh Balamohan via Ashutosh Chauhan) c4e7376 is described below commit c4e73766a5c8cc0da0ba5d9387a00d651af4e820 Author: Ashutosh Chauhan AuthorDate: Tue Apr 28 21:19:37 2020 -0700 HIVE-23294 : Remove sync bottleneck in TezConfigurationFactory (Rajesh Balamohan via Ashutosh Chauhan) --- .../hive/ql/exec/tez/TezConfigurationFactory.java | 17 - 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java index a0da0ad..84ae541 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hive.ql.exec.tez; +import java.lang.reflect.Field; import java.util.Iterator; import java.util.Map; import java.util.function.Predicate; @@ -33,6 +34,7 @@ import static org.apache.hadoop.security.ssl.SSLFactory.SSL_CLIENT_CONF_KEY; public class TezConfigurationFactory { private static TezConfiguration defaultConf = new TezConfiguration(); + private static final Field updatingResource; private static final Logger LOG = LoggerFactory.getLogger(TezConfigurationFactory.class.getName()); @@ -41,6 +43,14 @@ public class TezConfigurationFactory { String sslConf = defaultConf.get(SSL_CLIENT_CONF_KEY, "ssl-client.xml"); defaultConf.addResource(sslConf); LOG.info("SSL conf : " + sslConf); +try { + //Cache the field handle so that we can avoid expensive conf.getPropertySources(key) later + updatingResource = Configuration.class.getDeclaredField("updatingResource"); +} catch 
(NoSuchFieldException | SecurityException e) { + throw new RuntimeException(e); +} +updatingResource.setAccessible(true); + } public static Configuration copyInto(Configuration target, Configuration src, @@ -50,7 +60,12 @@ public class TezConfigurationFactory { Map.Entry entry = iter.next(); String name = entry.getKey(); String value = entry.getValue(); - String[] sources = src.getPropertySources(name); + String[] sources; + try { +sources = ((Map)updatingResource.get(src)).get(name); + } catch (IllegalArgumentException | IllegalAccessException e) { +throw new RuntimeException(e); + } final String source; if (sources == null || sources.length == 0) { source = null;
[hive] branch master updated: HIVE-23295 : Possible NPE when getting predicate literal list when dynamic values are not available (Attila Magyar via Jason Dere)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new ea74565 HIVE-23295 : Possible NPE when on getting predicate literal list when dynamic values are not available (Attila Magyar via Jason Dere) ea74565 is described below commit ea74565b5e0095f2b99be8431a683fab3442c275 Author: Attila Magyar AuthorDate: Mon Apr 27 16:02:58 2020 -0700 HIVE-23295 : Possible NPE when on getting predicate literal list when dynamic values are not available (Attila Magyar via Jason Dere) Signed-off-by: Ashutosh Chauhan --- .../src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 8aa97d9..c2b7c4a 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -22,6 +22,7 @@ import java.sql.Timestamp; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; @@ -125,7 +126,7 @@ public final class SearchArgumentImpl implements SearchArgument { } } catch (NoDynamicValuesException err) { LOG.debug("Error while retrieving literalList, returning null", err); - return null; + return Collections.emptyList(); } return newLiteraList; }
[hive] branch master updated: HIVE-23268 : Eliminate beanutils transitive dependency
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 19314f5 HIVE-23268 : Eliminate beanutils transitive dependency 19314f5 is described below commit 19314f5dfdd298a469ed8a34afcfb96cfd910f1b Author: Ashutosh Chauhan AuthorDate: Tue Apr 21 23:35:15 2020 -0700 HIVE-23268 : Eliminate beanutils transitive dependency --- accumulo-handler/pom.xml | 24 ++- beeline/pom.xml| 16 +- cli/pom.xml| 16 +- common/pom.xml | 22 -- contrib/pom.xml| 16 +- druid-handler/pom.xml | 6 +++- hbase-handler/pom.xml | 22 -- hcatalog/core/pom.xml | 12 ++-- hcatalog/hcatalog-pig-adapter/pom.xml | 22 -- hcatalog/pom.xml | 16 +- hcatalog/server-extensions/pom.xml | 16 +- hcatalog/streaming/pom.xml | 16 +- hcatalog/webhcat/java-client/pom.xml | 16 +- hcatalog/webhcat/svr/pom.xml | 6 +++- hplsql/pom.xml | 16 +- jdbc/pom.xml | 8 - kafka-handler/pom.xml | 6 +++- kryo-registrator/pom.xml | 8 - kudu-handler/pom.xml | 16 +- llap-client/pom.xml| 12 ++-- llap-common/pom.xml| 12 ++-- llap-ext-client/pom.xml| 12 ++-- llap-server/pom.xml| 12 ++-- llap-tez/pom.xml | 12 ++-- metastore/pom.xml | 6 +++- ql/pom.xml | 6 +++- serde/pom.xml | 34 -- service/pom.xml| 6 +++- shims/0.23/pom.xml | 34 -- shims/common/pom.xml | 22 -- shims/scheduler/pom.xml| 14 + standalone-metastore/metastore-common/pom.xml | 6 +++- standalone-metastore/metastore-server/pom.xml | 25 +++- .../metastore-tools/metastore-benchmarks/pom.xml | 8 + standalone-metastore/pom.xml | 1 + storage-api/pom.xml| 4 +++ streaming/pom.xml | 16 +- upgrade-acid/pre-upgrade/pom.xml | 4 +++ 38 files changed, 454 insertions(+), 72 deletions(-) diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml index 23433a5..bee3eb8 100644 --- a/accumulo-handler/pom.xml +++ b/accumulo-handler/pom.xml @@ -36,7 +36,15 @@ org.apache.accumulo accumulo-core + +commons-beanutils 
+commons-beanutils-core + +commons-digester +commons-digester + + commons-collections commons-collections @@ -134,7 +142,21 @@ hadoop-common ${hadoop.version} true - + + +org.slf4j +slf4j-log4j12 + + +commons-beanutils +commons-beanutils + + +commons-logging +commons-logging + + + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/beeline/pom.xml b/beeline/pom.xml index a5a1e42..bb627f2 100644 --- a/beeline/pom.xml +++ b/beeline/pom.xml @@ -81,7 +81,21 @@ hadoop-common ${hadoop.version} true - + + + commons-beanutils + commons-beanutils + + + org.slf4j + slf4j-log4j12 + + + commons-logging + commons-logging + + + org.apache.thrift libthrift diff --git a/cli/pom.xml b/cli/pom.xml index 79fa973..eca0282 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -97,7 +97,21 @@ hadoop-common ${hadoop.version} true - + + + commons-beanutils + commons-beanutils + + + org.slf4j + slf4j-log4j12 + + + commons-logging + commons-logging
[hive] branch master updated: HIVE-23262 : Remove dependency on activemq
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 40ca5f6 HIVE-23262 : Remove dependency on activemq 40ca5f6 is described below commit 40ca5f65666b007baa13a2c3e2d561712d683506 Author: Ashutosh Chauhan AuthorDate: Mon Apr 20 21:53:24 2020 -0700 HIVE-23262 : Remove dependency on activemq --- hcatalog/conf/jndi.properties | 36 --- hcatalog/server-extensions/pom.xml | 18 -- .../hcatalog/listener/TestMsgBusConnection.java| 117 - .../listener/TestNotificationListener.java | 270 - pom.xml| 17 -- 5 files changed, 458 deletions(-) diff --git a/hcatalog/conf/jndi.properties b/hcatalog/conf/jndi.properties deleted file mode 100644 index f718111..000 --- a/hcatalog/conf/jndi.properties +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# If ActiveMQ is used then uncomment following properties, else substitute it accordingly. -#java.naming.factory.initial = org.apache.activemq.jndi.ActiveMQInitialContextFactory - -# use the following property to provide location of MQ broker. 
-#java.naming.provider.url = tcp://localhost:61616 - -# use the following property to specify the JNDI name the connection factory -# should appear as. -#connectionFactoryNames = connectionFactory, queueConnectionFactory, topicConnectionFactry - -# register some queues in JNDI using the form -# queue.[jndiName] = [physicalName] -# queue.MyQueue = example.MyQueue - - -# register some topics in JNDI using the form -# topic.[jndiName] = [physicalName] -# topic.MyTopic = example.MyTopic - diff --git a/hcatalog/server-extensions/pom.xml b/hcatalog/server-extensions/pom.xml index 9bc28f2..e00dec1 100644 --- a/hcatalog/server-extensions/pom.xml +++ b/hcatalog/server-extensions/pom.xml @@ -94,24 +94,6 @@ test - org.apache.activemq - activemq-core - ${activemq.version} - test - - - org.springframework - spring-context - - - - - org.apache.activemq - kahadb - ${activemq.version} - test - - org.apache.pig pig ${pig.version} diff --git a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java deleted file mode 100644 index d793770..000 --- a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hive.hcatalog.listener; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import javax.jms.Connection; -import javax.jms.ConnectionFactory; -import javax.jms.Destination; -import javax.jms.JMSException; -import javax.jms.Message; -import javax.jms.MessageConsumer; -import javax.jms.Session; -import javax.jms.TextMessage; - -import org.apache.activemq.ActiveMQConnectionFactory; -import org.apache.activemq.broker.BrokerService; -import org.apache.
[hive] branch master updated: HIVE-23287 : Reduce dependency on icu4j
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new ca275be HIVE-23287 : Reduce dependency on icu4j ca275be is described below commit ca275befa1b9c39c76e539c6b4506e58ab39f51c Author: Ashutosh Chauhan AuthorDate: Thu Apr 23 09:03:14 2020 -0700 HIVE-23287 : Reduce dependency on icu4j --- druid-handler/pom.xml | 4 1 file changed, 4 insertions(+) diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml index 18dfca0..58a17bc 100644 --- a/druid-handler/pom.xml +++ b/druid-handler/pom.xml @@ -124,6 +124,10 @@ ${druid.version} + com.ibm.icu + icu4j + + org.codehaus.plexus plexus-utils
[hive] branch master updated: HIVE-23267 : Reduce dependency on groovy
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 2c1f2fd HIVE-23267 : Reduce dependency on groovy 2c1f2fd is described below commit 2c1f2fddf4931a71c67ca63ece6ca232c25ea372 Author: Ashutosh Chauhan AuthorDate: Tue Apr 21 13:50:57 2020 -0700 HIVE-23267 : Reduce dependency on groovy Signed-off-by: Ashutosh Chauhan --- hcatalog/hcatalog-pig-adapter/pom.xml | 4 hcatalog/pom.xml | 4 upgrade-acid/pre-upgrade/pom.xml | 4 3 files changed, 12 insertions(+) diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml b/hcatalog/hcatalog-pig-adapter/pom.xml index bc8bfba..9beb35d 100644 --- a/hcatalog/hcatalog-pig-adapter/pom.xml +++ b/hcatalog/hcatalog-pig-adapter/pom.xml @@ -68,6 +68,10 @@ h2 + org.codehaus.groovy + groovy-all + + tomcat jasper-runtime diff --git a/hcatalog/pom.xml b/hcatalog/pom.xml index ae278bc..4f0fd9f 100644 --- a/hcatalog/pom.xml +++ b/hcatalog/pom.xml @@ -74,6 +74,10 @@ test + org.codehaus.groovy + groovy-all + + tomcat jasper-compiler diff --git a/upgrade-acid/pre-upgrade/pom.xml b/upgrade-acid/pre-upgrade/pom.xml index 3abe596..b3abde9 100644 --- a/upgrade-acid/pre-upgrade/pom.xml +++ b/upgrade-acid/pre-upgrade/pom.xml @@ -88,6 +88,10 @@ provided +org.codehaus.groovy +groovy-all + + org.apache.zookeeper zookeeper
[hive] branch master updated: HIVE-23278 : Remove dependency on bouncycastle
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new c78d0da HIVE-23278 : Remove dependency on bouncycastle c78d0da is described below commit c78d0da5e8a49355ec1e9cf42c44a9d058488b46 Author: Ashutosh Chauhan AuthorDate: Wed Apr 22 23:27:51 2020 -0700 HIVE-23278 : Remove dependency on bouncycastle --- kryo-registrator/pom.xml | 6 ++ ql/pom.xml| 4 spark-client/pom.xml | 4 standalone-metastore/metastore-server/pom.xml | 6 ++ 4 files changed, 20 insertions(+) diff --git a/kryo-registrator/pom.xml b/kryo-registrator/pom.xml index ea2eb65..cbba1e7 100644 --- a/kryo-registrator/pom.xml +++ b/kryo-registrator/pom.xml @@ -43,6 +43,12 @@ spark-core_${scala.binary.version} ${spark.version} true + + + net.java.dev.jets3t + jets3t + + org.apache.hadoop diff --git a/ql/pom.xml b/ql/pom.xml index a0e77a1..7c42c0e 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -730,6 +730,10 @@ commons-logging + net.java.dev.jets3t + jets3t + + org.glassfish.jersey.containers * diff --git a/spark-client/pom.xml b/spark-client/pom.xml index a3cf922..33d03b9 100644 --- a/spark-client/pom.xml +++ b/spark-client/pom.xml @@ -82,6 +82,10 @@ * + net.java.dev.jets3t + jets3t + + org.slf4j slf4j-log4j12 diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml index e492ce1..14ea09b 100644 --- a/standalone-metastore/metastore-server/pom.xml +++ b/standalone-metastore/metastore-server/pom.xml @@ -250,6 +250,12 @@ apacheds-server-integ ${apache-directory-server.version} test + + + bouncycastle + bcprov-jdk15 + +
[hive] branch master updated: HIVE-23169 : Probe runtime support for LLAP (Panagiotis Garefalakis via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 8b9fadb HIVE-23169 : Probe runtime support for LLAP (Panagiotis Garefalakis via Ashutosh Chauhan) 8b9fadb is described below commit 8b9fadb5515aace73db5068cc81317b6f10e0f32 Author: Ashutosh Chauhan AuthorDate: Tue Apr 21 16:54:58 2020 -0700 HIVE-23169 : Probe runtime support for LLAP (Panagiotis Garefalakis via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../hive/llap/io/api/impl/LlapRecordReader.java| 51 ++ .../hive/llap/io/decode/ColumnVectorProducer.java | 6 +++ .../llap/io/decode/OrcEncodedDataConsumer.java | 4 ++ 3 files changed, 61 insertions(+) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index acb6b2d..417a42a 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -25,6 +25,8 @@ import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; @@ -41,6 +43,7 @@ import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.Includes; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.SchemaEvolutionFactory; import org.apache.hadoop.hive.llap.io.decode.ReadPipeline; import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; 
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidUtils; @@ -82,6 +85,7 @@ class LlapRecordReader implements RecordReader private final SearchArgument sarg; private final VectorizedRowBatchCtx rbCtx; private final boolean isVectorized; + private final boolean probeDecodeEnabled; private VectorizedOrcAcidRowBatchReader acidReader; private final Object[] partitionValues; @@ -196,6 +200,12 @@ class LlapRecordReader implements RecordReader this.includes = new IncludesImpl(tableIncludedCols, isAcidFormat, rbCtx, schema, job, isAcidScan && acidReader.includeAcidColumns()); +this.probeDecodeEnabled = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_OPTIMIZE_SCAN_PROBEDECODE); +if (this.probeDecodeEnabled) { + includes.setProbeDecodeContext(mapWork.getProbeDecodeContext()); + LOG.info("LlapRecordReader ProbeDecode is enabled"); +} + // Create the consumer of encoded data; it will coordinate decoding to CVBs. feedback = rp = cvp.createReadPipeline(this, split, includes, sarg, counters, includes, sourceInputFormat, sourceSerDe, reporter, job, mapWork.getPathToPartitionInfo()); @@ -629,6 +639,9 @@ class LlapRecordReader implements RecordReader private TypeDescription readerSchema; private JobConf jobConf; +// ProbeDecode Context for row-level filtering +private TableScanOperator.ProbeDecodeContext probeDecodeContext = null; + public IncludesImpl(List tableIncludedCols, boolean isAcidScan, VectorizedRowBatchCtx rbCtx, TypeDescription readerSchema, JobConf jobConf, boolean includeAcidColumns) { @@ -710,6 +723,10 @@ class LlapRecordReader implements RecordReader fileSchema, filePhysicalColumnIds, acidStructColumnId); } +public void setProbeDecodeContext(TableScanOperator.ProbeDecodeContext currProbeDecodeContext) { + this.probeDecodeContext = currProbeDecodeContext; +} + @Override public List getPhysicalColumnIds() { return filePhysicalColumnIds; @@ -725,5 +742,39 @@ class LlapRecordReader implements RecordReader return 
OrcInputFormat.genIncludedTypes( fileSchema, filePhysicalColumnIds, acidStructColumnId); } + +@Override +public String getQueryId() { + return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID); +} + +@Override +public boolean isProbeDecodeEnabled() { + return this.probeDecodeContext != null; +} + +@Override +public byte getProbeMjSmallTablePos() { + return this.probeDecodeContext.getMjSmallTablePos(); +} + +@Override +public int getProbeColIdx() { + // TODO: is this the best way to get the ColId? + Pattern pattern = Pattern.compile("_col([0-9]+)"); + Matcher matcher = pattern.matcher(this.probeDecodeCon
[hive] branch master updated: HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 00423c1 HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor via Ashutosh Chauhan) 00423c1 is described below commit 00423c1c31f9aac9efa98570a2b954f3f1ae56fa Author: David Mollitor AuthorDate: Mon Apr 20 20:02:34 2020 -0700 HIVE-23258 : Remove BoneCP Connection Pool (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- binary-package-licenses/README | 1 - .../java/org/apache/hadoop/hive/conf/HiveConf.java | 3 +- hcatalog/src/test/e2e/hcatalog/drivers/Util.pm | 3 - .../org/apache/hive/jdbc/TestRestrictedList.java | 1 - metastore/pom.xml | 10 -- pom.xml| 6 - standalone-metastore/metastore-common/pom.xml | 4 - .../hadoop/hive/metastore/conf/MetastoreConf.java | 2 +- standalone-metastore/metastore-server/pom.xml | 4 - .../datasource/BoneCPDataSourceProvider.java | 191 - .../metastore/datasource/DataSourceProvider.java | 7 - .../datasource/DataSourceProviderFactory.java | 1 - .../datasource/DbCPDataSourceProvider.java | 6 - .../datasource/HikariCPDataSourceProvider.java | 6 - .../hadoop/hive/metastore/txn/TxnHandler.java | 28 ++- .../datasource/TestDataSourceProviderFactory.java | 55 -- standalone-metastore/pom.xml | 6 - 17 files changed, 12 insertions(+), 322 deletions(-) diff --git a/binary-package-licenses/README b/binary-package-licenses/README index db6b7fe..02a4d11 100644 --- a/binary-package-licenses/README +++ b/binary-package-licenses/README @@ -19,7 +19,6 @@ ant* apache-curator avatica* avro -bonecp calcite* classmate commons* diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 16bae92..e3ddbf1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1005,7 +1005,7 @@ public class HiveConf extends Configuration { * @deprecated Use MetastoreConf.CONNECTION_POOLING_TYPE */ @Deprecated -METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "HikariCP", new StringSet("BONECP", "DBCP", +METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "HikariCP", new StringSet("DBCP", "HikariCP", "NONE"), "Specify connection pool library for datanucleus"), /** @@ -4831,7 +4831,6 @@ public class HiveConf extends Configuration { "hive.spark.client.rpc.server.address," + "hive.spark.client.rpc.server.port," + "hive.spark.client.rpc.sasl.mechanisms," + -"bonecp.," + "hive.druid.broker.address.default," + "hive.druid.coordinator.address.default," + "hikaricp.," + diff --git a/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm b/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm index 65e6c81..b860480 100644 --- a/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm +++ b/hcatalog/src/test/e2e/hcatalog/drivers/Util.pm @@ -424,9 +424,6 @@ sub getHiveLibsForPig($$) /jdo-api-.*\.jar$/ && do { $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; }; -/bonecp-.*\.jar$/ && do { -$cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; -}; /commons-pool-.*\.jar$/ && do { $cp .= $cfg->{'hivelib'} . '/' . $_ . 
':'; }; diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java index 596c3d6..d5641d9 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java @@ -94,7 +94,6 @@ public class TestRestrictedList { addToExpectedRestrictedMap("hive.spark.client.rpc.server.port"); addToExpectedRestrictedMap("hive.spark.client.rpc.sasl.mechanisms"); addToExpectedRestrictedMap("hive.query.max.length"); -addToExpectedRestrictedMap("bonecp.test"); addToExpectedRestrictedMap("hive.druid.broker.address.default"); addToExpectedRestrictedMap("hive.druid.coordinator.address.default"); addToExpectedRestrictedMap("hikaricp.test"); diff --git a/metastore/pom.xml b/metastore/pom.xml index 95e2d07..c82dc26
[hive] branch master updated: HIVE-23241 : Reduce transitive dependencies
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 57b2024 HIVE-23241 : Reduce transitive dependencies 57b2024 is described below commit 57b2024cebc680e5b8165a73a9be32259cf78794 Author: Ashutosh Chauhan AuthorDate: Mon Apr 20 14:04:34 2020 -0700 HIVE-23241 : Reduce transitive dependencies Signed-off-by: Ashutosh Chauhan --- druid-handler/pom.xml| 20 .../metastore-tools/metastore-benchmarks/pom.xml | 6 ++ standalone-metastore/metastore-tools/pom.xml | 6 ++ .../metastore-tools/tools-common/pom.xml | 13 - 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/druid-handler/pom.xml b/druid-handler/pom.xml index 5ee99b1..18dfca0 100644 --- a/druid-handler/pom.xml +++ b/druid-handler/pom.xml @@ -92,6 +92,10 @@ ${druid.version} + io.tesla.aether + tesla-aether + + com.fasterxml.jackson.core jackson-core @@ -120,6 +124,22 @@ ${druid.version} + org.codehaus.plexus + plexus-utils + + + org.ow2.asm + asm-commons + + + org.ow2.asm + asm + + + org.checkerframework + checker-qual + + com.google.code.findbugs annotations diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml index 13524a4..534583d 100644 --- a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml @@ -97,6 +97,12 @@ org.apache.maven.plugins maven-jxr-plugin 2.5 + + + org.codehaus.plexus + plexus-utils + + diff --git a/standalone-metastore/metastore-tools/pom.xml b/standalone-metastore/metastore-tools/pom.xml index d8c4788..df1b3ab 100644 --- a/standalone-metastore/metastore-tools/pom.xml +++ b/standalone-metastore/metastore-tools/pom.xml @@ -115,6 +115,12 @@ org.apache.maven.plugins maven-jxr-plugin + + +org.codehaus.plexus +plexus-utils + + 
2.5 diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml b/standalone-metastore/metastore-tools/tools-common/pom.xml index 4c226fa..44f02b7 100644 --- a/standalone-metastore/metastore-tools/tools-common/pom.xml +++ b/standalone-metastore/metastore-tools/tools-common/pom.xml @@ -101,17 +101,4 @@ - - - - - -org.apache.maven.plugins -maven-jxr-plugin -2.5 - - - - -
[hive] branch master updated: HIVE-20078 : Remove ATSHook
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 11abab2 HIVE-20078 : Remove ATSHook 11abab2 is described below commit 11abab21be0f5fbf6eeb39acbf2963618352b6dd Author: Ashutosh Chauhan AuthorDate: Sat Dec 14 08:50:54 2019 -0800 HIVE-20078 : Remove ATSHook --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 3 - .../org/apache/hadoop/hive/ql/hooks/ATSHook.java | 495 - .../apache/hadoop/hive/ql/hooks/TestATSHook.java | 59 --- 3 files changed, 557 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7b3acad..9e46e7b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -582,9 +582,6 @@ public class HiveConf extends Configuration { "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" + "A client stats publisher is specified as the name of a Java class which implements the \n" + "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."), -ATSHOOKQUEUECAPACITY("hive.ats.hook.queue.capacity", 64, -"Queue size for the ATS Hook executor. 
If the number of outstanding submissions \n" + -"to the ATS executor exceed this amount, the Hive ATS Hook will not try to log queries to ATS."), EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"), EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8, "How many jobs at most can be executed in parallel"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java deleted file mode 100644 index 0632f6e..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.hooks; - -import java.io.IOException; -import java.net.InetAddress; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; -import org.apache.hadoop.hive.ql.plan.ExplainWork; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain; -import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; -import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; -import org.apache.hadoop.yarn.client.api.TimelineClient; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hive.common.util.ShutdownHookManager; -import org.apache.tez.dag.api.TezConfiguration; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.util.concurr
[hive] branch master updated: HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new c3fb689 HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module (David Mollitor via Ashutosh Chauhan) c3fb689 is described below commit c3fb689ffbac7f8dd45ee5219145d8c3d231e27f Author: David Mollitor AuthorDate: Sat Apr 18 12:31:07 2020 -0700 HIVE-22684 : Run Eclipse Cleanup Against hbase-handler Module (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hadoop/hive/hbase/HBaseScanRange.java | 1 + .../java/org/apache/hadoop/hive/hbase/HBaseSerDe.java | 1 - .../apache/hadoop/hive/hbase/HBaseSerDeParameters.java | 1 + .../apache/hadoop/hive/hbase/HBaseStorageHandler.java | 1 - .../hadoop/hive/hbase/HiveHBaseTableOutputFormat.java | 2 ++ .../hive/hbase/HiveHBaseTableSnapshotInputFormat.java | 3 --- .../hive/hbase/struct/AvroHBaseValueFactory.java | 1 - .../org/apache/hadoop/hive/hbase/TestHBaseSerDe.java | 11 +-- .../apache/hadoop/hive/hbase/TestLazyHBaseObject.java | 18 -- .../org/apache/hadoop/hive/hbase/avro/Address.java | 5 - .../org/apache/hadoop/hive/hbase/avro/ContactInfo.java | 3 +++ .../org/apache/hadoop/hive/hbase/avro/Employee.java| 3 +++ .../org/apache/hadoop/hive/hbase/avro/HomePhone.java | 3 +++ .../org/apache/hadoop/hive/hbase/avro/OfficePhone.java | 3 +++ 14 files changed, 33 insertions(+), 23 deletions(-) diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java index f01748c..79d687f 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseScanRange.java @@ -87,6 +87,7 @@ public class HBaseScanRange implements Serializable { scan.setFilter(new FilterList(filters)); } + @Override 
public String toString() { return (startRow == null ? "" : new BytesWritable(startRow).toString()) + " ~ " + (stopRow == null ? "" : new BytesWritable(stopRow).toString()); diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java index 1588283..5147d0e 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; -import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.SerDeStats; diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java index eb3560c..480484c 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java @@ -167,6 +167,7 @@ public class HBaseSerDeParameters { throw new IllegalArgumentException("Invalid column name " + columnName); } + @Override public String toString() { return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]"; } diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java index f3735a3..16658d0 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java @@ -42,7 +42,6 @@ import 
org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer; import org.apache.hadoop.hive.ql.index.IndexSearchCondition; diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableOutputFormat.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableOutputFormat.java index b344e16..f0eccd7 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseT
[hive] branch master updated: HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bb95ad2 HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave via Ashutosh Chauhan) bb95ad2 is described below commit bb95ad243cc0ab028deed516b5f36616d9fd3354 Author: Iwao AVE AuthorDate: Sat Apr 18 12:26:07 2020 -0700 HIVE-22698 : Support Statement#closeOnCompletion() (Iwao Ave via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hive/jdbc/TestJdbcDriver2.java | 37 ++ .../org/apache/hive/jdbc/HiveQueryResultSet.java | 1 + .../java/org/apache/hive/jdbc/HiveStatement.java | 12 +-- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java index dbe282d..ba1f39c 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java @@ -3250,4 +3250,41 @@ public class TestJdbcDriver2 { public void testConnectInvalidDatabase() throws SQLException { DriverManager.getConnection("jdbc:hive2:///databasedoesnotexist", "", ""); } + + @Test + public void testStatementCloseOnCompletion() throws SQLException { +Statement stmt = con.createStatement(); +stmt.closeOnCompletion(); +ResultSet res = stmt.executeQuery("select under_col from " + tableName + " limit 1"); +assertTrue(res.next()); +assertFalse(stmt.isClosed()); +assertFalse(res.next()); +assertFalse(stmt.isClosed()); +res.close(); +assertTrue(stmt.isClosed()); + } + + @Test + public void testPreparedStatementCloseOnCompletion() throws SQLException { +PreparedStatement stmt = con.prepareStatement("select under_col from " + tableName + " limit 1"); +stmt.closeOnCompletion(); +ResultSet res = stmt.executeQuery(); 
+assertTrue(res.next()); +assertFalse(stmt.isClosed()); +assertFalse(res.next()); +assertFalse(stmt.isClosed()); +res.close(); +assertTrue(stmt.isClosed()); + } + + @Test + public void testCloseOnAlreadyOpenedResultSetCompletion() throws Exception { +PreparedStatement stmt = con.prepareStatement("select under_col from " + tableName + " limit 1"); +ResultSet res = stmt.executeQuery(); +assertTrue(res.next()); +stmt.closeOnCompletion(); +assertFalse(stmt.isClosed()); +res.close(); +assertTrue(stmt.isClosed()); + } } diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java index 8563cee..df31a25 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java @@ -276,6 +276,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet { if (this.statement != null && (this.statement instanceof HiveStatement)) { HiveStatement s = (HiveStatement) this.statement; s.closeClientOperation(); + s.closeOnResultSetCompletion(); } else { // for those stmtHandle passed from HiveDatabaseMetaData instead of Statement closeOperationHandle(stmtHandle); diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java index 543bf8c..a74a3a8 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveStatement.java @@ -76,6 +76,7 @@ public class HiveStatement implements java.sql.Statement { private final int defaultFetchSize; private boolean isScrollableResultset = false; private boolean isOperationComplete = false; + private boolean closeOnResultSetCompletion = false; /** * We need to keep a reference to the result set to support the following: * @@ -233,6 +234,13 @@ public class HiveStatement implements java.sql.Statement { stmtHandle = null; } + void closeOnResultSetCompletion() throws SQLException { +if 
(closeOnResultSetCompletion) { + resultSet = null; + close(); +} + } + /* * (non-Javadoc) * @@ -254,7 +262,7 @@ public class HiveStatement implements java.sql.Statement { // JDK 1.7 public void closeOnCompletion() throws SQLException { -throw new SQLFeatureNotSupportedException("Method not supported"); +closeOnResultSetCompletion = true; } /* @@ -752,7 +760,7 @@ public class HiveStatement implements java.sql.Statement { // JDK 1.7 public boolean isCloseOnCompletion() throws SQLException { -return false; +return closeOnResultSetCompletion; } /*
[hive] branch master updated: HIVE-23051 : Clean up BucketCodec (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f888527 HIVE-23051 : Clean up BucketCodec (David Mollitor via Ashutosh Chauhan) f888527 is described below commit f888527feb3cd912850b4c62cf52bc191558bd7a Author: David Mollitor AuthorDate: Sat Apr 18 11:15:52 2020 -0700 HIVE-23051 : Clean up BucketCodec (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hadoop/hive/ql/io/BucketCodec.java | 50 +- .../apache/hadoop/hive/ql/io/TestBucketCodec.java | 101 + 2 files changed, 127 insertions(+), 24 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java index eb9ded7..10d9604 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.io; +import com.google.common.base.Preconditions; + /** * This class makes sense of {@link RecordIdentifier#getBucketProperty()}. Up until ASF Hive 3.0 this * field was simply the bucket ID. Since 3.0 it does bit packing to store several things: @@ -86,50 +88,50 @@ public enum BucketCodec { } @Override public int encode(AcidOutputFormat.Options options) { - int statementId = options.getStatementId() >= 0 ? 
options.getStatementId() : 0; + final int statementId = options.getStatementId(); + final int bucketId = options.getBucketId(); + + Preconditions.checkArgument(bucketId >= 0 && bucketId <= MAX_BUCKET_ID, "Bucket ID out of range: " + bucketId); + Preconditions.checkArgument(statementId >= -1 && statementId <= MAX_STATEMENT_ID, + "Statement ID out of range: " + statementId); - assert this.version >=0 && this.version <= MAX_VERSION -: "Version out of range: " + version; - if(!(options.getBucketId() >= 0 && options.getBucketId() <= MAX_BUCKET_ID)) { -throw new IllegalArgumentException("bucketId out of range: " + options.getBucketId()); - } - if(!(statementId >= 0 && statementId <= MAX_STATEMENT_ID)) { -throw new IllegalArgumentException("statementId out of range: " + statementId); - } - return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) | -options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | statementId; + return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) + | options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | Math.max(0, statementId); } }; private static final int TOP3BITS_MASK = 0b1110_______; private static final int NUM_VERSION_BITS = 3; private static final int NUM_BUCKET_ID_BITS = 12; private static final int NUM_STATEMENT_ID_BITS = 12; - private static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1; + public static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1; public static final int MAX_BUCKET_ID = (1 << NUM_BUCKET_ID_BITS) - 1; - private static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1; + public static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1; public static BucketCodec determineVersion(int bucket) { -assert 7 << 29 == BucketCodec.TOP3BITS_MASK; -//look at top 3 bits and return appropriate enum try { + // look at top 3 bits and return appropriate enum return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29); -} -catch(IllegalArgumentException ex) { - 
throw new IllegalArgumentException(ex.getMessage() + " Cannot decode version from " + bucket); +} catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Cannot decode version from bucket number: " + Integer.toHexString(bucket), + iae); } } + public static BucketCodec getCodec(int version) { switch (version) { - case 0: -return BucketCodec.V0; - case 1: -return BucketCodec.V1; - default: -throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); +case 0: + return BucketCodec.V0; +case 1: + return BucketCodec.V1; +default: + throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); } } + final int version; + BucketCodec(int version) { +Preconditions.checkPositionIndex(version, MAX_VERSION, "Version out of range: " + version); this.versio
[hive] branch master updated: HIVE-23239 : Remove snakeyaml lib from Hive distribution via transitive dependency (Roohi Syeda via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 9940cab HIVE-23239 : Remove snakeyaml lib from Hive distribution via transitive dependency (Roohi Syeda via Ashutosh Chauhan) 9940cab is described below commit 9940cab2805527c567f55760f9856b0f9bf98b67 Author: Roohi Syeda AuthorDate: Sat Apr 18 10:54:32 2020 -0700 HIVE-23239 : Remove snakeyaml lib from Hive distribution via transitive dependency (Roohi Syeda via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- kafka-handler/pom.xml | 8 llap-server/pom.xml | 4 ql/pom.xml| 8 3 files changed, 20 insertions(+) diff --git a/kafka-handler/pom.xml b/kafka-handler/pom.xml index 4e58cb9..0ad3973 100644 --- a/kafka-handler/pom.xml +++ b/kafka-handler/pom.xml @@ -68,6 +68,10 @@ org.slf4j slf4j-api + + org.yaml + snakeyaml + @@ -134,6 +138,10 @@ test + org.yaml + snakeyaml + + org.apache.avro avro diff --git a/llap-server/pom.xml b/llap-server/pom.xml index e03de9c..3d4e3da 100644 --- a/llap-server/pom.xml +++ b/llap-server/pom.xml @@ -129,6 +129,10 @@ commons-logging commons-logging + + org.yaml + snakeyaml + diff --git a/ql/pom.xml b/ql/pom.xml index d1846c9..a0e77a1 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -386,6 +386,10 @@ jackson-core + org.yaml + snakeyaml + + org.apache.calcite.avatica avatica-core @@ -399,6 +403,10 @@ org.apache.calcite.avatica avatica-core + + org.yaml + snakeyaml +
[hive] branch master updated (15ebf9e -> a3f3df0)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 15ebf9e HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor via Ashutosh Chauhan) new bc225fc HIVE-23153 : deregister from zookeeper is not properly worked on kerberized environment (Eugene Chung via Ashutosh Chauhan) new a3f3df0 HIVE-23196 : Reduce number of delete calls to NN during Context::clear (Attila Magyar via Ashutosh Chauhan) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: ql/src/java/org/apache/hadoop/hive/ql/Context.java | 23 -- .../apache/hive/service/server/HiveServer2.java| 19 +- 2 files changed, 35 insertions(+), 7 deletions(-)
[hive] 02/02: HIVE-23196 : Reduce number of delete calls to NN during Context::clear (Attila Magyar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit a3f3df0f97d857482a3042feca940251c4557673 Author: Attila Magyar AuthorDate: Sat Apr 18 10:44:47 2020 -0700 HIVE-23196 : Reduce number of delete calls to NN during Context::clear (Attila Magyar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/Context.java | 23 -- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index d618ef9..9f59d4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -23,6 +23,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.text.SimpleDateFormat; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -671,6 +672,10 @@ public class Context { for (Map.Entry entry : fsScratchDirs.entrySet()) { try { Path p = entry.getValue(); +if (p.toUri().getPath().contains(stagingDir) && subDirOf(p, fsScratchDirs.values()) ) { + LOG.debug("Skip deleting stagingDir: " + p); + continue; // staging dir is deleted when deleting the scratch dir +} if(resultCacheDir == null || !p.toUri().getPath().contains(resultCacheDir)) { // delete only the paths which aren't result cache dir path // because that will be taken care by removeResultCacheDir @@ -687,6 +692,15 @@ public class Context { fsScratchDirs.clear(); } + private boolean subDirOf(Path path, Collection parents) { +for (Path each : parents) { + if (!path.equals(each) && FileUtils.isPathWithinSubtree(path, each)) { +return true; + } +} +return false; + } + /** * Remove any created directories for CTEs. 
*/ @@ -843,7 +857,7 @@ public class Context { subContext.clear(); } // Then clear this context - if (resDir != null) { + if (resDir != null && !isInScratchDir(resDir)) { // resDir is inside the scratch dir, removeScratchDir will take care of removing it try { FileSystem fs = resDir.getFileSystem(conf); LOG.debug("Deleting result dir: {}", resDir); @@ -853,7 +867,7 @@ public class Context { } } -if (resFile != null) { +if (resFile != null && !isInScratchDir(resFile.getParent())) { // resFile is inside the scratch dir, removeScratchDir will take care of removing it try { FileSystem fs = resFile.getFileSystem(conf); LOG.debug("Deleting result file: {}", resFile); @@ -871,6 +885,11 @@ public class Context { setNeedLockMgr(false); } + private boolean isInScratchDir(Path path) { +return path.toUri().getPath().startsWith(localScratchDir) + || path.toUri().getPath().startsWith(nonLocalScratchPath.toUri().getPath()); + } + public DataInput getStream() { try { if (!initialized) {
[hive] 01/02: HIVE-23153 : deregister from zookeeper is not properly worked on kerberized environment (Eugene Chung via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit bc225fcb0cd9c21ba38a466f98488b74d6c83d13 Author: Eugene Chung AuthorDate: Fri Apr 17 21:16:48 2020 -0700 HIVE-23153 : deregister from zookeeper is not properly worked on kerberized environment (Eugene Chung via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../org/apache/hive/service/server/HiveServer2.java | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 42b7e59..d600f3a 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -115,6 +115,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.apache.logging.log4j.util.Strings; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs.Ids; @@ -133,10 +134,12 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; * */ public class HiveServer2 extends CompositeService { - private static CountDownLatch deleteSignal; private static final Logger LOG = LoggerFactory.getLogger(HiveServer2.class); public static final String INSTANCE_URI_CONFIG = "hive.server2.instance.uri"; private static final int SHUTDOWN_TIME = 60; + private static CountDownLatch zkDeleteSignal; + private static volatile KeeperException.Code zkDeleteResultCode; + private CLIService cliService; private ThriftCLIService thriftCLIService; private CuratorFramework zKClientForPrivSync = null; @@ -566,7 +569,7 @@ public class HiveServer2 extends CompositeService { * @return * @throws Exception */ - private void 
setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { + private static void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { if (ZookeeperUtils.isKerberosEnabled(hiveConf)) { String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); if (principal.isEmpty()) { @@ -1099,6 +1102,7 @@ public class HiveServer2 extends CompositeService { */ static void deleteServerInstancesFromZooKeeper(String versionNumber) throws Exception { HiveConf hiveConf = new HiveConf(); +setUpZooKeeperAuth(hiveConf); CuratorFramework zooKeeperClient = hiveConf.getZKConfig().getNewZookeeperClient(); zooKeeperClient.start(); String rootNamespace = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE); @@ -1109,7 +1113,7 @@ public class HiveServer2 extends CompositeService { // Now for each path that is for the given versionNumber, delete the znode from ZooKeeper for (int i = 0; i < znodePaths.size(); i++) { String znodePath = znodePaths.get(i); - deleteSignal = new CountDownLatch(1); + zkDeleteSignal = new CountDownLatch(1); if (znodePath.contains("version=" + versionNumber + ";")) { String fullZnodePath = ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace @@ -1119,7 +1123,11 @@ public class HiveServer2 extends CompositeService { zooKeeperClient.delete().guaranteed().inBackground(new DeleteCallBack()) .forPath(fullZnodePath); // Wait for the delete to complete -deleteSignal.await(); +zkDeleteSignal.await(); +final KeeperException.Code rc = HiveServer2.zkDeleteResultCode; +if (rc != KeeperException.Code.OK) { + throw KeeperException.create(rc); +} // Get the updated path list znodePathsUpdated = zooKeeperClient.getChildren().forPath( @@ -1138,7 +1146,8 @@ public class HiveServer2 extends CompositeService { public void processResult(CuratorFramework zooKeeperClient, CuratorEvent event) throws Exception { if (event.getType() == CuratorEventType.DELETE) { -deleteSignal.countDown(); +zkDeleteResultCode = 
KeeperException.Code.get(event.getResultCode()); +zkDeleteSignal.countDown(); } } }
[hive] branch master updated: HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 15ebf9e HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor via Ashutosh Chauhan) 15ebf9e is described below commit 15ebf9e208f56b0e54ed513bff099473683dc6eb Author: David Mollitor AuthorDate: Fri Apr 17 20:48:08 2020 -0700 HIVE-23194 : Use Queue Instead of List for CollectOperator (David Mollitor via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java index 2ec9a4f..df69bf2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java @@ -19,7 +19,8 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; -import java.util.ArrayList; +import java.util.ArrayDeque; +import java.util.Queue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -37,7 +38,7 @@ public class CollectOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; - protected transient ArrayList rowList; + protected transient Queue rowList; protected transient ObjectInspector standardRowInspector; transient int maxSize; @@ -53,7 +54,7 @@ public class CollectOperator extends Operator implements @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); -rowList = new ArrayList(); +this.rowList = new ArrayDeque<>(); maxSize = conf.getBufferSize().intValue(); } @@ -83,7 +84,7 @@ public class CollectOperator extends Operator implements result.o 
= null; result.oi = null; } else { - result.o = rowList.remove(0); + result.o = rowList.poll(); result.oi = standardRowInspector; } }
[hive] branch master updated: HIVE-23233 : Using default operation logs location cause hive service session testing failed (RuiChen via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 465f698 HIVE-23233 : Using default operation logs location cause hive service session testing failed (RuiChen via Ashutosh Chauhan) 465f698 is described below commit 465f69809aebecd1e79f03d00678d26beb9213ec Author: Ashutosh Chauhan AuthorDate: Fri Apr 17 20:34:55 2020 -0700 HIVE-23233 : Using default operation logs location cause hive service session testing failed (RuiChen via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../service/cli/session/TestSessionCleanup.java| 27 ++ .../cli/session/TestSessionManagerMetrics.java | 4 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java index 51ce2c2..c19d97a 100644 --- a/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java +++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionCleanup.java @@ -23,12 +23,12 @@ import java.io.FilenameFilter; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.Random; import java.util.Set; - +import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hive.service.cli.CLIService; import org.apache.hive.service.cli.OperationHandle; import org.apache.hive.service.cli.SessionHandle; @@ -41,6 +41,11 @@ import org.junit.Test; * TestSessionCleanup. 
*/ public class TestSessionCleanup { + + private static final AtomicInteger salt = new AtomicInteger(new Random().nextInt()); + private final String TEST_DATA_DIR = System.getProperty("java.io.tmpdir") + File.separator + + TestSessionCleanup.class.getCanonicalName() + "-" + System.currentTimeMillis() + "_" + salt.getAndIncrement(); + // Create subclass of EmbeddedThriftBinaryCLIService, just so we can get an accessor to the CLIService. // Needed for access to the OperationManager. private class MyEmbeddedThriftBinaryCLIService extends EmbeddedThriftBinaryCLIService { @@ -61,10 +66,14 @@ public class TestSessionCleanup { hiveConf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); +//NOTES: Apply a random tmp directory to avoid default location conflicting with other tests +hiveConf +.setVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION, +TEST_DATA_DIR + File.separator + "operation_logs"); service.init(hiveConf); ThriftCLIServiceClient client = new ThriftCLIServiceClient(service); -Set existingPipeoutFiles = new HashSet(Arrays.asList(getPipeoutFiles())); +Set existingPipeoutFiles = new HashSet(Arrays.asList(getPipeoutFiles(hiveConf))); SessionHandle sessionHandle = client.openSession("user1", "foobar", Collections.emptyMap()); OperationHandle opHandle1 = client.executeStatement(sessionHandle, "set a=b", null); @@ -74,15 +83,15 @@ public class TestSessionCleanup { String queryId2 = service.getCliService().getQueryId(opHandle2.toTOperationHandle()); Assert.assertNotNull(queryId2); File operationLogRootDir = new File( -new HiveConf().getVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION)); -Assert.assertNotEquals(operationLogRootDir.list().length, 0); + hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION)); +Assert.assertNotEquals(0, operationLogRootDir.list().length); client.closeSession(sessionHandle); // Check if session 
files are removed -Assert.assertEquals(operationLogRootDir.list().length, 0); +Assert.assertEquals(0, operationLogRootDir.list().length); // Check if the pipeout files are removed -Set finalPipeoutFiles = new HashSet(Arrays.asList(getPipeoutFiles())); +Set finalPipeoutFiles = new HashSet(Arrays.asList(getPipeoutFiles(hiveConf))); finalPipeoutFiles.removeAll(existingPipeoutFiles); Assert.assertTrue(finalPipeoutFiles.isEmpty()); @@ -94,9 +103,9 @@ public class TestSessionCleanup { Assert.assertNull(service.getCliService().getSessionManager().getOperationManager().getOperationByQueryId(queryId1)); } - private String[] getPipeoutFiles() { + private String[] getPipeoutFiles(HiveConf hiveConf) { File localScratchDir = new File( -new HiveConf().getVar(HiveConf.ConfVars.LOCALSCRATCHDIR)); +hiveConf.getVar(HiveConf.ConfVars.LOCALSC
[hive] branch master updated: HIVE-23004 : Support Decimal64 operations across multiple vertices (Ramesh Kumar via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. hashutosh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 11d8298 HIVE-23004 : Support Decimal64 operations across multiple vertices (Ramesh Kumar via Ashutosh Chauhan) 11d8298 is described below commit 11d829828e401dd941c8010ecf5f87a8248852fb Author: RAMESH KUMAR THANGARAJAN AuthorDate: Fri Apr 17 20:20:12 2020 -0700 HIVE-23004 : Support Decimal64 operations across multiple vertices (Ramesh Kumar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../test/resources/testconfiguration.properties| 1 + .../ql/exec/spark/SparkReduceRecordHandler.java| 42 +-- .../hive/ql/exec/tez/ReduceRecordSource.java | 34 ++- .../hive/ql/optimizer/physical/Vectorizer.java | 18 +- .../clientpositive/vector_decimal64_multi_vertex.q | 46 +++ .../results/clientpositive/llap/keep_uniform.q.out | 10 +- .../llap/mapjoin_decimal_vectorized.q.out | 8 +- .../clientpositive/llap/vector_case_when_2.q.out | 8 +- .../clientpositive/llap/vector_cast_constant.q.out | 6 +- .../llap/vector_decimal64_case_when_nvl.q.out | 12 +- .../llap/vector_decimal64_case_when_nvl_cbo.q.out | 8 +- .../llap/vector_decimal64_multi_vertex.q.out | 328 + .../llap/vector_decimal_aggregate.q.out| 8 +- .../clientpositive/llap/vector_decimal_join.q.out | 4 +- .../clientpositive/llap/vector_decimal_round.q.out | 12 +- .../llap/vector_decimal_trailing.q.out | 2 +- .../clientpositive/llap/vector_decimal_udf.q.out | 8 +- .../llap/vector_outer_reference_windowed.q.out | 40 +-- .../llap/vector_windowing_rank.q.out | 4 +- .../clientpositive/llap/vectorization_17.q.out | 2 +- .../vectorized_dynamic_semijoin_reduction2.q.out | 4 +- .../llap/vectorized_parquet_types.q.out| 2 +- .../spark/vector_cast_constant.q.out | 6 +- .../spark/vector_decimal_aggregate.q.out | 8 +- .../clientpositive/spark/vectorization_17.q.out| 2 +- 
.../fast/BinarySortableDeserializeRead.java| 11 +- .../lazy/fast/LazySimpleDeserializeRead.java | 3 +- .../lazybinary/fast/LazyBinaryDeserializeRead.java | 16 +- 28 files changed, 540 insertions(+), 113 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 48f90fe..e04f7d5 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -890,6 +890,7 @@ minillaplocal.query.files=\ vector_decimal_udf.q,\ vector_decimal64_case_when_nvl.q,\ vector_decimal64_case_when_nvl_cbo.q,\ + vector_decimal64_multi_vertex.q,\ vector_full_outer_join.q,\ vector_fullouter_mapjoin_1_fast.q,\ vector_fullouter_mapjoin_1_optimized.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 07cb5cb..86f3aaa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; +import org.apache.hadoop.hive.ql.exec.vector.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.MapredContext; @@ -32,11 +33,6 @@ import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import 
org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; @@ -96,6 +92,7 @@ public class SparkReduceRecordHandler extends SparkRecordHandler { private VectorDeserializeRow valueLazyBinaryDeserializeToRow; private VectorizedRowBatch batch; + private VectorizedRowBatchCtx batchContext; private long batchBytes = 0; private boolean handleGroupKey = true; // For now. @@ -131,6 +128,7 @@ public class SparkReduceRecordHandler extends SparkRecordHandler { reducer = gWork.getReducer(); vectorized = gWork.getVectorMode(); reducer.setParentOperators(null); // clear out any parents as reducer