[hive] branch branch-2.3 updated: HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-2.3 (#3586)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new bba9d66454f HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-2.3 (#3586) bba9d66454f is described below commit bba9d66454fa3a3e9d0b0b58abbccc09aee380cf Author: Pavan Lanka AuthorDate: Mon Nov 14 10:51:31 2022 -0800 HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-2.3 (#3586) * HIVE-26522: Backport HIVE-22033 to branch-2.3 HIVE-22033: HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon) * HIVE-26522: Backport HIVE-26522 to branch-2.3 HIVE-26522: Added test for HIVE-22033 regarding delegation token renewal Co-authored-by: Szehon Ho --- ...TestTokenStoreDelegationTokenSecretManager.java | 109 + .../TokenStoreDelegationTokenSecretManager.java| 5 +- 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestTokenStoreDelegationTokenSecretManager.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestTokenStoreDelegationTokenSecretManager.java new file mode 100644 index 000..f2d701723e5 --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestTokenStoreDelegationTokenSecretManager.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; +import org.apache.hadoop.hive.thrift.DelegationTokenStore; +import org.apache.hadoop.hive.thrift.HiveDelegationTokenManager; +import org.apache.hadoop.hive.thrift.MemoryTokenStore; +import org.apache.hadoop.hive.thrift.TokenStoreDelegationTokenSecretManager; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + +/** + * Test the renewal of Delegation tokens obtained from the Metastore. 
+ */ +public class TestTokenStoreDelegationTokenSecretManager { + + private TokenStoreDelegationTokenSecretManager createTokenMgr(DelegationTokenStore tokenStore, + long renewSecs) { +return new TokenStoreDelegationTokenSecretManager( +HiveDelegationTokenManager.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT, +HiveDelegationTokenManager.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT, +renewSecs * 1000, +360, +tokenStore); + } + + private DelegationTokenIdentifier getID(String tokenStr) throws IOException { +DelegationTokenIdentifier id = new DelegationTokenIdentifier(); +Token token = new Token<>(); +token.decodeFromUrlString(tokenStr); +try (DataInputStream in = new DataInputStream( +new ByteArrayInputStream(token.getIdentifier( { + id.readFields(in); +} +return id; + } + + @Test public void testRenewal() throws IOException, InterruptedException { +DelegationTokenStore tokenStore = new MemoryTokenStore(); +// Have a long renewal to ensure that Thread.sleep does not overshoot the initial validity +TokenStoreDelegationTokenSecretManager mgr = createTokenMgr(tokenStore, 3600); +try { + mgr.startThreads(); + String tokenStr = + mgr.getDelegationToken(UserGroupInformation.getCurrentUser().getShortUserName()); + Assert.assertNotNull(mgr.verifyDelegationToken(tokenStr)); + DelegationTokenIdentifier id = getID(tokenStr); + long initialExpiry = tokenStore.getToken(id).getRenewDate(); + Thread.sleep(100); + Assert.assertTrue(System.currentTimeMillis() > id.getIssueDate()); + // No change in renewal date without renewal + Assert.assertEquals(tokenStore.getToken(id).getRenewDate(), initialExpiry); + mgr.renewDelegationToken(tokenStr); +
[hive] branch branch-3.1 updated: [WIP] HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-3.1 (#3587)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3.1 by this push: new 10184f5f647 [WIP] HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-3.1 (#3587) 10184f5f647 is described below commit 10184f5f6470df7dec7cf5ccc1fca3328ab93e67 Author: Pavan Lanka AuthorDate: Mon Nov 14 10:51:20 2022 -0800 [WIP] HIVE-26522: Backport of HIVE-22033 and HIVE-26522 to branch-3.1 (#3587) * HIVE-26522: Backport HIVE-22033 to branch-3.1 HIVE-22033: HiveServer2: fix delegation token renewal (Ion Alberdi via Szehon) * HIVE-26522: Backport HIVE-26522 to branch-3.1 HIVE-26522: Added test for HIVE-22033 regarding delegation token renewal Co-authored-by: Szehon Ho --- .../TokenStoreDelegationTokenSecretManager.java| 5 +- ...TestTokenStoreDelegationTokenSecretManager.java | 121 + 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java index 7b325449ce1..ee2ace8cbed 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java @@ -161,7 +161,10 @@ public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecre synchronized (this) { super.currentTokens.put(id, tokenInfo); try { -return super.renewToken(token, renewer); +long res = super.renewToken(token, renewer); +this.tokenStore.removeToken(id); +this.tokenStore.addToken(id, super.currentTokens.get(id)); +return res; } finally { super.currentTokens.remove(id); } diff --git 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java new file mode 100644 index 000..35e3d36ff8a --- /dev/null +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.security; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +/** + * Test the renewal of Delegation tokens obtained from the Metastore. + */ +@Category(MetastoreUnitTest.class) public class TestTokenStoreDelegationTokenSecretManager { + private final Configuration conf = MetastoreConf.newMetastoreConf(); + + private TokenStoreDelegationTokenSecretManager createTokenMgr(DelegationTokenStore tokenStore, + long renewSecs) { +MetastoreConf.setTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_RENEW_INTERVAL, +renewSecs, TimeUnit.SECONDS); +long secretKeyInterval = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_KEY_UPDATE_INTERVAL, +TimeUnit.MILLISECONDS); +long tokenMaxLifetime = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_MAX_LIFETIME, +TimeUnit.MILL
[hive] branch master updated: HIVE-26522: Added test for HIVE-22033 regarding delegation token renewal (#3585)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1ef7c958b22 HIVE-26522: Added test for HIVE-22033 regarding delegation token renewal (#3585) 1ef7c958b22 is described below commit 1ef7c958b22c9f1d696d1b8a523b4efc52572c49 Author: Pavan Lanka AuthorDate: Mon Nov 14 10:43:36 2022 -0800 HIVE-26522: Added test for HIVE-22033 regarding delegation token renewal (#3585) --- ...TestTokenStoreDelegationTokenSecretManager.java | 119 + 1 file changed, 119 insertions(+) diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java new file mode 100644 index 000..aaabd69e599 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/security/TestTokenStoreDelegationTokenSecretManager.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.security; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +/** + * Test the renewal of Delegation tokens obtained from the Metastore. + */ +@Category(MetastoreUnitTest.class) public class TestTokenStoreDelegationTokenSecretManager { + private final Configuration conf = MetastoreConf.newMetastoreConf(); + + private TokenStoreDelegationTokenSecretManager createTokenMgr(DelegationTokenStore tokenStore, + long renewSecs) { +MetastoreConf.setTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_RENEW_INTERVAL, +renewSecs, TimeUnit.SECONDS); +long secretKeyInterval = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_KEY_UPDATE_INTERVAL, +TimeUnit.MILLISECONDS); +long tokenMaxLifetime = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_MAX_LIFETIME, +TimeUnit.MILLISECONDS); +long tokenRenewInterval = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_RENEW_INTERVAL, +TimeUnit.MILLISECONDS); +long tokenGcInterval = +MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.DELEGATION_TOKEN_GC_INTERVAL, +TimeUnit.MILLISECONDS); +return new TokenStoreDelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, +tokenRenewInterval, tokenGcInterval, tokenStore); + } + + private DelegationTokenIdentifier getID(String tokenStr) throws IOException { +DelegationTokenIdentifier id = 
new DelegationTokenIdentifier(); +Token token = new Token<>(); +token.decodeFromUrlString(tokenStr); +try (DataInputStream in = new DataInputStream( +new ByteArrayInputStream(token.getIdentifier( { + id.readFields(in); +} +return id; + } + + @Test public void testRenewal() throws IOException, InterruptedException { +DelegationTokenStore tokenStore = new MemoryTokenStore(); +// Have a long renewal to ensure that Thread.sleep does not overshoot the initial validity +TokenStoreDelegationTokenSecretManager mgr = createTokenMgr(tokenStore, 3600); +try { + mgr.startThreads(); + String tokenStr = + mgr.getDelegationToken(UserGroupInformation.getCurrentUser().getShortUserName(), + UserGroup
[hive] branch master updated: HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2934)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 519a3c9 HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2934) 519a3c9 is described below commit 519a3c9e748af12cf781b8ef27e687fab5dab5fb Author: Prasanth Jayachandran AuthorDate: Wed Jan 19 13:27:12 2022 -0800 HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2934) * HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments * Empty commit to trigger test --- .../org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 9fc0987..372afd1 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -355,15 +355,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { if (uriResolverHook != null) { metastoreURIArray.addAll(uriResolverHook.resolveURI(tmpUri)); } else { - metastoreURIArray.add(new URI( - tmpUri.getScheme(), - tmpUri.getUserInfo(), - HadoopThriftAuthBridge.getBridge().getCanonicalHostName(tmpUri.getHost()), - tmpUri.getPort(), - tmpUri.getPath(), - tmpUri.getQuery(), - tmpUri.getFragment() - )); + metastoreURIArray.add(tmpUri); } } metastoreUris = new URI[metastoreURIArray.size()];
[hive] branch branch-3 updated: HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2933)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new d70b516 HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2933) d70b516 is described below commit d70b516a0be7ce1096336542fcd61a61633db2d2 Author: Prasanth Jayachandran AuthorDate: Wed Jan 19 13:27:19 2022 -0800 HIVE-25646: Thrift metastore URI reverse resolution could fail in some environments (#2933) --- .../org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 5709b13..66ea806 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -240,15 +240,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { if (uriResolverHook != null) { metastoreURIArray.addAll(uriResolverHook.resolveURI(tmpUri)); } else { - metastoreURIArray.add(new URI( - tmpUri.getScheme(), - tmpUri.getUserInfo(), - HadoopThriftAuthBridge.getBridge().getCanonicalHostName(tmpUri.getHost()), - tmpUri.getPort(), - tmpUri.getPath(), - tmpUri.getQuery(), - tmpUri.getFragment() - )); + metastoreURIArray.add(tmpUri); } } metastoreUris = new URI[metastoreURIArray.size()];
[hive] branch master updated (056fdaf -> fc93e54)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 056fdaf HIVE-24715: Increase bucketId range (Attila Magyar, reviewed by Peter Varga) add fc93e54 HIVE-24786: JDBC HttpClient should retry for idempotent and unsent http methods (#1983) No new revisions were added by this update. Summary of changes: .../java/org/apache/hive/jdbc/HiveConnection.java | 114 +++-- 1 file changed, 107 insertions(+), 7 deletions(-)
[hive] branch master updated (2e59376 -> 7935534)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 2e59376 HIVE-24456: Added HiveConf to check if FIPS mode is enabled, if so column masking should be done with SHA512 (#1721) (Sai Hemanth Gantasala, reviewed by Yongzhi Chen) add 7935534 HIVE-12371: Adding a timeout connection parameter for JDBC (#1611) No new revisions were added by this update. Summary of changes: jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java | 14 +++--- jdbc/src/java/org/apache/hive/jdbc/Utils.java | 1 + 2 files changed, 12 insertions(+), 3 deletions(-)
[hive] branch master updated (69357e9 -> 985266c)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 69357e9 HIVE-24754: Skip extra work in Cleaner if queue is empty (Denys Kuzmenko, reviewed by Karen Coppage) add 985266c HIVE-24501: UpdateInputAccessTimeHook should not update stats (#1752) No new revisions were added by this update. Summary of changes: .../hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-)
[hive] branch master updated (58552a0 -> 89073a9)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 58552a0 HIVE-24632: Replace with null when GenericUDFBaseCompare has a non-interpretable val (#1863) (Zhihua Deng reviewed by Zoltan Haindrich) add 89073a9 HIVE-24514: UpdateMDatabaseURI does not update managed location URI (#1761) No new revisions were added by this update. Summary of changes: .../apache/hadoop/hive/metastore/ObjectStore.java | 22 ++ 1 file changed, 22 insertions(+)
[hive] branch master updated (a0c9f8e -> 4d0d6d6)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from a0c9f8e HIVE-24475: Generalize fixacidkeyindex utility (Antal Sinkovits, reviewed by Peter Varga and Mahesh Kumar Behera)(#1730) add 4d0d6d6 HIVE-24497: Node heartbeats from LLAP Daemon to the client are not ma… (#1755) No new revisions were added by this update. Summary of changes: .../hadoop/hive/llap/daemon/impl/AMReporter.java | 28 ++ .../hive/llap/daemon/impl/TaskRunnerCallable.java | 2 +- .../daemon/impl/comparator/TestAMReporter.java | 4 ++-- 3 files changed, 26 insertions(+), 8 deletions(-)
[hive] branch master updated (72d983a -> c636bdf)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 72d983a HIVE-24144: getIdentifierQuoteString in HiveDatabaseMetaData returns incorrect value (Jesus Camacho Rodriguez, reviewed by Vineet Garg) add c636bdf HIVE-24426: Spark job fails with fixed LlapTaskUmbilicalServer port. (#1705) No new revisions were added by this update. Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 +- .../helpers/LlapTaskUmbilicalServer.java | 92 ++ .../hive/llap/tezplugins/LlapTaskCommunicator.java | 91 + 3 files changed, 156 insertions(+), 32 deletions(-)
[hive] branch master updated (1dc2564 -> 90ad548)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 1dc2564 HIVE-24009 : Support partition pruning and other physical transformations for EXECUTE statement (Vineet Garg, reviewed by Jesus Camacho Rodriguez) add 90ad548 HIVE-24188 - CTLT from MM to External fails because table txn properties are not skipped (#1516) No new revisions were added by this update. Summary of changes: .../create/like/CreateTableLikeOperation.java | 1 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- ql/src/test/queries/clientpositive/create_like2.q | 32 +++ .../results/clientpositive/llap/create_like2.q.out | 282 + 4 files changed, 316 insertions(+), 1 deletion(-)
[hive] branch master updated: HIVE-22290: ObjectStore.cleanWriteNotificationEvents and ObjectStore.cleanupEvents OutOfMemory on large number of pending events (#1484)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3a8edc0 HIVE-22290: ObjectStore.cleanWriteNotificationEvents and ObjectStore.cleanupEvents OutOfMemory on large number of pending events (#1484) 3a8edc0 is described below commit 3a8edc02f542d1dc7c6f715e4b7f11e30bf65c83 Author: Naresh P R AuthorDate: Tue Sep 15 20:05:37 2020 -0700 HIVE-22290: ObjectStore.cleanWriteNotificationEvents and ObjectStore.cleanupEvents OutOfMemory on large number of pending events (#1484) --- .../apache/hadoop/hive/metastore/ObjectStore.java | 81 -- 1 file changed, 60 insertions(+), 21 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index f866b94..f12ce84 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -10426,9 +10426,41 @@ public class ObjectStore implements RawStore, Configurable { int tooOld = (tmp > Integer.MAX_VALUE) ? 0 : (int) tmp; query = pm.newQuery(MTxnWriteNotificationLog.class, "eventTime < tooOld"); query.declareParameters("java.lang.Integer tooOld"); - Collection toBeRemoved = (Collection) query.execute(tooOld); - if (CollectionUtils.isNotEmpty(toBeRemoved)) { + + int max_events = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.EVENT_CLEAN_MAX_EVENTS); + max_events = max_events > 0 ? 
max_events : Integer.MAX_VALUE; + query.setRange(0, max_events); + query.setOrdering("txnId ascending"); + + List toBeRemoved = (List) query.execute(tooOld); + int iteration = 0; + int eventCount = 0; + long minTxnId = 0; + long minEventTime = 0; + long maxTxnId = 0; + long maxEventTime = 0; + while (CollectionUtils.isNotEmpty(toBeRemoved)) { +int listSize = toBeRemoved.size(); +if (iteration == 0) { + MTxnWriteNotificationLog firstNotification = toBeRemoved.get(0); + minTxnId = firstNotification.getTxnId(); + minEventTime = firstNotification.getEventTime(); +} +MTxnWriteNotificationLog lastNotification = toBeRemoved.get(listSize - 1); +maxTxnId = lastNotification.getTxnId(); +maxEventTime = lastNotification.getEventTime(); + pm.deletePersistentAll(toBeRemoved); +eventCount += listSize; +iteration++; +toBeRemoved = (List) query.execute(tooOld); + } + if (iteration == 0) { +LOG.info("No WriteNotification events found to be cleaned with eventTime < {}.", tooOld); + } else { +LOG.info("WriteNotification Cleaned {} events with eventTime < {} in {} iteration, " + +"minimum txnId {} (with eventTime {}) and maximum txnId {} (with eventTime {})", +eventCount, tooOld, iteration, minTxnId, minEventTime, maxTxnId, maxEventTime); } commited = commitTransaction(); } finally { @@ -10617,26 +10649,33 @@ public class ObjectStore implements RawStore, Configurable { query.setOrdering("eventId ascending"); List toBeRemoved = (List) query.execute(tooOld); - if (toBeRemoved == null || toBeRemoved.size() == 0) { -LOG.info("No events found to be cleaned with eventTime < {}.", tooOld); - } else { -NotificationEvent firstEvent = translateDbToThrift(toBeRemoved.get(0)); -long minEventId = firstEvent.getEventId(); -long minEventTime = firstEvent.getEventTime(); -long maxEventId = minEventId; -long maxEventTime = minEventTime; -if (toBeRemoved.size() > 1) { - NotificationEvent lastEvent = - translateDbToThrift(toBeRemoved.get(toBeRemoved.size() - 1)); - maxEventId = lastEvent.getEventId(); - 
maxEventTime = lastEvent.getEventTime(); -} -LOG.info("Cleaned {} events with eventTime < {}, minimum eventId {} (with eventTime {}) " + -"and maximum eventId {} (with eventTime {})", -toBeRemoved.size(), tooOld, minEventId, minEventTime, maxEventId, maxEventTime); - } - if (CollectionUtils.isNotEmpty(toBeRemoved)) { + int iteration = 0; + int eventCount = 0; + long minEventId = 0; + long minEventTime = 0; + long maxEventId = 0; + long maxEventTime = 0; + while (CollectionUtils.isNotEmpty(toBeRemoved)) { +int listSize = toBeRemoved.size
[hive] branch master updated (0a0c03b -> 13ac9cd)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 0a0c03b HIVE-24020: Fix Automatic compaction for streaming ingest with dynamic partition (#1382) add 13ac9cd HIVE-24061: Improve llap task scheduling for better cache hit rate (#1431) No new revisions were added by this update. Summary of changes: .../llap/tezplugins/LlapTaskSchedulerService.java | 40 +++--- 1 file changed, 36 insertions(+), 4 deletions(-)
[hive] branch master updated (cd4154e -> 0a0c03b)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from cd4154e HIVE-24068: Add re-execution plugin for handling DAG submission and unmanaged AM failures (#1428) add 0a0c03b HIVE-24020: Fix Automatic compaction for streaming ingest with dynamic partition (#1382) No new revisions were added by this update. Summary of changes: .../hive/ql/txn/compactor/TestCompactor.java | 115 + .../hive/streaming/AbstractRecordWriter.java | 21 ++-- 2 files changed, 124 insertions(+), 12 deletions(-)
[hive] branch master updated (1635a9d -> cd4154e)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 1635a9d HIVE-24041: Extend semijoin conversion rules (Jesus Camacho Rodriguez, reviewed by Krisztian Kasa) (addendum) add cd4154e HIVE-24068: Add re-execution plugin for handling DAG submission and unmanaged AM failures (#1428) No new revisions were added by this update. Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../org/apache/hadoop/hive/ql/DriverFactory.java | 4 ++ .../hive/ql/reexec/ReExecuteLostAMQueryPlugin.java | 24 +--- ...Plugin.java => ReExecutionDagSubmitPlugin.java} | 45 +- 4 files changed, 41 insertions(+), 34 deletions(-) copy ql/src/java/org/apache/hadoop/hive/ql/reexec/{ReExecutionOverlayPlugin.java => ReExecutionDagSubmitPlugin.java} (65%)
[hive] branch master updated (1635a9d -> cd4154e)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 1635a9d HIVE-24041: Extend semijoin conversion rules (Jesus Camacho Rodriguez, reviewed by Krisztian Kasa) (addendum) add cd4154e HIVE-24068: Add re-execution plugin for handling DAG submission and unmanaged AM failures (#1428) No new revisions were added by this update. Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../org/apache/hadoop/hive/ql/DriverFactory.java | 4 ++ .../hive/ql/reexec/ReExecuteLostAMQueryPlugin.java | 24 +--- ...Plugin.java => ReExecutionDagSubmitPlugin.java} | 45 +- 4 files changed, 41 insertions(+), 34 deletions(-) copy ql/src/java/org/apache/hadoop/hive/ql/reexec/{ReExecutionOverlayPlugin.java => ReExecutionDagSubmitPlugin.java} (65%)
[hive] branch master updated: HIVE-23972: Add external client ID to LLAP external client (#1350)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new d8bc9dc HIVE-23972: Add external client ID to LLAP external client (#1350) d8bc9dc is described below commit d8bc9dc5a35cb20861be090ff171a77a81618625 Author: jdere AuthorDate: Thu Aug 13 13:31:15 2020 -0700 HIVE-23972: Add external client ID to LLAP external client (#1350) Change-Id: Ica5fd933d26dc76c9f6fbbd4c8b578eaf20d5e69 Co-authored-by: Jason Dere --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java| 1 + .../apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java | 3 +++ .../apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java | 9 - 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index d58326f..4b69576 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -6240,6 +6240,7 @@ public class HiveConf extends Configuration { "hive\\.parquet\\..*", "hive\\.ppd\\..*", "hive\\.prewarm\\..*", +"hive\\.query\\.name", "hive\\.server2\\.thrift\\.resultset\\.default\\.fetch\\.size", "hive\\.server2\\.proxy\\.user", "hive\\.skewjoin\\..*", diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java index 3195ea7..e44dd2c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java @@ -342,6 +342,9 @@ public class TaskExecutorService extends AbstractService SignableVertexSpec fs = c.getVertexSpec(); value.append(isFirst ? 
" (" : ", ").append(c.getQueryId()) .append("/").append(fs.getVertexName()).append(c.isGuaranteed() ? ", guaranteed" : ""); + if (fs.getDagName() != null) { +value.append(", dagName ").append(fs.getDagName()); + } isFirst = false; } value.append(isFirst ? " (" : ", "); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index 01b2037..ebd041b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -233,7 +233,14 @@ public class GenericUDTFGetSplits extends GenericUDTF { + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled"); } ApplicationId applicationId = coordinator.createExtClientAppId(); -LOG.info("Generated appID {} for LLAP splits", applicationId.toString()); +String externalDagName = SessionState.get().getConf().getVar(ConfVars.HIVEQUERYNAME); + +StringBuilder sb = new StringBuilder(); +sb.append("Generated appID ").append(applicationId.toString()).append(" for LLAP splits"); +if (externalDagName != null) { + sb.append(", with externalID ").append(externalDagName); +} +LOG.info(sb.toString()); PlanFragment fragment = createPlanFragment(inputArgQuery, applicationId); TezWork tezWork = fragment.work;
[hive] branch master updated: HIVE-23582: LLAP: Make SplitLocationProvider impl pluggable (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e633ff8 HIVE-23582: LLAP: Make SplitLocationProvider impl pluggable (Prasanth Jayachandran reviewed by Gopal V) e633ff8 is described below commit e633ff8186c14377443881d8eb02f2644e20b510 Author: Prasanth Jayachandran AuthorDate: Fri Jun 5 16:46:27 2020 -0700 HIVE-23582: LLAP: Make SplitLocationProvider impl pluggable (Prasanth Jayachandran reviewed by Gopal V) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../org/apache/hadoop/hive/ql/exec/tez/Utils.java | 27 ++- .../apache/hadoop/hive/ql/exec/tez/TestUtils.java | 53 ++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index abd12c9..1464d6a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4704,6 +4704,10 @@ public class HiveConf extends Configuration { "instead of using the locations provided by the split itself. If there is no llap daemon " + "running, fall back to locations provided by the split. This is effective only if " + "hive.execution.mode is llap"), + LLAP_SPLIT_LOCATION_PROVIDER_CLASS("hive.llap.split.location.provider.class", + "org.apache.hadoop.hive.ql.exec.tez.HostAffinitySplitLocationProvider", + "Split location provider class to use during split generation for LLAP. This class should implement\n" + +"org.apache.hadoop.mapred.split.SplitLocationProvider interface"), LLAP_VALIDATE_ACLS("hive.llap.validate.acls", true, "Whether LLAP should reject permissive ACLs in some cases (e.g. 
its own management\n" + "protocol or ZK paths), similar to how ssh refuses a key with bad access permissions."), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java index d266bb1..92d64df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java @@ -28,6 +28,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.registry.LlapServiceInstance; import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.split.SplitLocationProvider; import org.apache.tez.common.counters.TezCounters; @@ -48,8 +50,29 @@ public class Utils { && HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS) && useCacheAffinity; SplitLocationProvider splitLocationProvider; -LOG.info("SplitGenerator using llap affinitized locations: " + useCustomLocations); -if (useCustomLocations) { +final String locationProviderClass = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_SPLIT_LOCATION_PROVIDER_CLASS); +final boolean customLocationProvider = + !HostAffinitySplitLocationProvider.class.getName().equals(locationProviderClass); +LOG.info("SplitGenerator using llap affinitized locations: {} locationProviderClass: {}", useCustomLocations, locationProviderClass); +if (customLocationProvider) { + SplitLocationProvider locationProviderImpl; + try { +// the implementation of SplitLocationProvider may have Configuration as a single arg constructor, so we try +// invoking that constructor first. If that does not exist, the fallback will use no-arg constructor. 
+locationProviderImpl = JavaUtils + .newInstance(JavaUtils.getClass(locationProviderClass, SplitLocationProvider.class), +new Class[]{Configuration.class}, new Object[]{conf}); + } catch (Exception e) { +LOG.warn("Unable to instantiate {} class. Will try no-arg constructor invocation..", locationProviderClass, e); +try { + locationProviderImpl = JavaUtils.newInstance(JavaUtils.getClass(locationProviderClass, +SplitLocationProvider.class)); +} catch (Exception ex) { + throw new IOException(ex); +} + } + return locationProviderImpl; +} else if (useCustomLocations) { LlapRegistryService serviceRegistry = LlapRegistryService.getClient(conf); return getCustomSplitLocationProvider(serviceRegistry, LOG); } else { diff --git a/ql/src/test/org/apache/
[hive] branch master updated: HIVE-21624: LLAP: Cpu metrics at thread level is broken (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e5ce723 HIVE-21624: LLAP: Cpu metrics at thread level is broken (Prasanth Jayachandran reviewed by Ashutosh Chauhan) e5ce723 is described below commit e5ce7232d6182e20d87a743c6f73fdfb566119f5 Author: Prasanth Jayachandran AuthorDate: Mon Jun 1 17:28:34 2020 -0700 HIVE-21624: LLAP: Cpu metrics at thread level is broken (Prasanth Jayachandran reviewed by Ashutosh Chauhan) --- .../org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java | 3 ++- .../org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java index 1d6e852..3195ea7 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorService.java @@ -91,7 +91,8 @@ import static com.google.common.base.Preconditions.checkNotNull; public class TaskExecutorService extends AbstractService implements Scheduler, SchedulerFragmentCompletingListener { private static final Logger LOG = LoggerFactory.getLogger(TaskExecutorService.class); - private static final String TASK_EXECUTOR_THREAD_NAME_FORMAT = "Task-Executor-%d"; + public static final String TASK_EXECUTOR_THREAD_NAME_FORMAT_PREFIX = "Task-Executor-"; + private static final String TASK_EXECUTOR_THREAD_NAME_FORMAT = TASK_EXECUTOR_THREAD_NAME_FORMAT_PREFIX + "%d"; private static final String WAIT_QUEUE_SCHEDULER_THREAD_NAME_FORMAT = "Wait-Queue-Scheduler-%d"; private static final long PREEMPTION_KILL_GRACE_MS = 500; // 500ms private static final int 
PREEMPTION_KILL_GRACE_SLEEP_MS = 50; // 50ms diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java index 57547c1..c1f91e1 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java @@ -71,6 +71,7 @@ import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics; import org.apache.hadoop.hive.common.JvmMetrics; import org.apache.hadoop.hive.llap.daemon.impl.ContainerRunnerImpl; +import org.apache.hadoop.hive.llap.daemon.impl.TaskExecutorService; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsRecordBuilder; @@ -213,7 +214,7 @@ public class LlapDaemonExecutorMetrics implements MetricsSource { this.userMetricsInfoMap.put(i, miu); this.executorThreadCpuTime[i] = registry.newGauge(mic, 0L); this.executorThreadUserTime[i] = registry.newGauge(miu, 0L); - this.executorNames.put(ContainerRunnerImpl.THREAD_NAME_FORMAT_PREFIX + i, i); + this.executorNames.put(TaskExecutorService.TASK_EXECUTOR_THREAD_NAME_FORMAT_PREFIX + i, i); } if (timedWindowAverageDataPoints > 0) {
[hive] branch master updated: HIVE-23477: LLAP: mmap allocation interruptions fails to notify other threads (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 80e0d0d HIVE-23477: LLAP: mmap allocation interruptions fails to notify other threads (Prasanth Jayachandran reviewed by Gopal V) 80e0d0d is described below commit 80e0d0d954d4d5680d659f17a06276b90b8571ad Author: Prasanth Jayachandran AuthorDate: Thu May 21 12:22:50 2020 -0700 HIVE-23477: LLAP: mmap allocation interruptions fails to notify other threads (Prasanth Jayachandran reviewed by Gopal V) --- .../hadoop/hive/llap/cache/BuddyAllocator.java | 39 ++ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java index 6934dba..c7a6402 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/BuddyAllocator.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.llap.cache; +import java.nio.channels.ClosedByInterruptException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -217,7 +218,11 @@ public final class BuddyAllocator } int initCount = doPreallocate ? maxArenas : 1; for (int i = 0; i < initCount; ++i) { - arenas[i].init(i); + try { +arenas[i].init(i); + } catch (ClosedByInterruptException e) { +throw new RuntimeException("Failed pre-allocating buddy allocator arena. 
", e); + } metrics.incrAllocatedArena(); } allocatedArenas.set(initCount); @@ -851,7 +856,7 @@ public final class BuddyAllocator return isDirect; } - private ByteBuffer preallocateArenaBuffer(int arenaSize) { + private ByteBuffer preallocateArenaBuffer(int arenaSize) throws ClosedByInterruptException { if (isMapped) { RandomAccessFile rwf = null; File rf = null; @@ -863,6 +868,15 @@ public final class BuddyAllocator // Use RW, not PRIVATE because the copy-on-write is irrelevant for a deleted file // see discussion in YARN-5551 for the memory accounting discussion return rwf.getChannel().map(MapMode.READ_WRITE, 0, arenaSize); + } catch (ClosedByInterruptException cbi) { +LlapIoImpl.LOG.warn("Interrupted while trying to allocate memory mapped arena", cbi); +// finally may not execute on thread interrupts so cleanup the arena file as it may be unmapped +IOUtils.closeQuietly(rwf); +if (rf != null) { + rf.delete(); + rf = null; +} +throw cbi; } catch (IOException ioe) { LlapIoImpl.LOG.warn("Failed trying to allocate memory mapped arena", ioe); // fail similarly when memory allocations fail @@ -892,7 +906,7 @@ public final class BuddyAllocator private byte[] headers; // Free list indices of each unallocated block, for quick lookup. private FreeList[] freeLists; -void init(int arenaIx) { +void init(int arenaIx) throws ClosedByInterruptException { this.arenaIx = arenaIx; try { data = preallocateArenaBuffer(arenaSize); @@ -1453,13 +1467,24 @@ public final class BuddyAllocator continue; // CAS race, look again. 
} assert data == null; -init(arenaIx); -boolean isCommited = allocatedArenas.compareAndSet(-arenaCount - 1, arenaCount + 1); -assert isCommited; +try { + init(arenaIx); + // if init did not throw interrupt exception then allocation succeeded and so increment and commit the arena + boolean isCommited = allocatedArenas.compareAndSet(-arenaCount - 1, arenaCount + 1); + assert isCommited; + metrics.incrAllocatedArena(); +} catch (ClosedByInterruptException e) { + LlapIoImpl.LOG.info("Received interrupt during arena {} allocation.. Ignoring..", arenaIx); + // not doing the notify in finally() block as thread interruptions may not execute finally + synchronized (this) { +this.notifyAll(); + } + continue; +} + synchronized (this) { this.notifyAll(); } -metrics.incrAllocatedArena(); return allocateWithSplit(freeListIx, dest, null, ix, dest.length, size, -1); } }
[hive] branch master updated: [Addendum: missed files] HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e89995a [Addendum: missed files] HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) e89995a is described below commit e89995afe1de914caa86ef99c54993ef809d9402 Author: Prasanth Jayachandran AuthorDate: Fri Apr 3 18:11:24 2020 -0700 [Addendum: missed files] HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) Signed-off-by: Prasanth Jayachandran --- .../hive/ql/exec/tez/CompileTimeCounters.java | 27 + .../hive/ql/hooks/CompileTimeCounterPreHook.java | 84 ++ .../queries/clientpositive/tez_compile_counters.q | 27 + .../clientpositive/llap/tez_compile_counters.q.out | 1540 4 files changed, 1678 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CompileTimeCounters.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CompileTimeCounters.java new file mode 100644 index 000..162643c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CompileTimeCounters.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import org.apache.hadoop.classification.InterfaceAudience; + +@InterfaceAudience.Private +public enum CompileTimeCounters { + TOTAL_FILE_SIZE, + RAW_DATA_SIZE, +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/CompileTimeCounterPreHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/CompileTimeCounterPreHook.java new file mode 100644 index 000..415ac42 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/CompileTimeCounterPreHook.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.hooks; + +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.tez.CompileTimeCounters; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.hooks.HookContext.HookType; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.tez.common.counters.TezCounters; + +/** + * Implementation of a pre execute hook that adds compile time tez counters to tez tasks. + */ +public class CompileTimeCounterPreHook implements ExecuteWithHookContext { + + @Override + public void run(HookContext hookContext) throws Exception { +assert(hookContext.getHookType() == HookType.PRE_EXEC_HOOK); +QueryPlan plan = hookContext.getQueryPlan(); +if (plan == null) { + return; +} + +int numMrJobs = Utilities.getMRTasks(plan.getRootTasks()).size(); +List rootTasks = Utilities.getTezTasks(plan.getRootTasks()); +int numTezJobs = rootTasks.size(); +if (numMrJobs + numTezJobs <= 0) { + return; // ignore cli
[hive] branch master updated: HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 265f183 HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) 265f183 is described below commit 265f183338c786e47e72d9617dfef3c3023b7a4f Author: Prasanth Jayachandran AuthorDate: Fri Apr 3 18:08:22 2020 -0700 HIVE-23118: Option for exposing compile time counters as tez counters (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) Signed-off-by: Prasanth Jayachandran --- .../test/resources/testconfiguration.properties| 1 + .../apache/hadoop/hive/ql/exec/tez/TezTask.java| 10 +++-- .../org/apache/hadoop/hive/ql/exec/tez/Utils.java | 26 ++ .../hive/ql/exec/tez/monitoring/TezJobMonitor.java | 12 +++--- .../hive/ql/hooks/PostExecTezSummaryPrinter.java | 6 + 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3e89071..f54c96e 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -689,6 +689,7 @@ minillaplocal.query.files=\ orc_llap.q,\ orc_llap_nonvector.q,\ orc_ppd_date.q,\ + tez_compile_counters.q,\ tez_input_counters.q,\ orc_ppd_decimal.q,\ orc_ppd_timestamp.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index 0bacb05..3599d19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -127,6 +127,9 @@ public class TezTask extends Task { return counters; } + public void setTezCounters(final TezCounters counters) { +this.counters = counters; + } @Override public int execute() 
{ @@ -235,7 +238,7 @@ public class TezTask extends Task { } // finally monitor will print progress until the job is done -TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx); +TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx, counters); rc = monitor.monitorExecution(); if (rc != 0) { @@ -245,7 +248,10 @@ public class TezTask extends Task { // fetch the counters try { Set statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS); - counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters(); + TezCounters dagCounters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters(); + // if initial counters exists, merge it with dag counters to get aggregated view + TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters); + counters = mergedCounters; } catch (Exception err) { // Don't fail execution due to counters - just don't print summary info LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. 
" + err, err); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java index cc9c4ce..d266bb1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.llap.registry.LlapServiceInstance; import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.split.SplitLocationProvider; +import org.apache.tez.common.counters.TezCounters; import org.slf4j.Logger; public class Utils { @@ -101,4 +102,29 @@ public class Utils { } return new HostAffinitySplitLocationProvider(locations); } + + + /** + * Merges two different tez counters into one + * + * @param counter1 - tez counter 1 + * @param counter2 - tez counter 2 + * @return - merged tez counter + */ + public static TezCounters mergeTezCounters(final TezCounters counter1, final TezCounters counter2) { +TezCounters merged = new TezCounters(); +if (counter1 != null) { + for (String counterGroup : counter1.getGroupNames()) { +merged.addGroup(counter1.getGroup(counterGroup)); + } +} + +if (counter2 != null) { + for (String counterGroup : counter2.getGroupNames()) { +merged.addGroup(counter2.getGroup(counterGroup)); + } +} + +return merged; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/
[hive] branch master updated: HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b5de84a HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra) b5de84a is described below commit b5de84a6ce27904562bb15d7fe800b6ae3c32670 Author: Prasanth Jayachandran AuthorDate: Mon Mar 9 21:42:33 2020 -0700 HIVE-22988: LLAP: If consistent splits is disabled ordering instances is not required (Prasanth Jayachandran reviewed by Slim Bouguerra) --- .../hive/llap/tezplugins/LlapTaskSchedulerService.java | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index d11bf13..48501e5 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -296,6 +296,7 @@ public class LlapTaskSchedulerService extends TaskScheduler { private int totalGuaranteed = 0, unusedGuaranteed = 0; + private final boolean consistentSplits; /** * An internal version to make sure we don't race and overwrite a newer totalGuaranteed count in * ZK with an older one, without requiring us to make ZK updates under the main writeLock. 
@@ -345,6 +346,7 @@ public class LlapTaskSchedulerService extends TaskScheduler { LOCK_METRICS); readLock = lock.readLock(); writeLock = lock.writeLock(); +this.consistentSplits = HiveConf.getBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS); if (conf.getBoolean(LLAP_PLUGIN_ENDPOINT_ENABLED, false)) { JobTokenSecretManager sm = null; @@ -444,8 +446,8 @@ public class LlapTaskSchedulerService extends TaskScheduler { String hostsString = HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS); LOG.info("Running with configuration: hosts={}, numSchedulableTasksPerNode={}, " -+ "nodeBlacklistConf={}, localityConf={}", -hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, localityDelayConf); ++ "nodeBlacklistConf={}, localityConf={} consistentSplits={}", +hostsString, numSchedulableTasksPerNode, nodeBlacklistConf, localityDelayConf, consistentSplits); this.amRegistry = TezAmRegistryImpl.create(conf, true); synchronized (LlapTaskCommunicator.pluginInitLock) { @@ -1476,7 +1478,13 @@ public class LlapTaskSchedulerService extends TaskScheduler { } /* fall through - miss in locality or no locality-requested */ - Collection instances = activeInstances.getAllInstancesOrdered(true); + Collection instances; + if (consistentSplits) { +instances = activeInstances.getAllInstancesOrdered(true); + } else { +// if consistent splits are not used we don't need the ordering as there will be no cache benefit anyways +instances = activeInstances.getAll(); + } List allNodes = new ArrayList<>(instances.size()); List activeNodesWithFreeSlots = new ArrayList<>(); for (LlapServiceInstance inst : instances) {
[hive] branch master updated: HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new a003428 HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) a003428 is described below commit a0034284fe02a5012f883704fcd57652519a4cd5 Author: Prasanth Jayachandran AuthorDate: Mon Mar 9 10:39:42 2020 -0700 HIVE-22979: Support total file size in statistics annotation (Prasanth Jayachandran reviewed by Jesus Camacho Rodriguez) --- .../hive/ql/optimizer/spark/SparkMapJoinOptimizer.java | 2 +- .../stats/annotation/StatsRulesProcFactory.java| 2 +- .../org/apache/hadoop/hive/ql/plan/Statistics.java | 18 +++--- .../org/apache/hadoop/hive/ql/stats/BasicStats.java| 15 --- .../org/apache/hadoop/hive/ql/stats/StatsUtils.java| 6 -- .../ql/exec/tez/TestVectorMapJoinFastHashTable.java| 2 +- 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 5dcd49b..0638caf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -215,7 +215,7 @@ public class SparkMapJoinOptimizer implements SemanticNodeProcessor { LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos); bigTablePosition = pos; bigTableFound = true; -bigInputStat = new Statistics(0, Long.MAX_VALUE, 0); +bigInputStat = new Statistics(0, Long.MAX_VALUE, Long.MAX_VALUE, 0); } else { // Either we've found multiple big table branches, or the current branch cannot // be a big table branch. Disable mapjoin for these cases. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 0ada066..43fc449 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -2116,7 +2116,7 @@ public class StatsRulesProcFactory { } } -Statistics wcStats = new Statistics(newNumRows, newDataSize, 0); +Statistics wcStats = new Statistics(newNumRows, newDataSize, 0, 0); wcStats.setBasicStatsState(statsState); // evaluate filter expression and update statistics diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index bc5f9d9..a4cb841 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -52,7 +52,10 @@ public class Statistics implements Serializable { private long numRows; private long runTimeNumRows; + // dataSize represents raw data size (estimated in-memory size based on row schema) after decompression and decoding. private long dataSize; + // totalFileSize represents on-disk size. 
+ private long totalFileSize; private long numErasureCodedFiles; private State basicStatsState; private Map columnStats; @@ -60,12 +63,13 @@ public class Statistics implements Serializable { private boolean runtimeStats; public Statistics() { -this(0, 0, 0); +this(0, 0, 0, 0); } - public Statistics(long nr, long ds, long numEcFiles) { + public Statistics(long nr, long ds, long fs, long numEcFiles) { numRows = nr; dataSize = ds; +totalFileSize = fs; numErasureCodedFiles = numEcFiles; runTimeNumRows = -1; columnStats = null; @@ -74,6 +78,14 @@ public class Statistics implements Serializable { updateBasicStatsState(); } + public void setTotalFileSize(final long totalFileSize) { +this.totalFileSize = totalFileSize; + } + + public long getTotalFileSize() { +return totalFileSize; + } + public long getNumRows() { return numRows; } @@ -191,7 +203,7 @@ public class Statistics implements Serializable { @Override public Statistics clone() { -Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles); +Statistics clone = new Statistics(numRows, dataSize, totalFileSize, numErasureCodedFiles); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apa
[hive] branch master updated: HIVE-22927: LLAP should filter tasks in HB, instead of killing all tasks on error attempts (Rajesh Balamohan reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new cfc12f0 HIVE-22927: LLAP should filter tasks in HB, instead of killing all tasks on error attempts (Rajesh Balamohan reviewed by Prasanth Jayachandran) cfc12f0 is described below commit cfc12f05f0c034f9aad149960e58d40902e0dcfe Author: Rajesh Balamohan AuthorDate: Wed Feb 26 15:14:52 2020 -0800 HIVE-22927: LLAP should filter tasks in HB, instead of killing all tasks on error attempts (Rajesh Balamohan reviewed by Prasanth Jayachandran) --- .../hive/llap/tezplugins/LlapTaskCommunicator.java | 57 +- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index e5dc378..b168f76 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -762,46 +762,37 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl { BiMap biMap = entityTracker.getContainerAttemptMapForNode(nodeId); if (biMap != null) { - HashMap attempts = new HashMap<>(); - for (int i = 0; i < tasks.get().length; ++i) { -boolean isGuaranteed = false; -if (guaranteed != null) { - isGuaranteed = ((BooleanWritable)guaranteed.get()[i]).get(); -} -attempts.put((TezTaskAttemptID)tasks.get()[i], isGuaranteed); - } - String error = ""; + Set error = new HashSet<>(); synchronized (biMap) { -for (Map.Entry entry : biMap.entrySet()) { - // TODO: this is a stopgap fix. We really need to change all mappings by unique node ID, - // or at least (in this case) track the latest unique ID for LlapNode and retry all - // older-node tasks proactively. 
For now let the heartbeats fail them. - TezTaskAttemptID attemptId = entry.getValue(); - String taskNodeId = entityTracker.getUniqueNodeId(attemptId); - // Unique ID is registered based on Submit response. Theoretically, we could get a ping - // when the task is valid but we haven't stored the unique ID yet, so taskNodeId is null. - // However, the next heartbeat(s) should get the value eventually and mark task as alive. - // Also, we prefer a missed heartbeat over a stuck query in case of discrepancy in ET. - if (taskNodeId != null && taskNodeId.equals(uniqueId)) { -Boolean isGuaranteed = attempts.get(attemptId); -if (isGuaranteed != null) { - getContext().taskAlive(attemptId); - scheduler.taskInfoUpdated(attemptId, isGuaranteed.booleanValue()); +for (int i = 0; i < tasks.get().length; ++i) { + boolean isGuaranteed = false; + if (guaranteed != null) { +isGuaranteed = ((BooleanWritable) guaranteed.get()[i]).get(); + } + TezTaskAttemptID attemptID = (TezTaskAttemptID) tasks.get()[i]; + + // Check if the taskAttempt is present in AM view + if (biMap.containsValue(attemptID)) { +String taskNodeId = entityTracker.getUniqueNodeId(attemptID); +if (taskNodeId != null && taskNodeId.equals(uniqueId)) { + getContext().taskAlive(attemptID); + scheduler.taskInfoUpdated(attemptID, isGuaranteed); + getContext().containerAlive(biMap.inverse().get(attemptID)); } else { - error += (attemptId + ", "); + error.add(attemptID); } -getContext().containerAlive(entry.getKey()); } } + } -if (!error.isEmpty()) { - LOG.info("The tasks we expected to be on the node are not there: " + error); - for (Map.Entry entry : biMap.entrySet()) { -LOG.info("Sending a kill for attempt {}, due to a ping from node with same host and same port but " + -"registered with different unique ID", entry.getValue()); -getContext().taskKilled(entry.getValue(), TaskAttemptEndReason.NODE_FAILED, + if (!error.isEmpty()) { +LOG.info("The tasks we expected to be on the node are not there: " + error); +for 
(TezTaskAttemptID attempt: error) { + LOG.info("Sending a kill for attempt {}, due to a ping from " + + "node with same host and same port but " + + "registered with different unique ID", attempt); + getContext().task
[hive] branch master updated: HIVE-22922: LLAP: ShuffleHandler may not find shuffle data if pod restarts in k8s (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f087eac HIVE-22922: LLAP: ShuffleHandler may not find shuffle data if pod restarts in k8s (Prasanth Jayachandran reviewed by Gopal V) f087eac is described below commit f087eac18a64f97e890427c5bdeff72269a86b55 Author: Prasanth Jayachandran AuthorDate: Wed Feb 26 15:12:33 2020 -0800 HIVE-22922: LLAP: ShuffleHandler may not find shuffle data if pod restarts in k8s (Prasanth Jayachandran reviewed by Gopal V) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java| 3 +++ .../hive/llap/daemon/impl/ContainerRunnerImpl.java| 19 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e419dc5..bfc2695 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4380,6 +4380,9 @@ public class HiveConf extends Configuration { "considering the AM to be dead.", "llap.am.liveness.connection.timeout-millis"), LLAP_DAEMON_AM_USE_FQDN("hive.llap.am.use.fqdn", true, "Whether to use FQDN of the AM machine when submitting work to LLAP."), +LLAP_DAEMON_EXEC_USE_FQDN("hive.llap.exec.use.fqdn", true, + "On non-kerberized clusters, where the hostnames are stable but ip address changes, setting this config\n" + +" to false will use ip address of llap daemon in execution context instead of FQDN"), // Not used yet - since the Writable RPC engine does not support this policy. 
LLAP_DAEMON_AM_LIVENESS_CONNECTION_SLEEP_BETWEEN_RETRIES_MS( "hive.llap.am.liveness.connection.sleep.between.retries.ms", "2000ms", diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index 2ae7871..6a13b55 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -15,6 +15,7 @@ package org.apache.hadoop.hive.llap.daemon.impl; import java.io.IOException; +import java.net.InetAddress; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.Arrays; @@ -27,6 +28,7 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.UgiFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.DaemonId; import org.apache.hadoop.hive.llap.LlapNodeId; @@ -117,6 +119,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu private final DaemonId daemonId; private final UgiFactory fsUgiFactory; private final SocketFactory socketFactory; + private final boolean execUseFQDN; public ContainerRunnerImpl(Configuration conf, int numExecutors, AtomicReference localShufflePort, AtomicReference localAddress, @@ -140,6 +143,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu this.queryTracker = queryTracker; this.executorService = executorService; completionListener = (SchedulerFragmentCompletingListener) executorService; +this.execUseFQDN = conf.getBoolean(HiveConf.ConfVars.LLAP_DAEMON_EXEC_USE_FQDN.varname, true); // Distribute the available memory between the tasks. 
this.memoryPerExecutor = (long)(totalMemoryAvailableBytes / (float) numExecutors); @@ -285,10 +289,23 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu boolean addTaskTimes = callableConf.getBoolean(tezSummary.varname, tezSummary.defaultBoolVal) && callableConf.getBoolean(llapTasks.varname, llapTasks.defaultBoolVal); + final String llapHost; + if (UserGroupInformation.isSecurityEnabled()) { +// when kerberos is enabled always use FQDN +llapHost = localAddress.get().getHostName(); + } else if (execUseFQDN) { +// when FQDN is explicitly requested (default) +llapHost = localAddress.get().getHostName(); + } else { +// when FQDN is not requested, use ip address +llapHost = localAddress.get().getAddress().getHostAddress(); + } + LOG.info("Using llap host: {} for execution context. hostName: {} hostAddress: {}", llapHost, +localAddress.get().getHostName(), localAddress.get().getAddress().getHostAddress()); // TODO: ide
[hive] branch master updated: HIVE-22908: AM caching connections to LLAP based on hostname and port does not work in kubernetes (Prasanth Jayachandran reviewed by Gopal Vijayaraghavan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 703cf29 HIVE-22908: AM caching connections to LLAP based on hostname and port does not work in kubernetes (Prasanth Jayachandran reviewed by Gopal Vijayaraghavan) 703cf29 is described below commit 703cf29fa14fffe7a7e0ee30cd11d16a5245bf1c Author: Prasanth Jayachandran AuthorDate: Thu Feb 20 14:38:33 2020 -0800 HIVE-22908: AM caching connections to LLAP based on hostname and port does not work in kubernetes (Prasanth Jayachandran reviewed by Gopal Vijayaraghavan) --- .../org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java index ad39963..25e10f4 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/AsyncPbRpcProxy.java @@ -15,6 +15,8 @@ package org.apache.hadoop.hive.llap; import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.security.PrivilegedAction; import java.util.HashSet; import java.util.Iterator; @@ -430,6 +432,9 @@ public abstract class AsyncPbRpcProxy nodeToken) { String hostId = getHostIdentifier(nodeId.getHostname(), nodeId.getPort()); +if (LOG.isDebugEnabled()) { + LOG.debug("Getting host proxies for {}", hostId); +} try { return hostProxies.get(hostId, new Callable() { @Override @@ -481,7 +486,16 @@ public abstract class AsyncPbRpcProxy
[hive] branch master updated: HIVE-21146 Enforce TransactionBatch size=1 for blob stores (#797)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bc7b38e HIVE-21146 Enforce TransactionBatch size=1 for blob stores (#797) bc7b38e is described below commit bc7b38e5fdb16d28319da407a80d78f4e8124d95 Author: dlavati AuthorDate: Thu Dec 5 11:51:23 2019 +0100 HIVE-21146 Enforce TransactionBatch size=1 for blob stores (#797) * HIVE-21146 Enforce TransactionBatch size=1 for blob stores Change-Id: Ia5f94c34a044c2990e95204de03b661d162874c7 * Apply _ prefix to tmp verification file * Rely on /tmp instead --- .../hive/streaming/HiveStreamingConnection.java| 25 ++ .../org/apache/hive/streaming/TestStreaming.java | 30 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java index f4e71f9..27dc6f2 100644 --- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java +++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java @@ -32,8 +32,11 @@ import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.hive.common.BlobStorageUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -520,6 +523,28 @@ public class HiveStreamingConnection implements StreamingConnection { LOG.error(errMsg); throw new ConnectionError(errMsg); } + +// batch size is only used for managed transactions, not for unmanaged single transactions 
+if (transactionBatchSize > 1) { + try (FileSystem fs = tableObject.getDataLocation().getFileSystem(conf)) { +if (BlobStorageUtils.isBlobStorageFileSystem(conf, fs)) { + // currently not all filesystems implement StreamCapabilities, while FSDataOutputStream does + Path path = new Path("/tmp", "_tmp_stream_verify_" + UUID.randomUUID().toString()); + try(FSDataOutputStream out = fs.create(path, false)){ +if (!out.hasCapability(StreamCapabilities.HFLUSH)) { + throw new ConnectionError( + "The backing filesystem only supports transaction batch sizes of 1, but " + transactionBatchSize + + " was requested."); +} +fs.deleteOnExit(path); + } catch (IOException e){ +throw new ConnectionError("Could not create path for database", e); + } +} + } catch (IOException e) { +throw new ConnectionError("Could not retrieve FileSystem of table", e); + } +} } private void beginNextTransaction() throws StreamingException { diff --git a/streaming/src/test/org/apache/hive/streaming/TestStreaming.java b/streaming/src/test/org/apache/hive/streaming/TestStreaming.java index 055672f..58b3ae2 100644 --- a/streaming/src/test/org/apache/hive/streaming/TestStreaming.java +++ b/streaming/src/test/org/apache/hive/streaming/TestStreaming.java @@ -114,7 +114,6 @@ import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - public class TestStreaming { private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class); @@ -1314,6 +1313,35 @@ public class TestStreaming { connection.close(); } + @Test + public void testTransactionBatchSizeValidation() throws Exception { +final String schemes = conf.get(HiveConf.ConfVars.HIVE_BLOBSTORE_SUPPORTED_SCHEMES.varname); +// the output stream of this FS doesn't support hflush, so the below test will fail +conf.setVar(HiveConf.ConfVars.HIVE_BLOBSTORE_SUPPORTED_SCHEMES, "raw"); + +StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder() +.withFieldDelimiter(',') +.build(); + +try { + 
HiveStreamingConnection.newBuilder() + .withDatabase(dbName) + .withTable(tblName) + .withAgentInfo("UT_" + Thread.currentThread().getName()) + .withRecordWriter(writer) + .withTransactionBatchSize(2) + .withHiveConf(conf) + .connect(); + + Assert.fail(); +} catch (ConnectionError e) { + Assert.assertTrue("Expected connection error due to batch sizes", + e.getMessage().contains("only supports transaction batch&q
[hive] branch master updated: HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 7a098d4 HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V) 7a098d4 is described below commit 7a098d49a3fb174e5bfd9ebe15e1cd041323cfa9 Author: Prasanth Jayachandran AuthorDate: Mon Jul 29 14:13:16 2019 -0700 HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V) --- .../hadoop/hive/llap/registry/impl/LlapRegistryService.java | 7 ++- .../hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java | 4 ++-- .../org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java | 3 ++- .../java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java | 8 .../apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java | 3 ++- .../apache/hive/service/server/HS2ActivePassiveHARegistry.java| 4 ++-- 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java index 2d05bda..9a64be0 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.llap.registry.ServiceRegistry; import org.apache.hadoop.hive.llap.registry.impl.LlapZookeeperRegistryImpl.ConfigChangeLockResult; import org.apache.hadoop.hive.registry.ServiceInstanceStateChangeListener; import org.apache.hadoop.registry.client.binding.RegistryUtils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.slf4j.Logger; @@ -92,7 
+93,11 @@ public class LlapRegistryService extends AbstractService { } public static String currentUser() { -return RegistryUtils.currentUser(); +try { + return UserGroupInformation.getCurrentUser().getShortUserName(); +} catch (IOException e) { + throw new RuntimeException(e); +} } @Override diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java index 9e1da9b..92c4771 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java @@ -447,8 +447,8 @@ public class LlapZookeeperRegistryImpl @Override protected String getZkPathUser(Configuration conf) { // External LLAP clients would need to set LLAP_ZK_REGISTRY_USER to the LLAP daemon user (hive), -// rather than relying on RegistryUtils.currentUser(). -return HiveConf.getVar(conf, ConfVars.LLAP_ZK_REGISTRY_USER, RegistryUtils.currentUser()); +// rather than relying on LlapRegistryService.currentUser(). 
+return HiveConf.getVar(conf, ConfVars.LLAP_ZK_REGISTRY_USER, LlapRegistryService.currentUser()); } /** diff --git a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java index 3ff732d..754e803 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java +++ b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java @@ -23,6 +23,7 @@ import org.apache.curator.framework.recipes.cache.PathChildrenCache; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; import org.apache.hadoop.registry.client.binding.RegistryTypeUtils; import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.registry.client.types.Endpoint; @@ -118,7 +119,7 @@ public class TezAmRegistryImpl extends ZkRegistryBase { @Override protected String getZkPathUser(Configuration conf) { // We assume that AMs and HS2 run under the same user. -return RegistryUtils.currentUser(); +return LlapRegistryService.currentUser(); } public String getRegistryName() { diff --git a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java index 7ca3548..e56ae11 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java +++ b/llap-client/src/java/org/apache/hadoop/hive/registry/impl
[hive] branch branch-3 updated: HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new a062077 HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V) a062077 is described below commit a062077f6699fc3d8310f64105a7237002dcb9c9 Author: Prasanth Jayachandran AuthorDate: Mon Jul 29 14:13:16 2019 -0700 HIVE-21970: Avoid using RegistryUtils.currentUser() (Prasanth Jayachandran reviewed by Gopal V) --- .../hadoop/hive/llap/registry/impl/LlapRegistryService.java | 7 ++- .../hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java | 4 ++-- .../org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java | 3 ++- .../java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java | 8 .../apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java | 3 ++- .../apache/hive/service/server/HS2ActivePassiveHARegistry.java| 4 ++-- 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java index 3bda40b..2289121 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.llap.registry.ServiceRegistry; import org.apache.hadoop.hive.registry.ServiceInstanceSet; import org.apache.hadoop.hive.registry.ServiceInstanceStateChangeListener; import org.apache.hadoop.registry.client.binding.RegistryUtils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.slf4j.Logger; @@ -81,7 +82,11 @@ public class 
LlapRegistryService extends AbstractService { } public static String currentUser() { -return RegistryUtils.currentUser(); +try { + return UserGroupInformation.getCurrentUser().getShortUserName(); +} catch (IOException e) { + throw new RuntimeException(e); +} } @Override diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java index f5d6202..67add92 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java @@ -413,7 +413,7 @@ public class LlapZookeeperRegistryImpl @Override protected String getZkPathUser(Configuration conf) { // External LLAP clients would need to set LLAP_ZK_REGISTRY_USER to the LLAP daemon user (hive), -// rather than relying on RegistryUtils.currentUser(). -return HiveConf.getVar(conf, ConfVars.LLAP_ZK_REGISTRY_USER, RegistryUtils.currentUser()); +// rather than relying on LlapRegistryService.currentUser(). 
+return HiveConf.getVar(conf, ConfVars.LLAP_ZK_REGISTRY_USER, LlapRegistryService.currentUser()); } } diff --git a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java index 3ff732d..754e803 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java +++ b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/TezAmRegistryImpl.java @@ -23,6 +23,7 @@ import org.apache.curator.framework.recipes.cache.PathChildrenCache; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; import org.apache.hadoop.registry.client.binding.RegistryTypeUtils; import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.registry.client.types.Endpoint; @@ -118,7 +119,7 @@ public class TezAmRegistryImpl extends ZkRegistryBase { @Override protected String getZkPathUser(Configuration conf) { // We assume that AMs and HS2 run under the same user. -return RegistryUtils.currentUser(); +return LlapRegistryService.currentUser(); } public String getRegistryName() { diff --git a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java index 7ca3548..e56ae11 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java +++ b/llap-client/src/java/org/apache/hadoop/hive/registry/impl/ZkRegistryBase.java @@ -669,4 +669,12 @@ public abstract
[hive] branch branch-3 updated: HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 91c243c HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran) 91c243c is described below commit 91c243c11a14534aa3e96f7ed5ce42fdfc656f24 Author: Rajkumar Singh AuthorDate: Sat Jul 6 22:01:15 2019 -0700 HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran) --- jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java | 12 +++- jdbc/src/java/org/apache/hive/jdbc/Utils.java | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java index a4920bf..e3dda87 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java @@ -218,11 +218,16 @@ public class HiveConnection implements java.sql.Connection { executeInitSql(); } else { int maxRetries = 1; + long retryInterval = 1000L; try { String strRetries = sessConfMap.get(JdbcConnectionParams.RETRIES); if (StringUtils.isNotBlank(strRetries)) { maxRetries = Integer.parseInt(strRetries); } +String strRetryInterval = sessConfMap.get(JdbcConnectionParams.RETRY_INTERVAL); +if(StringUtils.isNotBlank(strRetryInterval)){ + retryInterval = Long.parseLong(strRetryInterval); +} } catch(NumberFormatException e) { // Ignore the exception } @@ -263,7 +268,12 @@ public class HiveConnection implements java.sql.Connection { if (numRetries >= maxRetries) { throw new SQLException(errMsg + e.getMessage(), " 08S01", e); } else { -LOG.warn(warnMsg + e.getMessage() + " Retrying " + numRetries + " of " + maxRetries); +LOG.warn(warnMsg + e.getMessage() + " Retrying " + numRetries + " of " + maxRetries+" with retry 
interval "+retryInterval+"ms"); +try { + Thread.sleep(retryInterval); +} catch (InterruptedException ex) { + //Ignore +} } } } diff --git a/jdbc/src/java/org/apache/hive/jdbc/Utils.java b/jdbc/src/java/org/apache/hive/jdbc/Utils.java index 42b3975..881c1d9 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/Utils.java +++ b/jdbc/src/java/org/apache/hive/jdbc/Utils.java @@ -81,6 +81,7 @@ public class Utils { // Retry setting static final String RETRIES = "retries"; +public static final String RETRY_INTERVAL = "retryInterval"; public static final String AUTH_TYPE = "auth"; // We're deprecating this variable's name.
[hive] branch master updated: HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 71adb04 HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran) 71adb04 is described below commit 71adb04eaec81a0501621af4b06c1f9b0e3fc024 Author: Rajkumar Singh AuthorDate: Sat Jul 6 22:01:15 2019 -0700 HIVE-21925: HiveConnection retries should support backoff (Rajkumar Singh reviewed by Prasanth Jayachandran) --- jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java | 12 +++- jdbc/src/java/org/apache/hive/jdbc/Utils.java | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java index ec9c193..cb0b0d1 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java @@ -298,11 +298,16 @@ public class HiveConnection implements java.sql.Connection { executeInitSql(); } else { int maxRetries = 1; + long retryInterval = 1000L; try { String strRetries = sessConfMap.get(JdbcConnectionParams.RETRIES); if (StringUtils.isNotBlank(strRetries)) { maxRetries = Integer.parseInt(strRetries); } +String strRetryInterval = sessConfMap.get(JdbcConnectionParams.RETRY_INTERVAL); +if(StringUtils.isNotBlank(strRetryInterval)){ + retryInterval = Long.parseLong(strRetryInterval); +} } catch(NumberFormatException e) { // Ignore the exception } @@ -343,7 +348,12 @@ public class HiveConnection implements java.sql.Connection { if (numRetries >= maxRetries) { throw new SQLException(errMsg + e.getMessage(), " 08S01", e); } else { -LOG.warn(warnMsg + e.getMessage() + " Retrying " + numRetries + " of " + maxRetries); +LOG.warn(warnMsg + e.getMessage() + " Retrying " + numRetries + " of " + maxRetries+" with retry interval 
"+retryInterval+"ms"); +try { + Thread.sleep(retryInterval); +} catch (InterruptedException ex) { + //Ignore +} } } } diff --git a/jdbc/src/java/org/apache/hive/jdbc/Utils.java b/jdbc/src/java/org/apache/hive/jdbc/Utils.java index 3c18f09..e23826e 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/Utils.java +++ b/jdbc/src/java/org/apache/hive/jdbc/Utils.java @@ -81,6 +81,7 @@ public class Utils { // Retry setting static final String RETRIES = "retries"; +public static final String RETRY_INTERVAL = "retryInterval"; public static final String AUTH_TYPE = "auth"; // We're deprecating this variable's name.
[hive] branch branch-3 updated: HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 2b0af5e HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere) 2b0af5e is described below commit 2b0af5e184afc60c95214dc7e14868a3c43a99b2 Author: Prasanth Jayachandran AuthorDate: Mon Jun 24 15:37:54 2019 -0700 HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere) --- .../org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index 30673ce..c925a3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -93,6 +93,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SplitLocationInfo; +import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -441,7 +442,7 @@ public class GenericUDTFGetSplits extends GenericUDTF { } // This assumes LLAP cluster owner is always the HS2 user. - String llapUser = UserGroupInformation.getLoginUser().getShortUserName(); + String llapUser = RegistryUtils.currentUser(); String queryUser = null; byte[] tokenBytes = null;
[hive] branch master updated: HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 84b5ba7 HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere) 84b5ba7 is described below commit 84b5ba7ac9f93c6a496386db91ae4cd5ab7a451d Author: Prasanth Jayachandran AuthorDate: Mon Jun 24 15:37:54 2019 -0700 HIVE-21913: GenericUDTFGetSplits should handle usernames in the same way as LLAP (Prasanth Jayachandran reviewed by Jason Dere) --- .../org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index 30673ce..c925a3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -93,6 +93,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SplitLocationInfo; +import org.apache.hadoop.registry.client.binding.RegistryUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -441,7 +442,7 @@ public class GenericUDTFGetSplits extends GenericUDTF { } // This assumes LLAP cluster owner is always the HS2 user. - String llapUser = UserGroupInformation.getLoginUser().getShortUserName(); + String llapUser = RegistryUtils.currentUser(); String queryUser = null; byte[] tokenBytes = null;
[hive] 01/02: HIVE-19568 : Active/Passive HS2 HA: Disallow direct connection to passive HS2 instance (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git commit 096627cd0fb9a87326dbc968cda0907759e3971a Author: sergey AuthorDate: Mon Jul 30 12:16:47 2018 -0700 HIVE-19568 : Active/Passive HS2 HA: Disallow direct connection to passive HS2 instance (Sergey Shelukhin, reviewed by Prasanth Jayachandran) --- .../org/apache/hive/jdbc/TestActivePassiveHA.java | 64 -- .../hive/service/cli/session/TestQueryDisplay.java | 2 +- .../java/org/apache/hive/jdbc/miniHS2/MiniHS2.java | 4 ++ .../org/apache/hive/service/cli/CLIService.java| 11 ++-- .../hive/service/cli/session/SessionManager.java | 48 +++- .../cli/thrift/EmbeddedThriftBinaryCLIService.java | 3 +- .../apache/hive/service/server/HiveServer2.java| 37 +++-- .../hive/service/auth/TestPlainSaslHelper.java | 2 +- .../cli/TestCLIServiceConnectionLimits.java| 2 +- .../hive/service/cli/TestCLIServiceRestore.java| 2 +- .../cli/TestRetryingThriftCLIServiceClient.java| 2 +- .../cli/session/TestPluggableHiveSessionImpl.java | 4 +- .../cli/session/TestSessionGlobalInitFile.java | 2 +- .../cli/session/TestSessionManagerMetrics.java | 2 +- 14 files changed, 147 insertions(+), 38 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestActivePassiveHA.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestActivePassiveHA.java index 4055f13..bf24ebf 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestActivePassiveHA.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestActivePassiveHA.java @@ -43,7 +43,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.registry.impl.ZkRegistryBase; import org.apache.hive.http.security.PamAuthenticator; +import org.apache.hive.jdbc.Utils.JdbcConnectionParams; import org.apache.hive.jdbc.miniHS2.MiniHS2; +import 
org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.server.HS2ActivePassiveHARegistry; import org.apache.hive.service.server.HS2ActivePassiveHARegistryClient; import org.apache.hive.service.server.HiveServer2Instance; @@ -383,11 +385,7 @@ public class TestActivePassiveHA { assertEquals("false", sendGet(url2, true, true)); assertEquals(false, miniHS2_2.isLeader()); } finally { - // revert configs to not affect other tests - unsetPamConfs(hiveConf1); - unsetPamConfs(hiveConf2); - hiveConf1.unset(ConfVars.HIVE_SERVER2_WEBUI_ENABLE_CORS.varname); - hiveConf2.unset(ConfVars.HIVE_SERVER2_WEBUI_ENABLE_CORS.varname); + resetFailoverConfs(); } } @@ -427,6 +425,62 @@ public class TestActivePassiveHA { } @Test(timeout = 6) + public void testNoConnectionOnPassive() throws Exception { +hiveConf1.setBoolVar(ConfVars.HIVE_SERVER2_WEBUI_ENABLE_CORS, true); +hiveConf2.setBoolVar(ConfVars.HIVE_SERVER2_WEBUI_ENABLE_CORS, true); +setPamConfs(hiveConf1); +setPamConfs(hiveConf2); +try { + PamAuthenticator pamAuthenticator1 = new TestHS2HttpServerPam.TestPamAuthenticator(hiveConf1); + PamAuthenticator pamAuthenticator2 = new TestHS2HttpServerPam.TestPamAuthenticator(hiveConf2); + String instanceId1 = UUID.randomUUID().toString(); + miniHS2_1.setPamAuthenticator(pamAuthenticator1); + miniHS2_1.start(getSecureConfOverlay(instanceId1)); + String instanceId2 = UUID.randomUUID().toString(); + Map confOverlay = getSecureConfOverlay(instanceId2); + miniHS2_2.setPamAuthenticator(pamAuthenticator2); + miniHS2_2.start(confOverlay); + String url1 = "http://localhost:; + hiveConf1.get(ConfVars.HIVE_SERVER2_WEBUI_PORT.varname) + "/leader"; + assertEquals(true, miniHS2_1.getIsLeaderTestFuture().get()); + assertEquals(true, miniHS2_1.isLeader()); + + // Don't get urls from ZK, it will actually be a service discovery URL that we don't want. 
+ String hs1Url = "jdbc:hive2://" + miniHS2_1.getHost() + ":" + miniHS2_1.getBinaryPort(); + Connection hs2Conn = getConnection(hs1Url, System.getProperty("user.name")); // Should work. + hs2Conn.close(); + + String resp = sendDelete(url1, true); + assertTrue(resp, resp.contains("Failover successful!")); + // wait for failover to close sessions + while (miniHS2_1.getOpenSessionsCount() != 0) { +Thread.sleep(100); + } + + assertEquals(true, miniHS2_2.getIsLeaderTestFuture().get()); + assertEquals(true, miniHS2_2.isLeader()); + + try { +hs2Conn = getConnection(hs1Url, System.getProperty("user.name")); +fail("Should throw"); + } catch (Exception e) { +if (!e.getMessage().contains("Cannot ope
[hive] 02/02: HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git commit 36c29cbcec2d80ddda8f95537a636ad2c2b73a31 Author: Richard Zhang AuthorDate: Mon Jun 24 11:50:21 2019 -0700 HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran) --- .../java/org/apache/hive/service/cli/session/SessionManager.java | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java index 694a691..277519c 100644 --- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java @@ -65,7 +65,11 @@ import org.slf4j.LoggerFactory; public class SessionManager extends CompositeService { private static final String INACTIVE_ERROR_MESSAGE = - "Cannot open sessions on an inactive HS2 instance; use service discovery to connect"; + "Cannot open sessions on an inactive HS2 instance, " + + "or the HS2 server leader is not ready; please use service discovery to " + + "connect the server leader again"; + private static final String FAIL_CLOSE_ERROR_MESSAGE="Cannot close the session opened " + + "during the HA state change time"; public static final String HIVERCFILE = ".hiverc"; private static final Logger LOG = LoggerFactory.getLogger(CompositeService.class); private HiveConf hiveConf; @@ -478,7 +482,7 @@ public class SessionManager extends CompositeService { } catch (Exception e) { LOG.warn("Failed to close the session opened during an HA state change; ignoring", e); } - throw new HiveSQLException(INACTIVE_ERROR_MESSAGE); + throw new HiveSQLException(FAIL_CLOSE_ERROR_MESSAGE); } LOG.info("Session opened, " + session.getSessionHandle() + ", current sessions:" + getOpenSessionCount());
[hive] branch branch-3 updated (ff598bc -> 36c29cb)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git. from ff598bc HIVE-21872: Bucketed tables that load data from data/files/auto_sortmerge_join should be tagged as 'bucketing_version'='1' (Jesus Camacho Rodriguez, reviewed by Vineet Garg) new 096627c HIVE-19568 : Active/Passive HS2 HA: Disallow direct connection to passive HS2 instance (Sergey Shelukhin, reviewed by Prasanth Jayachandran) new 36c29cb HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../org/apache/hive/jdbc/TestActivePassiveHA.java | 64 -- .../hive/service/cli/session/TestQueryDisplay.java | 2 +- .../java/org/apache/hive/jdbc/miniHS2/MiniHS2.java | 4 ++ .../org/apache/hive/service/cli/CLIService.java| 11 ++-- .../hive/service/cli/session/SessionManager.java | 52 +- .../cli/thrift/EmbeddedThriftBinaryCLIService.java | 3 +- .../apache/hive/service/server/HiveServer2.java| 37 +++-- .../hive/service/auth/TestPlainSaslHelper.java | 2 +- .../cli/TestCLIServiceConnectionLimits.java| 2 +- .../hive/service/cli/TestCLIServiceRestore.java| 2 +- .../cli/TestRetryingThriftCLIServiceClient.java| 2 +- .../cli/session/TestPluggableHiveSessionImpl.java | 4 +- .../cli/session/TestSessionGlobalInitFile.java | 2 +- .../cli/session/TestSessionManagerMetrics.java | 2 +- 14 files changed, 151 insertions(+), 38 deletions(-)
[hive] branch master updated: HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new c0a9ebd HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran) c0a9ebd is described below commit c0a9ebdf23209b8a868c6a55ec91c974de9fe667 Author: Richard Zhang AuthorDate: Mon Jun 24 11:50:21 2019 -0700 HIVE-21825: Improve client error msg when Active/Passive HA is enabled (Richard Zhang reviewed by Prasanth Jayachandran) --- .../java/org/apache/hive/service/cli/session/SessionManager.java | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java index 694a691..277519c 100644 --- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java @@ -65,7 +65,11 @@ import org.slf4j.LoggerFactory; public class SessionManager extends CompositeService { private static final String INACTIVE_ERROR_MESSAGE = - "Cannot open sessions on an inactive HS2 instance; use service discovery to connect"; + "Cannot open sessions on an inactive HS2 instance, " + + "or the HS2 server leader is not ready; please use service discovery to " + + "connect the server leader again"; + private static final String FAIL_CLOSE_ERROR_MESSAGE="Cannot close the session opened " + + "during the HA state change time"; public static final String HIVERCFILE = ".hiverc"; private static final Logger LOG = LoggerFactory.getLogger(CompositeService.class); private HiveConf hiveConf; @@ -478,7 +482,7 @@ public class SessionManager extends CompositeService { } catch (Exception e) { LOG.warn("Failed to close the session opened during an HA state change; ignoring", 
e); } - throw new HiveSQLException(INACTIVE_ERROR_MESSAGE); + throw new HiveSQLException(FAIL_CLOSE_ERROR_MESSAGE); } LOG.info("Session opened, " + session.getSessionHandle() + ", current sessions:" + getOpenSessionCount());
[hive] branch master updated: HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1475050 HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran) 1475050 is described below commit 1475050ecfd1bed0385bcc62698801acf1ed5270 Author: Ashutosh Bapat AuthorDate: Sat Jun 15 20:00:15 2019 -0700 HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 + .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 ++- ...estImproperTrustDomainAuthenticationBinary.java | 28 +++ .../TestImproperTrustDomainAuthenticationHttp.java | 28 +++ .../auth/TestTrustDomainAuthenticationBinary.java | 28 +++ .../auth/TestTrustDomainAuthenticationHttp.java| 28 +++ .../auth/TrustDomainAuthenticationTest.java| 192 + .../apache/hive/service/auth/HiveAuthFactory.java | 5 + .../apache/hive/service/auth/PlainSaslHelper.java | 54 ++ .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 -- 10 files changed, 422 insertions(+), 25 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2cea174..03a8019 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3478,6 +3478,14 @@ public class HiveConf extends Configuration { " (Use with property hive.server2.custom.authentication.class)\n" + " PAM: Pluggable authentication module\n" + " NOSASL: Raw transport"), +HIVE_SERVER2_TRUSTED_DOMAIN("hive.server2.trusted.domain", "", +"Specifies the host or a domain to trust connections from. 
Authentication is skipped " + +"for any connection coming from a host whose hostname ends with the value of this" + +" property. If authentication is expected to be skipped for connections from " + +"only a given host, fully qualified hostname of that host should be specified. By default" + +" it is empty, which means that all the connections to HiveServer2 are authenticated. " + +"When it is non-empty, the client has to provide a Hive user name. Any password, if " + +"provided, will not be used when authentication is skipped."), HIVE_SERVER2_ALLOW_USER_SUBSTITUTION("hive.server2.allow.user.substitution", true, "Allow alternate user to be specified as part of HiveServer2 open connection request."), HIVE_SERVER2_KERBEROS_KEYTAB("hive.server2.authentication.kerberos.keytab", "", diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index 7d1192a..e604f90 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.miniHS2.MiniHS2; import com.google.common.io.Files; +import org.apache.hive.service.server.HiveServer2; /** * Wrapper around Hadoop's MiniKdc for use in hive tests. 
@@ -178,15 +179,21 @@ public class MiniHiveKdc { * @return new MiniHS2 instance * @throws Exception */ - public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, + public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, String authType) throws Exception { - String hivePrincipal = - miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); - String hiveKeytab = miniHiveKdc.getKeyTabFile( - miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); - - return new MiniHS2.Builder().withConf(hiveConf).withMiniKdc(hivePrincipal, hiveKeytab). - withAuthenticationType(authType).build(); +String hivePrincipal = + miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); +String hiveKeytab = miniHiveKdc.getKeyTabFile( + miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); + +MiniHS2.Builder miniHS2Builder = new MiniHS2.Builder() + .withConf(hiveConf) + .withMiniKdc(hivePrincipal, hiveKeytab) + .withAuthen
[hive] branch master updated (56fc46b -> b65a70a)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 56fc46b Merge pull request #648 from ashutosh-bapat/hive21783 add ab79fcb Revert "HIVE-21783: Accept Hive connections from the same domain without authentication." new b65a70a Merge pull request #675 from apache/revert-648-hive21783 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 - .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 +-- ...estImproperTrustDomainAuthenticationBinary.java | 28 --- .../TestImproperTrustDomainAuthenticationHttp.java | 28 --- .../auth/TestTrustDomainAuthenticationBinary.java | 28 --- .../auth/TestTrustDomainAuthenticationHttp.java| 28 --- .../auth/TrustDomainAuthenticationTest.java| 192 - .../apache/hive/service/auth/HiveAuthFactory.java | 5 - .../apache/hive/service/auth/PlainSaslHelper.java | 54 -- .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 ++ 10 files changed, 25 insertions(+), 422 deletions(-) delete mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestImproperTrustDomainAuthenticationBinary.java delete mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestImproperTrustDomainAuthenticationHttp.java delete mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestTrustDomainAuthenticationBinary.java delete mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestTrustDomainAuthenticationHttp.java delete mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TrustDomainAuthenticationTest.java
[hive] 01/01: Merge pull request #675 from apache/revert-648-hive21783
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit b65a70a58eab9e2e9b23a138f01de63e573a278e Merge: 56fc46b ab79fcb Author: Prasanth Jayachandran AuthorDate: Sat Jun 15 19:58:45 2019 -0700 Merge pull request #675 from apache/revert-648-hive21783 Revert "HIVE-21783: Accept Hive connections from the same domain without authentication." .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 - .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 +-- ...estImproperTrustDomainAuthenticationBinary.java | 28 --- .../TestImproperTrustDomainAuthenticationHttp.java | 28 --- .../auth/TestTrustDomainAuthenticationBinary.java | 28 --- .../auth/TestTrustDomainAuthenticationHttp.java| 28 --- .../auth/TrustDomainAuthenticationTest.java| 192 - .../apache/hive/service/auth/HiveAuthFactory.java | 5 - .../apache/hive/service/auth/PlainSaslHelper.java | 54 -- .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 ++ 10 files changed, 25 insertions(+), 422 deletions(-)
[hive] branch revert-648-hive21783 created (now ab79fcb)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch revert-648-hive21783 in repository https://gitbox.apache.org/repos/asf/hive.git. at ab79fcb Revert "HIVE-21783: Accept Hive connections from the same domain without authentication." This branch includes the following new commits: new ab79fcb Revert "HIVE-21783: Accept Hive connections from the same domain without authentication." The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[hive] 01/01: Revert "HIVE-21783: Accept Hive connections from the same domain without authentication."
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch revert-648-hive21783 in repository https://gitbox.apache.org/repos/asf/hive.git commit ab79fcbebe57d97e9661c67ad0083ced0d6971fa Author: Prasanth Jayachandran AuthorDate: Sat Jun 15 19:58:21 2019 -0700 Revert "HIVE-21783: Accept Hive connections from the same domain without authentication." --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 - .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 +-- ...estImproperTrustDomainAuthenticationBinary.java | 28 --- .../TestImproperTrustDomainAuthenticationHttp.java | 28 --- .../auth/TestTrustDomainAuthenticationBinary.java | 28 --- .../auth/TestTrustDomainAuthenticationHttp.java| 28 --- .../auth/TrustDomainAuthenticationTest.java| 192 - .../apache/hive/service/auth/HiveAuthFactory.java | 5 - .../apache/hive/service/auth/PlainSaslHelper.java | 54 -- .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 ++ 10 files changed, 25 insertions(+), 422 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 03a8019..2cea174 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3478,14 +3478,6 @@ public class HiveConf extends Configuration { " (Use with property hive.server2.custom.authentication.class)\n" + " PAM: Pluggable authentication module\n" + " NOSASL: Raw transport"), -HIVE_SERVER2_TRUSTED_DOMAIN("hive.server2.trusted.domain", "", -"Specifies the host or a domain to trust connections from. Authentication is skipped " + -"for any connection coming from a host whose hostname ends with the value of this" + -" property. If authentication is expected to be skipped for connections from " + -"only a given host, fully qualified hostname of that host should be specified. 
By default" + -" it is empty, which means that all the connections to HiveServer2 are authenticated. " + -"When it is non-empty, the client has to provide a Hive user name. Any password, if " + -"provided, will not be used when authentication is skipped."), HIVE_SERVER2_ALLOW_USER_SUBSTITUTION("hive.server2.allow.user.substitution", true, "Allow alternate user to be specified as part of HiveServer2 open connection request."), HIVE_SERVER2_KERBEROS_KEYTAB("hive.server2.authentication.kerberos.keytab", "", diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index e604f90..7d1192a 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -36,7 +36,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.miniHS2.MiniHS2; import com.google.common.io.Files; -import org.apache.hive.service.server.HiveServer2; /** * Wrapper around Hadoop's MiniKdc for use in hive tests. 
@@ -179,21 +178,15 @@ public class MiniHiveKdc { * @return new MiniHS2 instance * @throws Exception */ - public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, + public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, String authType) throws Exception { -String hivePrincipal = - miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); -String hiveKeytab = miniHiveKdc.getKeyTabFile( - miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); - -MiniHS2.Builder miniHS2Builder = new MiniHS2.Builder() - .withConf(hiveConf) - .withMiniKdc(hivePrincipal, hiveKeytab) - .withAuthenticationType(authType); -if (HiveServer2.isHTTPTransportMode(hiveConf)) { - miniHS2Builder.withHTTPTransport(); -} -return miniHS2Builder.build(); + String hivePrincipal = + miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); + String hiveKeytab = miniHiveKdc.getKeyTabFile( + miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); + + return new MiniHS2.Builder().withConf(hiveConf).withMiniKdc(hivePrincipal, hiveKeytab). + withAuthenticationType(authType).build(); } /** diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestImproperTrustDomai
[hive] branch master updated (67240e7 -> 56fc46b)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 67240e7 Revert "HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)" new eaf96ca HIVE-21783: Accept Hive connections from the same domain without authentication. new 20bda80 HIVE-21801 : Tests using miniHS2 with HTTP as transport are creating miniHS2 with binary transport new c440236 HIVE-21783 : Address comments by Olli and Prasanth. new 636890f HIVE-21783 : Add tests for trusted domain authentication. new b386328 HIVE-21783 : In case of HTTP use reverse DNS lookup to get the host name of client. new 56fc46b Merge pull request #648 from ashutosh-bapat/hive21783 The 13496 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 + .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 ++- ...estImproperTrustDomainAuthenticationBinary.java | 15 +- .../TestImproperTrustDomainAuthenticationHttp.java | 15 +- .../auth/TestTrustDomainAuthenticationBinary.java | 15 +- .../auth/TestTrustDomainAuthenticationHttp.java| 15 +- .../auth/TrustDomainAuthenticationTest.java| 192 + .../apache/hive/service/auth/HiveAuthFactory.java | 5 + .../apache/hive/service/auth/PlainSaslHelper.java | 54 ++ .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 -- 10 files changed, 338 insertions(+), 57 deletions(-) copy common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleReporter.java => itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestImproperTrustDomainAuthenticationBinary.java (73%) copy common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleReporter.java => itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestImproperTrustDomainAuthenticationHttp.java (73%) copy common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleReporter.java => itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestTrustDomainAuthenticationBinary.java (74%) copy common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/CodahaleReporter.java => itests/hive-unit/src/test/java/org/apache/hive/service/auth/TestTrustDomainAuthenticationHttp.java (74%) create mode 100644 itests/hive-unit/src/test/java/org/apache/hive/service/auth/TrustDomainAuthenticationTest.java
[hive] branch master updated: Revert "HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)"
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 67240e7 Revert "HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)" 67240e7 is described below commit 67240e7810387d741e39a07f7acbdc7f4fb0b4bb Author: Prasanth Jayachandran AuthorDate: Sat Jun 15 19:55:35 2019 -0700 Revert "HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)" This reverts commit 24313ab962b2881317bdcb50e67e90d3da3a5cc2. --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 - .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 +-- ...estImproperTrustDomainAuthenticationBinary.java | 28 --- .../TestImproperTrustDomainAuthenticationHttp.java | 28 --- .../auth/TestTrustDomainAuthenticationBinary.java | 28 --- .../auth/TestTrustDomainAuthenticationHttp.java| 28 --- .../auth/TrustDomainAuthenticationTest.java| 192 - .../apache/hive/service/auth/HiveAuthFactory.java | 5 - .../apache/hive/service/auth/PlainSaslHelper.java | 54 -- .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 ++ 10 files changed, 25 insertions(+), 422 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 03a8019..2cea174 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3478,14 +3478,6 @@ public class HiveConf extends Configuration { " (Use with property hive.server2.custom.authentication.class)\n" + " PAM: Pluggable authentication module\n" + " NOSASL: Raw transport"), -HIVE_SERVER2_TRUSTED_DOMAIN("hive.server2.trusted.domain", "", -"Specifies the host or a domain to trust connections from. 
Authentication is skipped " + -"for any connection coming from a host whose hostname ends with the value of this" + -" property. If authentication is expected to be skipped for connections from " + -"only a given host, fully qualified hostname of that host should be specified. By default" + -" it is empty, which means that all the connections to HiveServer2 are authenticated. " + -"When it is non-empty, the client has to provide a Hive user name. Any password, if " + -"provided, will not be used when authentication is skipped."), HIVE_SERVER2_ALLOW_USER_SUBSTITUTION("hive.server2.allow.user.substitution", true, "Allow alternate user to be specified as part of HiveServer2 open connection request."), HIVE_SERVER2_KERBEROS_KEYTAB("hive.server2.authentication.kerberos.keytab", "", diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index e604f90..7d1192a 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -36,7 +36,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.miniHS2.MiniHS2; import com.google.common.io.Files; -import org.apache.hive.service.server.HiveServer2; /** * Wrapper around Hadoop's MiniKdc for use in hive tests. 
@@ -179,21 +178,15 @@ public class MiniHiveKdc { * @return new MiniHS2 instance * @throws Exception */ - public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, + public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, String authType) throws Exception { -String hivePrincipal = - miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); -String hiveKeytab = miniHiveKdc.getKeyTabFile( - miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); - -MiniHS2.Builder miniHS2Builder = new MiniHS2.Builder() - .withConf(hiveConf) - .withMiniKdc(hivePrincipal, hiveKeytab) - .withAuthenticationType(authType); -if (HiveServer2.isHTTPTransportMode(hiveConf)) { - miniHS2Builder.withHTTPTransport(); -} -return miniHS2Builder.build(); + String hivePrincipal = + miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); + Strin
[hive] branch master updated: HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 24313ab HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran) 24313ab is described below commit 24313ab962b2881317bdcb50e67e90d3da3a5cc2 Author: Prasanth Jayachandran AuthorDate: Thu Jun 13 01:36:39 2019 -0700 HIVE-21783: Avoid authentication for connection from the same domain (Ashutosh Bapat reviewed by Olli Draese, Prasanth Jayachandran) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 8 + .../java/org/apache/hive/minikdc/MiniHiveKdc.java | 23 ++- ...estImproperTrustDomainAuthenticationBinary.java | 28 +++ .../TestImproperTrustDomainAuthenticationHttp.java | 28 +++ .../auth/TestTrustDomainAuthenticationBinary.java | 28 +++ .../auth/TestTrustDomainAuthenticationHttp.java| 28 +++ .../auth/TrustDomainAuthenticationTest.java| 192 + .../apache/hive/service/auth/HiveAuthFactory.java | 5 + .../apache/hive/service/auth/PlainSaslHelper.java | 54 ++ .../hive/service/cli/thrift/ThriftHttpServlet.java | 53 -- 10 files changed, 422 insertions(+), 25 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2cea174..03a8019 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3478,6 +3478,14 @@ public class HiveConf extends Configuration { " (Use with property hive.server2.custom.authentication.class)\n" + " PAM: Pluggable authentication module\n" + " NOSASL: Raw transport"), +HIVE_SERVER2_TRUSTED_DOMAIN("hive.server2.trusted.domain", "", +"Specifies the host or a domain to trust connections from. 
Authentication is skipped " + +"for any connection coming from a host whose hostname ends with the value of this" + +" property. If authentication is expected to be skipped for connections from " + +"only a given host, fully qualified hostname of that host should be specified. By default" + +" it is empty, which means that all the connections to HiveServer2 are authenticated. " + +"When it is non-empty, the client has to provide a Hive user name. Any password, if " + +"provided, will not be used when authentication is skipped."), HIVE_SERVER2_ALLOW_USER_SUBSTITUTION("hive.server2.allow.user.substitution", true, "Allow alternate user to be specified as part of HiveServer2 open connection request."), HIVE_SERVER2_KERBEROS_KEYTAB("hive.server2.authentication.kerberos.keytab", "", diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index 7d1192a..e604f90 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.miniHS2.MiniHS2; import com.google.common.io.Files; +import org.apache.hive.service.server.HiveServer2; /** * Wrapper around Hadoop's MiniKdc for use in hive tests. 
@@ -178,15 +179,21 @@ public class MiniHiveKdc { * @return new MiniHS2 instance * @throws Exception */ - public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, + public static MiniHS2 getMiniHS2WithKerb(MiniHiveKdc miniHiveKdc, HiveConf hiveConf, String authType) throws Exception { - String hivePrincipal = - miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); - String hiveKeytab = miniHiveKdc.getKeyTabFile( - miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); - - return new MiniHS2.Builder().withConf(hiveConf).withMiniKdc(hivePrincipal, hiveKeytab). - withAuthenticationType(authType).build(); +String hivePrincipal = + miniHiveKdc.getFullyQualifiedServicePrincipal(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL); +String hiveKeytab = miniHiveKdc.getKeyTabFile( + miniHiveKdc.getServicePrincipalForUser(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL)); + +MiniHS2.Builder miniHS2Builder = new MiniHS2.Builder() + .withConf(hiveConf) + .withMiniKdc(hivePrincipal, hiveKeytab) + .withAuthen
[hive] branch master updated: HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 80c55eb0 HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V) 80c55eb0 is described below commit 80c55eb0314e8d0f923765c109f7fc4538e9311a Author: Prasanth Jayachandran AuthorDate: Wed Jun 12 11:27:33 2019 -0700 HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V) --- jdbc/pom.xml | 15 +-- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/jdbc/pom.xml b/jdbc/pom.xml index 50a3456..b060035 100644 --- a/jdbc/pom.xml +++ b/jdbc/pom.xml @@ -168,12 +168,6 @@ - org.apache.logging.log4j:log4j-core - - org/apache/logging/log4j/core/jackson/** - - - org.apache.hadoop:hadoop-common org/apache/hadoop/security/* @@ -256,6 +250,7 @@ org.antlr:* org.slf4j:slf4j-log4j12 log4j:* + org.apache.logging.log4j:* antlr:* aopalliance:* asm:* @@ -330,14 +325,6 @@ org.apache.hive.org.apache.commons - org.apache.log4j - org.apache.hive.org.apache.log4j - - - org.slf4j - org.apache.hive.org.slf4j - - com.facebook org.apache.hive.com.facebook
[hive] branch branch-3.1 updated: HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3.1 by this push: new f983f4e HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V) f983f4e is described below commit f983f4e0269d3c263237977f093768517c3d9677 Author: Prasanth Jayachandran AuthorDate: Wed Jun 12 11:27:33 2019 -0700 HIVE-18874: JDBC: HiveConnection shades log4j interfaces (Prasanth Jayachandran reviewed by Gopal V) --- jdbc/pom.xml | 15 +-- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/jdbc/pom.xml b/jdbc/pom.xml index 217b17f..635d2d6 100644 --- a/jdbc/pom.xml +++ b/jdbc/pom.xml @@ -173,12 +173,6 @@ - org.apache.logging.log4j:log4j-core - - org/apache/logging/log4j/core/jackson/** - - - org.apache.hadoop:hadoop-common org/apache/hadoop/security/* @@ -261,6 +255,7 @@ org.antlr:* org.slf4j:slf4j-log4j12 log4j:* + org.apache.logging.log4j:* antlr:* aopalliance:* asm:* @@ -336,14 +331,6 @@ org.apache.hive.org.apache.commons - org.apache.log4j - org.apache.hive.org.apache.log4j - - - org.slf4j - org.apache.hive.org.slf4j - - com.facebook org.apache.hive.com.facebook
[hive] branch master updated: HIVE-21582: Prefix msck configs with metastore (Prasanth Jayachandran reviewed by Jason Dere)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new cc8ba3a HIVE-21582: Prefix msck configs with metastore (Prasanth Jayachandran reviewed by Jason Dere) cc8ba3a is described below commit cc8ba3aff76adfbc93770dc007461f59ec62b5d8 Author: Prasanth Jayachandran AuthorDate: Mon Apr 15 11:46:36 2019 -0700 HIVE-21582: Prefix msck configs with metastore (Prasanth Jayachandran reviewed by Jason Dere) --- ql/src/test/queries/clientpositive/partition_discovery.q | 6 +++--- .../org/apache/hadoop/hive/metastore/conf/MetastoreConf.java | 10 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ql/src/test/queries/clientpositive/partition_discovery.q b/ql/src/test/queries/clientpositive/partition_discovery.q index 7af9664..c85b6fd 100644 --- a/ql/src/test/queries/clientpositive/partition_discovery.q +++ b/ql/src/test/queries/clientpositive/partition_discovery.q @@ -42,13 +42,13 @@ dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n9/p1=c/p2 dfs -touchz ${system:test.warehouse.dir}/repairtable_n9/p1=a/p2=b/datafile; dfs -touchz ${system:test.warehouse.dir}/repairtable_n9/p1=c/p2=d/datafile; -set msck.repair.enable.partition.retention=false; +set metastore.msck.repair.enable.partition.retention=false; MSCK REPAIR TABLE default.repairtable_n9; show partitions default.repairtable_n9; !sleep 12; -set msck.repair.enable.partition.retention=true; +set metastore.msck.repair.enable.partition.retention=true; -- msck does not drop partitions, so this still should be no-op MSCK REPAIR TABLE default.repairtable_n9; show partitions default.repairtable_n9; @@ -65,7 +65,7 @@ dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n10/p1=c/p dfs -touchz ${system:test.warehouse.dir}/repairtable_n10/p1=a/p2=b/datafile; dfs -touchz 
${system:test.warehouse.dir}/repairtable_n10/p1=c/p2=d/datafile; -set msck.repair.enable.partition.retention=false; +set metastore.msck.repair.enable.partition.retention=false; !sleep 12; MSCK REPAIR TABLE default.repairtable_n10; show partitions default.repairtable_n10; diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index c918e2b..bac9d01 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -697,25 +697,25 @@ public class MetastoreConf { METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx", new StringSetValidator("json", "jmx", "console", "hadoop", "slf4j"), "A comma separated list of metrics reporters to start"), -MSCK_PATH_VALIDATION("msck.path.validation", "hive.msck.path.validation", "throw", +MSCK_PATH_VALIDATION("metastore.msck.path.validation", "hive.msck.path.validation", "throw", new StringSetValidator("throw", "skip", "ignore"), "The approach msck should take with HDFS " + "directories that are partition-like but contain unsupported characters. 'throw' (an " + "exception) is the default; 'skip' will skip the invalid directories and still repair the" + " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"), -MSCK_REPAIR_BATCH_SIZE("msck.repair.batch.size", +MSCK_REPAIR_BATCH_SIZE("metastore.msck.repair.batch.size", "hive.msck.repair.batch.size", 3000, "Batch size for the msck repair command. If the value is greater than zero,\n " + "it will execute batch wise with the configured batch size. 
In case of errors while\n" + "adding unknown partitions the batch size is automatically reduced by half in the subsequent\n" + "retry attempt. The default value is 3000 which means it will execute in the batches of 3000."), -MSCK_REPAIR_BATCH_MAX_RETRIES("msck.repair.batch.max.retries", "hive.msck.repair.batch.max.retries", 4, +MSCK_REPAIR_BATCH_MAX_RETRIES("metastore.msck.repair.batch.max.retries", "hive.msck.repair.batch.max.retries", 4, "Maximum number of retries for the msck repair command when adding unknown partitions.\n " + "If the value is greater than zero it will retry adding unknown p
[hive] branch master updated: HIVE-21597: WM trigger validation should happen at the time of create or alter (Prasanth Jayachandran reviewed by Daniel Dai)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 079a720 HIVE-21597: WM trigger validation should happen at the time of create or alter (Prasanth Jayachandran reviewed by Daniel Dai) 079a720 is described below commit 079a7208363e4798d24a54d997d5b0f1cb7cd657 Author: Prasanth Jayachandran AuthorDate: Fri Apr 12 16:37:13 2019 -0700 HIVE-21597: WM trigger validation should happen at the time of create or alter (Prasanth Jayachandran reviewed by Daniel Dai) --- .../org/apache/hadoop/hive/ql/exec/DDLTask.java| 11 +++ .../hive/ql/exec/tez/TriggerValidatorRunnable.java | 8 - ql/src/test/queries/clientpositive/resourceplan.q | 10 -- .../results/clientpositive/llap/resourceplan.q.out | 36 ++ .../apache/hive/service/server/KillQueryImpl.java | 4 +-- 5 files changed, 52 insertions(+), 17 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 7c5a47e..7f0eb40 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -190,6 +190,7 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObje import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveRoleGrant; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveV1Authorizer; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.wm.ExecutionTrigger; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; @@ -595,11 +596,21 @@ public class DDLTask extends Task implements Serializable { } private int createWMTrigger(Hive db, CreateWMTriggerDesc desc) throws HiveException { 
+validateTrigger(desc.getTrigger()); db.createWMTrigger(desc.getTrigger()); return 0; } + private void validateTrigger(final WMTrigger trigger) throws HiveException { +try { + ExecutionTrigger.fromWMTrigger(trigger); +} catch (IllegalArgumentException e) { + throw new HiveException(e); +} + } + private int alterWMTrigger(Hive db, AlterWMTriggerDesc desc) throws HiveException { +validateTrigger(desc.getTrigger()); db.alterWMTrigger(desc.getTrigger()); return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java index 670184b..8fb0695 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TriggerValidatorRunnable.java @@ -72,10 +72,16 @@ public class TriggerValidatorRunnable implements Runnable { currentCounterValue); violatedSessions.put(sessionState, currentTrigger); LOG.info("KILL trigger replacing MOVE for query {}", queryId); - } else { + } else if (existingTrigger.getAction().getType().equals(Action.Type.MOVE_TO_POOL) && + currentTrigger.getAction().getType().equals(Action.Type.MOVE_TO_POOL)){ // if multiple MOVE happens, only first move will be chosen LOG.warn("Conflicting MOVE triggers ({} and {}). Choosing the first MOVE trigger: {}", existingTrigger, currentTrigger, existingTrigger.getName()); + } else if (existingTrigger.getAction().getType().equals(Action.Type.KILL_QUERY) && + currentTrigger.getAction().getType().equals(Action.Type.KILL_QUERY)){ +// if multiple KILL happens, only first kill will be chosen +LOG.warn("Conflicting KILL triggers ({} and {}). 
Choosing the first KILL trigger: {}", + existingTrigger, currentTrigger, existingTrigger.getName()); } } else { // first violation for the session diff --git a/ql/src/test/queries/clientpositive/resourceplan.q b/ql/src/test/queries/clientpositive/resourceplan.q index 46aae72..93d848b 100644 --- a/ql/src/test/queries/clientpositive/resourceplan.q +++ b/ql/src/test/queries/clientpositive/resourceplan.q @@ -179,11 +179,17 @@ CREATE TRIGGER plan_1.trigger_2 WHEN ELAPSED_TIME >= '30seconds' DO MOVE TO slow CREATE TRIGGER plan_1.trigger_2 WHEN ELAPSED_TIME < '30hour' DO MOVE TO slow_pool; CREATE TRIGGER plan_1.trigger_2 WHEN ELAPSED_TIME <= '30min' DO MOVE TO slow_pool; CREATE TRIGGER plan_1.trigger_2 WHEN ELAPSED_TIME = '0day' DO MOVE TO
[hive] branch branch-3 updated: HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new cda7150 HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai) cda7150 is described below commit cda715077624df947b1c3b1d44a09e0d0fdfa486 Author: Prasanth Jayachandran AuthorDate: Tue Apr 9 15:01:19 2019 -0700 HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai) --- .../org/apache/hadoop/hive/ql/exec/DDLTask.java| 5 +-- .../hive/ql/exec/tez/TezSessionPoolManager.java| 15 ++--- .../apache/hive/service/server/HiveServer2.java| 37 +++--- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 5d07b1b..77dc831 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -35,6 +35,7 @@ import java.sql.SQLException; import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -779,8 +780,8 @@ public class DDLTask extends Task implements Serializable { } } if (pm != null) { - pm.updateTriggers(appliedRp); - LOG.info("Updated tez session pool manager with active resource plan: {}", name); + Collection appliedTriggers = pm.updateTriggers(appliedRp); + LOG.info("Updated tez session pool manager with active resource plan: {} appliedTriggers: {}", name, appliedTriggers); } return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index 2633390..7c0a1fe 
100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -21,9 +21,12 @@ package org.apache.hadoop.hive.ql.exec.tez; import org.apache.hadoop.hive.ql.exec.tez.TezSessionState.HiveResources; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -113,9 +116,9 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger } initTriggers(conf); if (resourcePlan != null) { - updateTriggers(resourcePlan); - LOG.info("Updated tez session pool manager with active resource plan: {}", - resourcePlan.getPlan().getName()); + Collection appliedTriggers = updateTriggers(resourcePlan); + LOG.info("Updated tez session pool manager with triggers {} from active resource plan: {}", + appliedTriggers, resourcePlan.getPlan().getName()); } } @@ -531,7 +534,8 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger } } - public void updateTriggers(final WMFullResourcePlan appliedRp) { + public Collection updateTriggers(final WMFullResourcePlan appliedRp) { +Set triggerNames = new HashSet<>(); if (sessionTriggerProvider != null) { List wmTriggers = appliedRp != null ? appliedRp.getTriggers() : null; List triggers = new ArrayList<>(); @@ -539,11 +543,14 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger for (WMTrigger wmTrigger : wmTriggers) { if (wmTrigger.isSetIsInUnmanaged() && wmTrigger.isIsInUnmanaged()) { triggers.add(ExecutionTrigger.fromWMTrigger(wmTrigger)); +triggerNames.add(wmTrigger.getTriggerName()); } } } sessionTriggerProvider.setTriggers(Collections.unmodifiableList(triggers)); } + +return triggerNames; } /** Called by TezSessionPoolSession when closed. 
*/ diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 7bfa203..f1cd7b9 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -801,23 +801,21 @@ public class HiveServer2 extends CompositeService { private void startOrReconnectTezSessions() { LOG.info("Starting/Reconnecting tez sessions.."); // TODO: add tez session reconnect after TEZ-3875 -WMFullResourcePlan resourcePlan = null; -i
[hive] branch master updated: HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 928f3d6 HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai) 928f3d6 is described below commit 928f3d65c324c785565cdcc1d809695b1a5344bd Author: Prasanth Jayachandran AuthorDate: Tue Apr 9 15:01:19 2019 -0700 HIVE-21591: Using triggers in non-LLAP mode should not require wm queue (Prasanth Jayachandran reviewed by Daniel Dai) --- .../org/apache/hadoop/hive/ql/exec/DDLTask.java| 5 +-- .../hive/ql/exec/tez/TezSessionPoolManager.java| 15 ++--- .../apache/hive/service/server/HiveServer2.java| 37 +++--- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 269cd85..a1d795f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -26,6 +26,7 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; @@ -582,8 +583,8 @@ public class DDLTask extends Task implements Serializable { } } if (pm != null) { - pm.updateTriggers(appliedRp); - LOG.info("Updated tez session pool manager with active resource plan: {}", name); + Collection appliedTriggers = pm.updateTriggers(appliedRp); + LOG.info("Updated tez session pool manager with active resource plan: {} appliedTriggers: {}", name, appliedTriggers); } return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index 2633390..7c0a1fe 100644 
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -21,9 +21,12 @@ package org.apache.hadoop.hive.ql.exec.tez; import org.apache.hadoop.hive.ql.exec.tez.TezSessionState.HiveResources; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -113,9 +116,9 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger } initTriggers(conf); if (resourcePlan != null) { - updateTriggers(resourcePlan); - LOG.info("Updated tez session pool manager with active resource plan: {}", - resourcePlan.getPlan().getName()); + Collection appliedTriggers = updateTriggers(resourcePlan); + LOG.info("Updated tez session pool manager with triggers {} from active resource plan: {}", + appliedTriggers, resourcePlan.getPlan().getName()); } } @@ -531,7 +534,8 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger } } - public void updateTriggers(final WMFullResourcePlan appliedRp) { + public Collection updateTriggers(final WMFullResourcePlan appliedRp) { +Set triggerNames = new HashSet<>(); if (sessionTriggerProvider != null) { List wmTriggers = appliedRp != null ? appliedRp.getTriggers() : null; List triggers = new ArrayList<>(); @@ -539,11 +543,14 @@ public class TezSessionPoolManager extends TezSessionPoolSession.AbstractTrigger for (WMTrigger wmTrigger : wmTriggers) { if (wmTrigger.isSetIsInUnmanaged() && wmTrigger.isIsInUnmanaged()) { triggers.add(ExecutionTrigger.fromWMTrigger(wmTrigger)); +triggerNames.add(wmTrigger.getTriggerName()); } } } sessionTriggerProvider.setTriggers(Collections.unmodifiableList(triggers)); } + +return triggerNames; } /** Called by TezSessionPoolSession when closed. 
*/ diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 452b8d8..00f6e90 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -732,23 +732,21 @@ public class HiveServer2 extends CompositeService { private void startOrReconnectTezSessions() { LOG.info("Starting/Reconnecting tez sessions.."); // TODO: add tez session reconnect after TEZ-3875 -WMFullResourcePlan resourcePlan = null; -i
[hive] branch branch-3.0 updated: Revert "HIVE-19875: increase LLAP IO queue size for perf (Prasanth Jayachandran reviewed by Sergey Shelukhin)"
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3.0 by this push: new a354bed Revert "HIVE-19875: increase LLAP IO queue size for perf (Prasanth Jayachandran reviewed by Sergey Shelukhin)" a354bed is described below commit a354beddd6463fad2fcd4fe22643f65c1f1ef50f Author: Prasanth Jayachandran AuthorDate: Thu Apr 4 11:36:03 2019 -0700 Revert "HIVE-19875: increase LLAP IO queue size for perf (Prasanth Jayachandran reviewed by Sergey Shelukhin)" This reverts commit 5909c6efec69b0c27fe1681a15186e58777fce21. --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../hive/llap/io/api/impl/LlapRecordReader.java| 25 -- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7aaa3a9..b81c47d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3848,7 +3848,7 @@ public class HiveConf extends Configuration { "MR LineRecordRedader into LLAP cache, if this feature is enabled. Safety flag."), LLAP_ORC_ENABLE_TIME_COUNTERS("hive.llap.io.orc.time.counters", true, "Whether to enable time counters for LLAP IO layer (time spent in HDFS, etc.)"), -LLAP_IO_VRB_QUEUE_LIMIT_BASE("hive.llap.io.vrb.queue.limit.base", 5, +LLAP_IO_VRB_QUEUE_LIMIT_BASE("hive.llap.io.vrb.queue.limit.base", 1, "The default queue size for VRBs produced by a LLAP IO thread when the processing is\n" + "slower than the IO. 
The actual queue size is set per fragment, and is adjusted down\n" + "from the base, depending on the schema."), diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index f83fffe..6897336 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -58,7 +58,6 @@ import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.NullWritable; @@ -164,9 +163,7 @@ class LlapRecordReader int queueLimitBase = getQueueVar(ConfVars.LLAP_IO_VRB_QUEUE_LIMIT_BASE, job, daemonConf); int queueLimitMin = getQueueVar(ConfVars.LLAP_IO_VRB_QUEUE_LIMIT_MIN, job, daemonConf); -final boolean decimal64Support = HiveConf.getVar(job, ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED) - .equalsIgnoreCase("decimal_64"); -int limit = determineQueueLimit(queueLimitBase, queueLimitMin, rbCtx.getRowColumnTypeInfos(), decimal64Support); +int limit = determineQueueLimit(queueLimitBase, queueLimitMin, rbCtx.getRowColumnTypeInfos()); LOG.info("Queue limit for LlapRecordReader is " + limit); this.queue = new LinkedBlockingQueue<>(limit); @@ -202,14 +199,14 @@ class LlapRecordReader private static final int COL_WEIGHT_COMPLEX = 16, COL_WEIGHT_HIVEDECIMAL = 4, COL_WEIGHT_STRING = 8; private static int determineQueueLimit( -int queueLimitBase, int queueLimitMin, TypeInfo[] typeInfos, final boolean decimal64Support) { + int queueLimitBase, int queueLimitMin, TypeInfo[] typeInfos) 
{ // If the values are equal, the queue limit is fixed. if (queueLimitBase == queueLimitMin) return queueLimitBase; // If there are no columns (projection only join?) just assume no weight. if (typeInfos == null || typeInfos.length == 0) return queueLimitBase; double totalWeight = 0; for (TypeInfo ti : typeInfos) { - int colWeight; + int colWeight = 1; if (ti.getCategory() != Category.PRIMITIVE) { colWeight = COL_WEIGHT_COMPLEX; } else { @@ -220,22 +217,8 @@ class LlapRecordReader case VARCHAR: case STRING: colWeight = COL_WEIGHT_STRING; - break; case DECIMAL: - boolean useDecimal64 = false; - if (ti instanceof DecimalTypeInfo) { -DecimalTypeInfo dti = (DecimalTypeInfo) ti; -if (dti.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION && decimal64Support) { -
[hive] branch master updated: HIVE-21457: Perf optimizations in ORC split-generation (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 72d72d4 HIVE-21457: Perf optimizations in ORC split-generation (Prasanth Jayachandran reviewed by Gopal V) 72d72d4 is described below commit 72d72d4df734ccc653a0a6986c319200dea35f0b Author: Prasanth Jayachandran AuthorDate: Thu Mar 28 13:46:03 2019 -0700 HIVE-21457: Perf optimizations in ORC split-generation (Prasanth Jayachandran reviewed by Gopal V) --- .../apache/hadoop/hive/ql/exec/FetchOperator.java | 2 +- .../org/apache/hadoop/hive/ql/io/AcidUtils.java| 16 ++--- .../hive/ql/io/HiveContextAwareRecordReader.java | 5 +- .../apache/hadoop/hive/ql/io/HiveInputFormat.java | 2 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 71 -- .../hadoop/hive/ql/io/orc/OrcRawRecordMerger.java | 5 +- .../ql/io/orc/VectorizedOrcAcidRowBatchReader.java | 2 +- .../hadoop/hive/ql/txn/compactor/CompactorMR.java | 7 ++- .../hadoop/hive/ql/txn/compactor/Initiator.java| 2 +- 9 files changed, 77 insertions(+), 35 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index e6b47de..3550747 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -768,7 +768,7 @@ public class FetchOperator implements Serializable { private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException { boolean recursive = job.getBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, false); // If this is in acid format always read it recursively regardless of what the jobconf says. 
-if (!recursive && !AcidUtils.isAcid(p, job)) { +if (!recursive && !AcidUtils.isAcid(fs, p, job)) { return fs.listStatus(p, FileUtils.HIDDEN_FILES_PATH_FILTER); } List results = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 50a233d..af8743d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -1080,14 +1080,15 @@ public class AcidUtils { /** * Is the given directory in ACID format? + * @param fileSystem file system instance * @param directory the partition directory to check * @param conf the query configuration * @return true, if it is an ACID directory * @throws IOException */ - public static boolean isAcid(Path directory, + public static boolean isAcid(FileSystem fileSystem, Path directory, Configuration conf) throws IOException { -FileSystem fs = directory.getFileSystem(conf); +FileSystem fs = fileSystem == null ? directory.getFileSystem(conf) : fileSystem; for(FileStatus file: fs.listStatus(directory)) { String filename = file.getPath().getName(); if (filename.startsWith(BASE_PREFIX) || @@ -1106,7 +1107,7 @@ public class AcidUtils { Configuration conf, ValidWriteIdList writeIdList ) throws IOException { -return getAcidState(directory, conf, writeIdList, false, false); +return getAcidState(null, directory, conf, writeIdList, false, false); } /** State class for getChildState; cannot modify 2 things in a method. */ @@ -1122,22 +1123,23 @@ public class AcidUtils { * base and diff directories. Note that because major compactions don't * preserve the history, we can't use a base directory that includes a * write id that we must exclude. 
+ * @param fileSystem file system instance * @param directory the partition directory to analyze * @param conf the configuration * @param writeIdList the list of write ids that we are reading * @return the state of the directory * @throws IOException */ - public static Directory getAcidState(Path directory, + public static Directory getAcidState(FileSystem fileSystem, Path directory, Configuration conf, ValidWriteIdList writeIdList, boolean useFileIds, boolean ignoreEmptyFiles ) throws IOException { -return getAcidState(directory, conf, writeIdList, Ref.from(useFileIds), ignoreEmptyFiles, null); +return getAcidState(fileSystem, directory, conf, writeIdList, Ref.from(useFileIds), ignoreEmptyFiles, null); } - public static Directory getAcidState(Path directory, + public static Directory getAcidState(FileSystem fileSystem, Path directory, Co
[hive] branch master updated: HIVE-21305: LLAP: Option to skip cache for ETL queries (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6d74222 HIVE-21305: LLAP: Option to skip cache for ETL queries (Prasanth Jayachandran reviewed by Gopal V) 6d74222 is described below commit 6d74222521d2a1333990b9b3577ec9a7f7e619b8 Author: Prasanth Jayachandran AuthorDate: Tue Mar 26 14:25:38 2019 -0700 HIVE-21305: LLAP: Option to skip cache for ETL queries (Prasanth Jayachandran reviewed by Gopal V) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties| 1 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 11 ++ ql/src/test/queries/clientpositive/llap_io_etl.q | 49 + .../results/clientpositive/llap/llap_io_etl.q.out | 203 + .../results/clientpositive/llap/orc_merge1.q.out | 12 +- .../results/clientpositive/llap/orc_merge3.q.out | 2 +- .../results/clientpositive/llap/orc_merge4.q.out | 6 +- .../clientpositive/llap/orc_ppd_basic.q.out| 1 - .../clientpositive/llap/tez_input_counters.q.out | 7 +- 10 files changed, 279 insertions(+), 17 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 94902de..777f8b5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4112,6 +4112,10 @@ public class HiveConf extends Configuration { LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none", new StringSet("auto", "none", "all", "map", "only"), "Chooses whether query fragments will run in container or in llap"), +LLAP_IO_ETL_SKIP_FORMAT("hive.llap.io.etl.skip.format", "encode", new StringSet("none", "encode", "all"), + "For ETL queries, determines whether to skip llap io cache. 
By default, hive.llap.io.encode.enabled " + +"will be set to false which disables LLAP IO for text formats. Setting it to 'all' will disable LLAP IO for all" + +" formats. 'none' will not disable LLAP IO for any formats."), LLAP_OBJECT_CACHE_ENABLED("hive.llap.object.cache.enabled", true, "Cache objects (plans, hashtables, etc) in llap"), LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS("hive.llap.io.decoding.metrics.percentiles.intervals", "30", diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8c4d9b7..3a2807f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -572,6 +572,7 @@ minillaplocal.query.files=\ llap_uncompressed.q,\ llap_decimal64_reader.q,\ llap_text.q,\ + llap_io_etl.q,\ load_data_using_job.q,\ load_dyn_part5.q,\ lvj_mapjoin.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6252013..1e79f32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12334,6 +12334,17 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } +final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT); +if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) { + if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) { +conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false); +LOG.info("Disabling LLAP IO encode as ETL query is detected"); + } else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) { +conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false); +LOG.info("Disabling LLAP IO as ETL query is detected"); + } +} + // Check query results cache. 
// If no masking/filtering required, then we can check the cache now, before // generating the operator tree and going through CBO. diff --git a/ql/src/test/queries/clientpositive/llap_io_etl.q b/ql/src/test/queries/clientpositive/llap_io_etl.q new file mode 100644 index 000..1e85320 --- /dev/null +++ b/ql/src/test/queries/clientpositive/llap_io_etl.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.fetch.task.conversion=none; + +SET hive.llap.io.enabled=true; +set hive.llap.cache.allow.synthetic.fileid=true; + +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1
[hive] branch master updated: HIVE-21482: Partition discovery table property is added to non-partitioned external tables (Prasanth Jayachandran reviewed by Jason Dere)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new b953c4e HIVE-21482: Partition discovery table property is added to non-partitioned external tables (Prasanth Jayachandran reviewed by Jason Dere) b953c4e is described below commit b953c4e077e71ddf667595b6acba9d5c9972ee6a Author: Prasanth Jayachandran AuthorDate: Thu Mar 21 17:41:38 2019 -0700 HIVE-21482: Partition discovery table property is added to non-partitioned external tables (Prasanth Jayachandran reviewed by Jason Dere) --- .../test/results/positive/external_table_ppd.q.out | 1 - .../positive/hbase_binary_storage_queries.q.out| 2 - .../src/test/results/positive/hbase_ddl.q.out | 2 - .../src/test/results/positive/hbase_queries.q.out | 1 - .../src/test/results/positive/hbasestats.q.out | 5 - .../hadoop/hive/ql/ddl/table/CreateTableDesc.java | 2 +- .../ql/ddl/table/CreateTableLikeOperation.java | 14 +- .../queries/clientpositive/partition_discovery.q | 16 ++ .../test/results/clientpositive/create_like.q.out | 1 - .../results/clientpositive/create_like_view.q.out | 1 - .../clientpositive/default_file_format.q.out | 4 - .../results/clientpositive/druid/druid_topn.q.out | 1 - .../druid/druidkafkamini_basic.q.out | 2 - .../druid/druidmini_expressions.q.out | 2 - .../test/results/clientpositive/druid_topn.q.out | 1 - .../kafka/kafka_storage_handler.q.out | 4 - .../clientpositive/llap/external_table_purge.q.out | 2 - .../test/results/clientpositive/llap/mm_exim.q.out | 1 - .../llap/strict_managed_tables2.q.out | 2 - .../clientpositive/llap/whroot_external1.q.out | 6 - .../clientpositive/partition_discovery.q.out | 197 + .../clientpositive/show_create_table_alter.q.out | 5 - .../clientpositive/show_create_table_serde.q.out | 1 - .../clientpositive/spark/stats_noscan_2.q.out | 2 - .../results/clientpositive/stats_noscan_2.q.out| 
2 - .../temp_table_display_colstats_tbllvl.q.out | 5 - .../org/apache/hadoop/hive/metastore/Msck.java | 9 +- 27 files changed, 229 insertions(+), 62 deletions(-) diff --git a/hbase-handler/src/test/results/positive/external_table_ppd.q.out b/hbase-handler/src/test/results/positive/external_table_ppd.q.out index 40abb42..139e35b 100644 --- a/hbase-handler/src/test/results/positive/external_table_ppd.q.out +++ b/hbase-handler/src/test/results/positive/external_table_ppd.q.out @@ -60,7 +60,6 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}} EXTERNALTRUE bucketing_version 2 - discover.partitions true external.table.purgetrue hbase.table.default.storage.typebinary hbase.table.namet_hive diff --git a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out index bf1a89d..1209c88 100644 --- a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out @@ -60,7 +60,6 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}} EXTERNALTRUE bucketing_version 2 - discover.partitions true external.table.purgetrue hbase.table.default.storage.typebinary hbase.table.namet_hive @@ -243,7 +242,6 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\&quo
[hive] branch master updated: HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e7f7fe3 HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V) e7f7fe3 is described below commit e7f7fe3b1cf443823a05e5409f55c55475fb5b48 Author: Prasanth Jayachandran AuthorDate: Fri Mar 8 18:52:15 2019 -0800 HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++-- .../results/clientpositive/groupby_position.q.out | 24 +++--- .../perf/tez/constraints/query94.q.out | 4 ++-- .../perf/tez/constraints/query95.q.out | 4 ++-- .../results/clientpositive/perf/tez/query94.q.out | 4 ++-- .../results/clientpositive/perf/tez/query95.q.out | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0dea099..076035b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1741,14 +1741,14 @@ public class HiveConf extends Configuration { "How many rows with the same key value should be cached in memory per smb joined table."), HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 10, "Number of rows after which size of the grouping keys/aggregation classes is performed"), -HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.99, +HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, "Portion of total memory to be used by map-side group aggregation hash table"), HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, "Portion of total memory 
to be used by map-side group aggregation hash table, when this group by is followed by map join"), HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, "The max memory to be used by map-side group aggregation hash table.\n" + "If the memory usage is higher than this number, force to flush data"), -HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, +HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.99, "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + "Set to 1 to make sure hash aggregation is never turned off."), HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 296279a..7305df0 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -69,7 +69,7 @@ STAGE PLANS: keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 -Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -168,7 +168,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -176,14 +176,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRIN
[hive] branch master updated: HIVE-21415: Parallel build is failing, trying to download incorrect hadoop-hdfs-client version (Prasanth Jayachandran reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6261b0d HIVE-21415: Parallel build is failing, trying to download incorrect hadoop-hdfs-client version (Prasanth Jayachandran reviewed by Vineet Garg) 6261b0d is described below commit 6261b0dc2b628ad23bf100ce2f3f14b688af384e Author: Prasanth Jayachandran AuthorDate: Fri Mar 8 18:21:47 2019 -0800 HIVE-21415: Parallel build is failing, trying to download incorrect hadoop-hdfs-client version (Prasanth Jayachandran reviewed by Vineet Garg) --- hplsql/pom.xml | 16 kryo-registrator/pom.xml | 18 +- packaging/pom.xml| 16 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/hplsql/pom.xml b/hplsql/pom.xml index 4ad9ef9..11eb14f 100644 --- a/hplsql/pom.xml +++ b/hplsql/pom.xml @@ -70,6 +70,22 @@ true + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + true + + + org.slf4j + slf4j-log4j12 + + + commons-logging + commons-logging + + + + junit junit ${junit.version} diff --git a/kryo-registrator/pom.xml b/kryo-registrator/pom.xml index 477c40d..ea2eb65 100644 --- a/kryo-registrator/pom.xml +++ b/kryo-registrator/pom.xml @@ -44,6 +44,22 @@ ${spark.version} true + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + true + + + org.slf4j + slf4j-log4j12 + + + commons-logging + commons-logging + + + - \ No newline at end of file + diff --git a/packaging/pom.xml b/packaging/pom.xml index 202ae34..f640a05 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -302,6 +302,22 @@ hive-kryo-registrator ${project.version} + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + true + + + org.slf4j + slf4j-log4j12 + + + commons-logging + commons-logging + + +
[hive] branch master updated: HIVE-21254: Pre-upgrade tool should handle exceptions and skip db/tables (Prasanth Jayachandran reviewed by Jason Dere, Eugene Koifman, Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new acec83f HIVE-21254: Pre-upgrade tool should handle exceptions and skip db/tables (Prasanth Jayachandran reviewed by Jason Dere, Eugene Koifman, Ashutosh Chauhan) acec83f is described below commit acec83f3b7718250e6317ec5890a5ad7a014f10e Author: Prasanth Jayachandran AuthorDate: Tue Feb 19 23:06:21 2019 -0800 HIVE-21254: Pre-upgrade tool should handle exceptions and skip db/tables (Prasanth Jayachandran reviewed by Jason Dere, Eugene Koifman, Ashutosh Chauhan) --- .../hadoop/hive/upgrade/acid/PreUpgradeTool.java | 93 --- .../hive/upgrade/acid/TestPreUpgradeTool.java | 172 - 2 files changed, 207 insertions(+), 58 deletions(-) diff --git a/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java index 04782a6..0e3e3e2 100644 --- a/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java +++ b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java @@ -25,6 +25,7 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -54,6 +55,7 @@ import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.security.AccessControlException; import 
org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.thrift.TException; @@ -223,39 +225,74 @@ public class PreUpgradeTool { boolean isAcidEnabled = isAcidEnabled(conf); IMetaStoreClient hms = getHMS(conf); LOG.debug("Looking for databases"); -List databases = hms.getAllDatabases();//TException -LOG.debug("Found " + databases.size() + " databases to process"); +String exceptionMsg = null; +List databases; List compactions = new ArrayList<>(); final CompactionMetaInfo compactionMetaInfo = new CompactionMetaInfo(); ValidTxnList txns = null; Hive db = null; -if(execute) { - db = Hive.get(conf); -} +try { + databases = hms.getAllDatabases();//TException + LOG.debug("Found " + databases.size() + " databases to process"); + if (execute) { +db = Hive.get(conf); + } -for(String dbName : databases) { - List tables = hms.getAllTables(dbName); - LOG.debug("found " + tables.size() + " tables in " + dbName); - for(String tableName : tables) { -Table t = hms.getTable(dbName, tableName); -LOG.debug("processing table " + Warehouse.getQualifiedName(t)); -if(isAcidEnabled) { - //if acid is off, there can't be any acid tables - nothing to compact - if(txns == null) { + for (String dbName : databases) { +try { + List tables = hms.getAllTables(dbName); + LOG.debug("found " + tables.size() + " tables in " + dbName); + for (String tableName : tables) { +try { + Table t = hms.getTable(dbName, tableName); + LOG.debug("processing table " + Warehouse.getQualifiedName(t)); + if (isAcidEnabled) { +//if acid is off, there can't be any acid tables - nothing to compact +if (txns == null) { /* This API changed from 2.x to 3.0. 
so this won't even compile with 3.0 but it doesn't need to since we only run this preUpgrade */ -TxnStore txnHandler = TxnUtils.getTxnStore(conf); -txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo()); + TxnStore txnHandler = TxnUtils.getTxnStore(conf); + txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo()); +} +List compactionCommands = + getCompactionCommands(t, conf, hms, compactionMetaInfo, execute, db, txns); +compactions.addAll(compactionCommands); + } + /*todo: handle renaming files somewhere*/ +} catch (Exception e) { + if (isAccessControlException(e)) { +// this
[hive] branch master updated: HIVE-21212: LLAP: shuffle port config uses internal configuration (Prasanth Jayachandran reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new e540cf8 HIVE-21212: LLAP: shuffle port config uses internal configuration (Prasanth Jayachandran reviewed by Gopal V) e540cf8 is described below commit e540cf8238680453fc04e861dbc332e7f56b904a Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:37:26 2019 -0800 HIVE-21212: LLAP: shuffle port config uses internal configuration (Prasanth Jayachandran reviewed by Gopal V) --- .../src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index 2279f8d..41eca8f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -534,8 +534,7 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla new String[0] : StringUtils.getTrimmedStrings(localDirList); int rpcPort = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_RPC_PORT); int mngPort = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_MANAGEMENT_RPC_PORT); - int shufflePort = daemonConf - .getInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT); + int shufflePort = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_YARN_SHUFFLE_PORT); int webPort = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_WEB_PORT); LlapDaemonInfo.initialize(appName, daemonConf);
[hive] branch master updated: HIVE-21103: PartitionManagementTask should not modify DN configs to avoid closing persistence manager (Prasanth Jayachandran reviewed by Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 7e89da8 HIVE-21103: PartitionManagementTask should not modify DN configs to avoid closing persistence manager (Prasanth Jayachandran reviewed by Sankar Hariappan) 7e89da8 is described below commit 7e89da8bc2730873147e47fb80a2f9e7a1e5e938 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:30:23 2019 -0800 HIVE-21103: PartitionManagementTask should not modify DN configs to avoid closing persistence manager (Prasanth Jayachandran reviewed by Sankar Hariappan) --- .../java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java| 5 +++-- .../org/apache/hadoop/hive/metastore/PartitionManagementTask.java| 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 313f87b..7c1e30a 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -731,9 +731,10 @@ public class MetastoreConf { "metastore.partition.management.table.types", "MANAGED_TABLE,EXTERNAL_TABLE", "Comma separated list of table types to use for partition management"), PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE("metastore.partition.management.task.thread.pool.size", - "metastore.partition.management.task.thread.pool.size", 5, + "metastore.partition.management.task.thread.pool.size", 3, "Partition management uses thread pool on to which tasks are submitted for discovering and retaining the\n" + - "partitions. 
This determines the size of the thread pool."), + "partitions. This determines the size of the thread pool. Note: Increasing the thread pool size will cause\n" + + "threadPoolSize * maxConnectionPoolSize connections to backend db"), PARTITION_MANAGEMENT_CATALOG_NAME("metastore.partition.management.catalog.name", "metastore.partition.management.catalog.name", "hive", "Automatic partition management will look for tables under the specified catalog name"), diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java index 901bf80..59001b5 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java @@ -190,9 +190,6 @@ public class PartitionManagementTask implements MetastoreTaskThread { // when invalid path is encountered as these are background threads. We just want to skip and move on. Users will // have to fix the invalid paths via external means. conf.set(MetastoreConf.ConfVars.MSCK_PATH_VALIDATION.getVarname(), "skip"); -// since msck runs in thread pool and each of them create their own metastore client, we don't want explosion of -// connections to metastore for embedded mode. Also we don't need too many db connections anyway. - conf.setInt(MetastoreConf.ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS.getVarname(), 2); } private static class MsckThread implements Runnable {
[hive] branch master updated: HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 567830e HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin) 567830e is described below commit 567830e9ca7d28e92cc3480602394f47a79ba308 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:28:09 2019 -0800 HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin) --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 .../hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java | 8 ++-- .../apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java | 6 +- .../hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java | 2 +- .../hive/llap/tezplugins/endpoint/LlapPluginServerImpl.java | 7 +-- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2156ff1..4a86b0a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4165,6 +4165,8 @@ public class HiveConf extends Configuration { LLAP_DAEMON_RPC_NUM_HANDLERS("hive.llap.daemon.rpc.num.handlers", 5, "Number of RPC handlers for LLAP daemon.", "llap.daemon.rpc.num.handlers"), +LLAP_PLUGIN_RPC_PORT("hive.llap.plugin.rpc.port", 0, + "Port to use for LLAP plugin rpc server"), LLAP_PLUGIN_RPC_NUM_HANDLERS("hive.llap.plugin.rpc.num.handlers", 1, "Number of RPC handlers for AM LLAP plugin endpoint."), LLAP_DAEMON_WORK_DIRS("hive.llap.daemon.work.dirs", "", @@ -4338,6 +4340,8 @@ public class HiveConf extends Configuration { "Sleep duration (in milliseconds) to wait before retrying on error when obtaining a\n" + "connection to LLAP daemon from Tez AM.", 
"llap.task.communicator.connection.sleep-between-retries-millis"), +LLAP_TASK_UMBILICAL_SERVER_PORT("hive.llap.daemon.umbilical.port", 0, + "LLAP task umbilical server RPC port"), LLAP_DAEMON_WEB_PORT("hive.llap.daemon.web.port", 15002, "LLAP daemon web UI port.", "llap.daemon.service.port"), LLAP_DAEMON_WEB_SSL("hive.llap.daemon.web.ssl", false, diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java b/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java index 89cb6fb..a16c0af 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java @@ -26,6 +26,7 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto; import org.apache.hadoop.hive.llap.protocol.LlapTaskUmbilicalProtocol; import org.apache.hadoop.ipc.RPC; @@ -53,11 +54,14 @@ public class LlapTaskUmbilicalServer { public LlapTaskUmbilicalServer(Configuration conf, LlapTaskUmbilicalProtocol umbilical, int numHandlers) throws IOException { jobTokenSecretManager = new JobTokenSecretManager(); - +int umbilicalPort = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT); +if (umbilicalPort <= 0) { + umbilicalPort = 0; +} server = new RPC.Builder(conf) .setProtocol(LlapTaskUmbilicalProtocol.class) .setBindAddress("0.0.0.0") -.setPort(0) +.setPort(umbilicalPort) .setInstance(umbilical) .setNumHandlers(numHandlers) .setSecretManager(jobTokenSecretManager).build(); diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index 0120bb6..dc10f22 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -255,10 +255,14 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl { int numHandlers = HiveConf.getIntVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT); + int umbilicalPort = HiveConf.getIntVar(conf, ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT); +
[hive] branch branch-3 updated: HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new d9b3833 HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin) d9b3833 is described below commit d9b3833e0c7b0ee008d1dd39bacc8f758170156f Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:28:09 2019 -0800 HIVE-20841: LLAP: Make dynamic ports configurable (Prasanth Jayachandran reviewed by Sergey Shelukhin) --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 .../hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java | 8 ++-- .../apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java | 6 +- .../hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java | 2 +- .../hive/llap/tezplugins/endpoint/LlapPluginServerImpl.java | 7 +-- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a04ef38..3bb482f 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4010,6 +4010,8 @@ public class HiveConf extends Configuration { LLAP_DAEMON_RPC_NUM_HANDLERS("hive.llap.daemon.rpc.num.handlers", 5, "Number of RPC handlers for LLAP daemon.", "llap.daemon.rpc.num.handlers"), +LLAP_PLUGIN_RPC_PORT("hive.llap.plugin.rpc.port", 0, + "Port to use for LLAP plugin rpc server"), LLAP_PLUGIN_RPC_NUM_HANDLERS("hive.llap.plugin.rpc.num.handlers", 1, "Number of RPC handlers for AM LLAP plugin endpoint."), LLAP_DAEMON_WORK_DIRS("hive.llap.daemon.work.dirs", "", @@ -4182,6 +4184,8 @@ public class HiveConf extends Configuration { "Sleep duration (in milliseconds) to wait before retrying on error when obtaining a\n" + "connection to LLAP daemon from Tez AM.", 
"llap.task.communicator.connection.sleep-between-retries-millis"), +LLAP_TASK_UMBILICAL_SERVER_PORT("hive.llap.daemon.umbilical.port", 0, + "LLAP task umbilical server RPC port"), LLAP_DAEMON_WEB_PORT("hive.llap.daemon.web.port", 15002, "LLAP daemon web UI port.", "llap.daemon.service.port"), LLAP_DAEMON_WEB_SSL("hive.llap.daemon.web.ssl", false, diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java b/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java index 89cb6fb..a16c0af 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/tezplugins/helpers/LlapTaskUmbilicalServer.java @@ -26,6 +26,7 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto; import org.apache.hadoop.hive.llap.protocol.LlapTaskUmbilicalProtocol; import org.apache.hadoop.ipc.RPC; @@ -53,11 +54,14 @@ public class LlapTaskUmbilicalServer { public LlapTaskUmbilicalServer(Configuration conf, LlapTaskUmbilicalProtocol umbilical, int numHandlers) throws IOException { jobTokenSecretManager = new JobTokenSecretManager(); - +int umbilicalPort = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT); +if (umbilicalPort <= 0) { + umbilicalPort = 0; +} server = new RPC.Builder(conf) .setProtocol(LlapTaskUmbilicalProtocol.class) .setBindAddress("0.0.0.0") -.setPort(0) +.setPort(umbilicalPort) .setInstance(umbilical) .setNumHandlers(numHandlers) .setSecretManager(jobTokenSecretManager).build(); diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java 
b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index 5d4ce22..2dfd359 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -254,10 +254,14 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl { int numHandlers = HiveConf.getIntVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT); + int umbilicalPort = HiveConf.getIntVar(conf, ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT); +
[hive] branch master updated: HIVE-21222: ACID: When there are no delete deltas skip finding min max keys (Prasanth Jayachandran reviewed by Eugene Koifman)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1368bd0 HIVE-21222: ACID: When there are no delete deltas skip finding min max keys (Prasanth Jayachandran reviewed by Eugene Koifman) 1368bd0 is described below commit 1368bd07b5dbc3747390c051e8512a2b41217933 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:22:21 2019 -0800 HIVE-21222: ACID: When there are no delete deltas skip finding min max keys (Prasanth Jayachandran reviewed by Eugene Koifman) --- .../ql/io/orc/VectorizedOrcAcidRowBatchReader.java | 3 ++- .../hive/ql/io/orc/TestInputOutputFormat.java | 4 +-- .../orc/TestVectorizedOrcAcidRowBatchReader.java | 31 -- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 6d1ca722..2349cda 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -390,7 +390,8 @@ public class VectorizedOrcAcidRowBatchReader private OrcRawRecordMerger.KeyInterval findMinMaxKeys( OrcSplit orcSplit, Configuration conf, Reader.Options deleteEventReaderOptions) throws IOException { -if(!HiveConf.getBoolVar(conf, ConfVars.FILTER_DELETE_EVENTS)) { +final boolean noDeleteDeltas = getDeleteDeltaDirsFromSplit(orcSplit).length == 0; +if(!HiveConf.getBoolVar(conf, ConfVars.FILTER_DELETE_EVENTS) || noDeleteDeltas) { LOG.debug("findMinMaxKeys() " + ConfVars.FILTER_DELETE_EVENTS + "=false"); return new OrcRawRecordMerger.KeyInterval(null, null); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 50ebbfa..5c13d45 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -802,8 +802,8 @@ public class TestInputOutputFormat { int readsAfter = fs.statistics.getReadOps(); System.out.println("STATS TRACE END - " + testCaseName.getMethodName()); int delta = readsAfter - readsBefore; - //HIVE-16812 adds 1 read of the footer of each file - assertEquals(16, delta); + //HIVE-16812 adds 1 read of the footer of each file (only if delete delta exists) + assertEquals(8, delta); } finally { MockFileSystem.clearGlobalFiles(); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 3382288..a8f18d1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.BucketCodec; @@ -60,6 +61,8 @@ import org.junit.Test; import static org.junit.Assert.*; +import com.google.common.collect.Lists; + /** * This class tests the VectorizedOrcAcidRowBatchReader by creating an actual split and a set * of delete delta files. 
The split is on an insert delta and there are multiple delete deltas @@ -549,7 +552,7 @@ public class TestVectorizedOrcAcidRowBatchReader { OrcSplit split = new OrcSplit(acidFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100, -new String[] {"localhost"}, null, false, true, new ArrayList<>(), +new String[] {"localhost"}, null, false, true, Lists.newArrayList(new AcidInputFormat.DeltaMetaData()), fileLength, fileLength, root, null); validateKeyInterval(split, new RecordIdentifier(1, 1, 1), @@ -560,7 +563,7 @@ public class TestVectorizedOrcAcidRowBatchReader { split = new OrcSplit(acidFilePath, null, stripe.getOffset() + 50, stripe.getLength() - 100, -new String[] {"localhost"}, null, false, true, new ArrayList<>(), +new String[] {"localhost"}, null, false, true, Lists.newArrayList(new AcidInputFormat.DeltaMetaData()), fileLength, fileL
[hive] branch master updated: HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new cfe2e9b HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashuotsh Chauhan) cfe2e9b is described below commit cfe2e9b307cd6c401ee3db2369eaa55e80b2e145 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:20:21 2019 -0800 HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashuotsh Chauhan) --- llap-server/bin/runLlapDaemon.sh | 7 ++- .../org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java | 1 + .../java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java | 7 +-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/llap-server/bin/runLlapDaemon.sh b/llap-server/bin/runLlapDaemon.sh index 30a62f2..69642f4 100755 --- a/llap-server/bin/runLlapDaemon.sh +++ b/llap-server/bin/runLlapDaemon.sh @@ -27,6 +27,7 @@ set -x # LLAP_DAEMON_LOGGER - default is console # LLAP_DAEMON_LOG_DIR - defaults to /tmp # LLAP_DAEMON_TMP_DIR - defaults to /tmp +# LLAP_LOG4J2_PROPERTIES_FILE_NAME - defaults to llap-daemon-log4j2.properties # LLAP_DAEMON_LOG_FILE - # LLAP_DAEMON_CONF_DIR @@ -90,6 +91,10 @@ if [ "$LLAP_DAEMON_LOGFILE" = "" ]; then LLAP_DAEMON_LOG_FILE='llapdaemon.log' fi +if [ "LLAP_LOG4J2_PROPERTIES_FILE_NAME" = "" ]; then + LLAP_LOG4J2_PROPERTIES_FILE_NAME='llap-daemon-log4j2.properties' +fi + if [ "$LLAP_DAEMON_HEAPSIZE" = "" ]; then LLAP_DAEMON_HEAPSIZE=4096 fi @@ -121,7 +126,7 @@ if [ -n "$LLAP_DAEMON_TMP_DIR" ]; then export LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Djava.io.tmpdir=$LLAP_DAEMON_TMP_DIR" fi -LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dlog4j.configurationFile=llap-daemon-log4j2.properties" +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dlog4j.configurationFile=${LLAP_LOG4J2_PROPERTIES_FILE_NAME}" 
LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.dir=${LLAP_DAEMON_LOG_DIR}" LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.file=${LLAP_DAEMON_LOG_FILE}" LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.root.logger=${LLAP_DAEMON_LOGGER}" diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java index 112173c..b421e0b 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; @InterfaceAudience.Private public class LlapConstants { public static final String LOG4j2_PROPERTIES_FILE = "llap-daemon-log4j2.properties"; + public static final String LLAP_LOG4J2_PROPERTIES_FILE_NAME_ENV = "LLAP_LOG4J2_PROPERTIES_FILE_NAME"; public static final String LLAP_HADOOP_METRICS2_PROPERTIES_FILE = "hadoop-metrics2-llapdaemon.properties"; public static final String HADOOP_METRICS2_PROPERTIES_FILE = "hadoop-metrics2.properties"; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index 940be0e..2279f8d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -352,8 +352,11 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla private static void initializeLogging(final Configuration conf) { long start = System.currentTimeMillis(); -URL llap_l4j2 = LlapDaemon.class.getClassLoader().getResource( -LlapConstants.LOG4j2_PROPERTIES_FILE); +String log4j2FileName = System.getenv(LlapConstants.LLAP_LOG4J2_PROPERTIES_FILE_NAME_ENV); +if (log4j2FileName == null || log4j2FileName.isEmpty()) { + 
log4j2FileName = LlapConstants.LOG4j2_PROPERTIES_FILE; +} +URL llap_l4j2 = LlapDaemon.class.getClassLoader().getResource(log4j2FileName); if (llap_l4j2 != null) { final boolean async = LogUtils.checkAndSetAsyncLogging(conf); // required for MDC based routing appender so that child threads can inherit the MDC context
[hive] branch branch-3 updated: HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 05cedd4 HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashuotsh Chauhan) 05cedd4 is described below commit 05cedd4ae7c0dc00292665590726e343ab76f211 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:20:21 2019 -0800 HIVE-21235: LLAP: make the name of log4j2 properties file configurable (Prasanth Jayachandran reviewed by Ashuotsh Chauhan) --- llap-server/bin/runLlapDaemon.sh | 7 ++- .../org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java | 1 + .../java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java | 7 +-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/llap-server/bin/runLlapDaemon.sh b/llap-server/bin/runLlapDaemon.sh index 1c128c6..ae2621c 100755 --- a/llap-server/bin/runLlapDaemon.sh +++ b/llap-server/bin/runLlapDaemon.sh @@ -27,6 +27,7 @@ set -x # LLAP_DAEMON_LOGGER - default is console # LLAP_DAEMON_LOG_DIR - defaults to /tmp # LLAP_DAEMON_TMP_DIR - defaults to /tmp +# LLAP_LOG4J2_PROPERTIES_FILE_NAME - defaults to llap-daemon-log4j2.properties # LLAP_DAEMON_LOG_FILE - # LLAP_DAEMON_CONF_DIR @@ -90,6 +91,10 @@ if [ "$LLAP_DAEMON_LOGFILE" = "" ]; then LLAP_DAEMON_LOG_FILE='llapdaemon.log' fi +if [ "LLAP_LOG4J2_PROPERTIES_FILE_NAME" = "" ]; then + LLAP_LOG4J2_PROPERTIES_FILE_NAME='llap-daemon-log4j2.properties' +fi + if [ "$LLAP_DAEMON_HEAPSIZE" = "" ]; then LLAP_DAEMON_HEAPSIZE=4096 fi @@ -121,7 +126,7 @@ if [ -n "$LLAP_DAEMON_TMP_DIR" ]; then export LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Djava.io.tmpdir=$LLAP_DAEMON_TMP_DIR" fi -LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dlog4j.configurationFile=llap-daemon-log4j2.properties" +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dlog4j.configurationFile=${LLAP_LOG4J2_PROPERTIES_FILE_NAME}" 
LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.dir=${LLAP_DAEMON_LOG_DIR}" LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.file=${LLAP_DAEMON_LOG_FILE}" LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.root.logger=${LLAP_DAEMON_LOGGER}" diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java index 112173c..b421e0b 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapConstants.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; @InterfaceAudience.Private public class LlapConstants { public static final String LOG4j2_PROPERTIES_FILE = "llap-daemon-log4j2.properties"; + public static final String LLAP_LOG4J2_PROPERTIES_FILE_NAME_ENV = "LLAP_LOG4J2_PROPERTIES_FILE_NAME"; public static final String LLAP_HADOOP_METRICS2_PROPERTIES_FILE = "hadoop-metrics2-llapdaemon.properties"; public static final String HADOOP_METRICS2_PROPERTIES_FILE = "hadoop-metrics2.properties"; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index 52990c5..b13a2fd 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -338,8 +338,11 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla private static void initializeLogging(final Configuration conf) { long start = System.currentTimeMillis(); -URL llap_l4j2 = LlapDaemon.class.getClassLoader().getResource( -LlapConstants.LOG4j2_PROPERTIES_FILE); +String log4j2FileName = System.getenv(LlapConstants.LLAP_LOG4J2_PROPERTIES_FILE_NAME_ENV); +if (log4j2FileName == null || log4j2FileName.isEmpty()) { + 
log4j2FileName = LlapConstants.LOG4j2_PROPERTIES_FILE; +} +URL llap_l4j2 = LlapDaemon.class.getClassLoader().getResource(log4j2FileName); if (llap_l4j2 != null) { final boolean async = LogUtils.checkAndSetAsyncLogging(conf); // required for MDC based routing appender so that child threads can inherit the MDC context
[hive] branch branch-3 updated: HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new c0bb1db HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta) c0bb1db is described below commit c0bb1db0bf3fa6853be2777ba20fb9a56a713962 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:18:08 2019 -0800 HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta) --- .../main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 2b03d87..39cba08 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -1511,6 +1511,8 @@ public class CachedStore implements RawStore, Configurable { PrincipalPrivilegeSet privs = getPartitionPrivilegeSet(catName, dbName, tblName, partName, userName, groupNames); p.setPrivileges(privs); +} else { + throw new NoSuchObjectException("partition values=" + partVals.toString()); } return p; }
[hive] branch master updated: HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 922b155 HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta) 922b155 is described below commit 922b155e85e9fdb5bcc27ac5874503483a591021 Author: Prasanth Jayachandran AuthorDate: Tue Feb 12 00:18:08 2019 -0800 HIVE-21223: CachedStore returns null partition when partition does not exist (Prasanth Jayachandran reviewed by Vaibhav Gumashta) --- .../main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 182d5cc..bded743 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -1884,6 +1884,8 @@ public class CachedStore implements RawStore, Configurable { PrincipalPrivilegeSet privs = getPartitionPrivilegeSet(catName, dbName, tblName, partName, userName, groupNames); p.setPrivileges(privs); +} else { + throw new NoSuchObjectException("partition values=" + partVals.toString()); } return p; }
[hive] branch master updated: HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran) (addendum)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 6508716 HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran) (addendum) 6508716 is described below commit 650871623ef435d359c07d4c90d76c439c45e976 Author: David McGinnis <656337+davidov...@users.noreply.github.com> AuthorDate: Wed Feb 6 22:22:20 2019 -0800 HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran) (addendum) --- service/src/test/resources/creds/test.jceks | Bin 0 -> 534 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/service/src/test/resources/creds/test.jceks b/service/src/test/resources/creds/test.jceks new file mode 100755 index 000..8d58c41 Binary files /dev/null and b/service/src/test/resources/creds/test.jceks differ
[hive] branch master updated: HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran)
This is an automated email from the ASF dual-hosted git repository. prasanthj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 0e4d16b HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran) 0e4d16b is described below commit 0e4d16b462bf9abd7ec58e60936e24ee4302736c Author: David McGinnis <656337+davidov...@users.noreply.github.com> AuthorDate: Wed Feb 6 14:52:16 2019 -0800 HIVE-21009: Adding ability for user to set bind user (David McGinnis reviewed by Prasanth Jayachandran) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 10 ++ service/pom.xml| 11 ++ .../auth/LdapAuthenticationProviderImpl.java | 32 +- .../auth/TestLdapAuthenticationProviderImpl.java | 113 + 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a3b03ca..2156ff1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3499,6 +3499,16 @@ public class HiveConf extends Configuration { "For example: (&(objectClass=group)(objectClass=top)(instanceType=4)(cn=Domain*)) \n" + "(&(objectClass=person)(|(sAMAccountName=admin)(|(memberOf=CN=Domain Admins,CN=Users,DC=domain,DC=com)" + "(memberOf=CN=Administrators,CN=Builtin,DC=domain,DC=com"), + HIVE_SERVER2_PLAIN_LDAP_BIND_USER("hive.server2.authentication.ldap.binddn", null, +"The user with which to bind to the LDAP server, and search for the full domain name " + +"of the user being authenticated.\n" + +"This should be the full domain name of the user, and should have search access across all " + +"users in the LDAP tree.\n" + +"If not specified, then the user being authenticated will be used as the bind user.\n" + +"For example: 
CN=bindUser,CN=Users,DC=subdomain,DC=domain,DC=com"), + HIVE_SERVER2_PLAIN_LDAP_BIND_PASSWORD("hive.server2.authentication.ldap.bindpw", null, +"The password for the bind user, to be used to search for the full name of the user being authenticated.\n" + +"If the username is specified, this parameter must also be specified."), HIVE_SERVER2_CUSTOM_AUTHENTICATION_CLASS("hive.server2.custom.authentication.class", null, "Custom authentication class. Used when property\n" + "'hive.server2.authentication' is set to 'CUSTOM'. Provided class\n" + diff --git a/service/pom.xml b/service/pom.xml index eca6f3b..30b7398 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -36,6 +36,17 @@ org.apache.hive + hive-common + ${project.version} + + + org.eclipse.jetty.aggregate + jetty-all + + + + + org.apache.hive hive-exec ${project.version} diff --git a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java index 73bbb6b..0120513 100644 --- a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java +++ b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java @@ -18,9 +18,10 @@ package org.apache.hive.service.auth; import javax.security.sasl.AuthenticationException; - +import javax.naming.NamingException; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +import java.io.IOException; import java.util.Iterator; import java.util.List; import org.apache.commons.lang.StringUtils; @@ -68,9 +69,36 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi @Override public void Authenticate(String user, String password) throws AuthenticationException { DirSearch search = null; +String bindUser = this.conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_BIND_USER); +String bindPassword = null; +try { + char[] rawPassword = 
this.conf.getPassword(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_BIND_PASSWORD.toString()); + if (rawPassword != null) { +bindPassword = new String(rawPassword); + } +} catch (IOException e) { + bindPassword = null; +} +boolean usedBind = bindUser != null && bindPassword != null; +if (!usedBind) { + // If no bind user or bind password was specified, + // we assume the user we are authenticating has the ability to search + // the LDAP tre
hive git commit: HIVE-20785: Wrong key name in the JDBC DatabaseMetaData.getPrimaryKeys method (Guillaume Grossetie reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 3db928668 -> 1ea6e49cf HIVE-20785: Wrong key name in the JDBC DatabaseMetaData.getPrimaryKeys method (Guillaume Grossetie reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ea6e49c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ea6e49c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ea6e49c Branch: refs/heads/branch-3 Commit: 1ea6e49cf9eac41908ee2952917eba7bce656179 Parents: 3db9286 Author: Guillaume Grossetie Authored: Mon Dec 17 16:59:37 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Dec 17 17:02:08 2018 -0800 -- .../apache/hive/service/cli/operation/GetPrimaryKeysOperation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ea6e49c/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java index e603fdd..2daa60e 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java @@ -60,7 +60,7 @@ PK_NAME String => primary key name (may be null) "Table name") .addPrimitiveColumn("COLUMN_NAME", Type.STRING_TYPE, "Column name") - .addPrimitiveColumn("KEQ_SEQ", Type.INT_TYPE, + .addPrimitiveColumn("KEY_SEQ", Type.INT_TYPE, "Sequence number within primary key") .addPrimitiveColumn("PK_NAME", Type.STRING_TYPE, "Primary key name (may be null)");
hive git commit: HIVE-20785: Wrong key name in the JDBC DatabaseMetaData.getPrimaryKeys method (Guillaume Grossetie reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master ec5ce5da8 -> 87f8eccf9 HIVE-20785: Wrong key name in the JDBC DatabaseMetaData.getPrimaryKeys method (Guillaume Grossetie reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87f8eccf Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87f8eccf Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87f8eccf Branch: refs/heads/master Commit: 87f8eccf92c4be11858b1559e501f854eb814542 Parents: ec5ce5d Author: Guillaume Grossetie Authored: Mon Dec 17 16:59:37 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Dec 17 17:01:14 2018 -0800 -- .../apache/hive/service/cli/operation/GetPrimaryKeysOperation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/87f8eccf/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java -- diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java index 55f4ab6..0e5d5c1 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java @@ -62,7 +62,7 @@ PK_NAME String => primary key name (may be null) "Table name") .addPrimitiveColumn("COLUMN_NAME", Type.STRING_TYPE, "Column name") - .addPrimitiveColumn("KEQ_SEQ", Type.INT_TYPE, + .addPrimitiveColumn("KEY_SEQ", Type.INT_TYPE, "Sequence number within primary key") .addPrimitiveColumn("PK_NAME", Type.STRING_TYPE, "Primary key name (may be null)");
hive git commit: HIVE-20979: Fix memory leak in hive streaming (Shubham Chaurasia reviewed by Prasanth, Eric, Ashutosh)
Repository: hive Updated Branches: refs/heads/branch-3 0a1bc3583 -> a7b3cf4bd HIVE-20979: Fix memory leak in hive streaming (Shubham Chaurasia reviewed by Prasanth, Eric, Ashutosh) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a7b3cf4b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a7b3cf4b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a7b3cf4b Branch: refs/heads/branch-3 Commit: a7b3cf4bd2239876b323ab544e12d547d875b6c4 Parents: 0a1bc35 Author: Shubham Chaurasia Authored: Mon Dec 10 01:33:00 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Dec 10 01:39:44 2018 -0800 -- .../java/org/apache/hive/streaming/AbstractRecordWriter.java | 5 + .../org/apache/hive/streaming/HiveStreamingConnection.java | 8 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a7b3cf4b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java index 0408599..0653a5d 100644 --- a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java +++ b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java @@ -383,6 +383,11 @@ public abstract class AbstractRecordWriter implements RecordWriter { if (LOG.isDebugEnabled()) { logStats("Stats after close:"); } +try { + this.fs.close(); +} catch (IOException e) { + throw new StreamingIOFailure("Error while closing FileSystem", e); +} if (haveError) { throw new StreamingIOFailure("Encountered errors while closing (see logs) " + getWatermark(partition)); } http://git-wip-us.apache.org/repos/asf/hive/blob/a7b3cf4b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java 
b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java index 6cf14b0..8ca8fe2 100644 --- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java +++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java @@ -158,6 +158,7 @@ public class HiveStreamingConnection implements StreamingConnection { private Table tableObject = null; private String metastoreUri; private ConnectionStats connectionStats; + private Runnable onShutdownRunner; private HiveStreamingConnection(Builder builder) throws StreamingException { this.database = builder.database.toLowerCase(); @@ -330,9 +331,10 @@ public class HiveStreamingConnection implements StreamingConnection { throw new StreamingException("Record writer cannot be null for streaming connection"); } HiveStreamingConnection streamingConnection = new HiveStreamingConnection(this); + streamingConnection.onShutdownRunner = streamingConnection::close; // assigning higher priority than FileSystem shutdown hook so that streaming connection gets closed first before // filesystem close (to avoid ClosedChannelException) - ShutdownHookManager.addShutdownHook(streamingConnection::close, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1); + ShutdownHookManager.addShutdownHook(streamingConnection.onShutdownRunner, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1); Thread.setDefaultUncaughtExceptionHandler((t, e) -> streamingConnection.close()); return streamingConnection; } @@ -551,6 +553,10 @@ public class HiveStreamingConnection implements StreamingConnection { } finally { getMSC().close(); getHeatbeatMSC().close(); + //remove shutdown hook entry added while creating this connection via HiveStreamingConnection.Builder#connect() + if (!ShutdownHookManager.isShutdownInProgress()) { +ShutdownHookManager.removeShutdownHook(this.onShutdownRunner); + } } if (LOG.isInfoEnabled()) { LOG.info("Closed streaming connection. Agent: {} Stats: {}", getAgentInfo(), getConnectionStats());
hive git commit: HIVE-20979: Fix memory leak in hive streaming (Shubham Chaurasia reviewed by Prasanth, Eric, Ashutosh)
Repository: hive Updated Branches: refs/heads/master 706bf724e -> f5618d922 HIVE-20979: Fix memory leak in hive streaming (Shubham Chaurasia reviewed by Prasanth, Eric, Ashutosh) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f5618d92 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f5618d92 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f5618d92 Branch: refs/heads/master Commit: f5618d9227e5f6e643aaf9d8d625dc1fc42180dc Parents: 706bf72 Author: Prasanth Jayachandran Authored: Mon Dec 10 01:33:00 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Dec 10 01:33:00 2018 -0800 -- .../java/org/apache/hive/streaming/AbstractRecordWriter.java | 5 + .../org/apache/hive/streaming/HiveStreamingConnection.java | 8 +++- 2 files changed, 12 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f5618d92/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java index e7588e8..14d34d4 100644 --- a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java +++ b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java @@ -394,6 +394,11 @@ public abstract class AbstractRecordWriter implements RecordWriter { if (LOG.isDebugEnabled()) { logStats("Stats after close:"); } +try { + this.fs.close(); +} catch (IOException e) { + throw new StreamingIOFailure("Error while closing FileSystem", e); +} if (haveError) { throw new StreamingIOFailure("Encountered errors while closing (see logs) " + getWatermark(partition)); } http://git-wip-us.apache.org/repos/asf/hive/blob/f5618d92/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java 
b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java index 74fc531..a32aa62 100644 --- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java +++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java @@ -147,6 +147,7 @@ public class HiveStreamingConnection implements StreamingConnection { private int countTransactions = 0; private Set partitions; private Long tableId; + private Runnable onShutdownRunner; private HiveStreamingConnection(Builder builder) throws StreamingException { this.database = builder.database.toLowerCase(); @@ -389,9 +390,10 @@ public class HiveStreamingConnection implements StreamingConnection { } HiveStreamingConnection streamingConnection = new HiveStreamingConnection(this); + streamingConnection.onShutdownRunner = streamingConnection::close; // assigning higher priority than FileSystem shutdown hook so that streaming connection gets closed first before // filesystem close (to avoid ClosedChannelException) - ShutdownHookManager.addShutdownHook(streamingConnection::close, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1); + ShutdownHookManager.addShutdownHook(streamingConnection.onShutdownRunner, FileSystem.SHUTDOWN_HOOK_PRIORITY + 1); Thread.setDefaultUncaughtExceptionHandler((t, e) -> streamingConnection.close()); return streamingConnection; } @@ -651,6 +653,10 @@ public class HiveStreamingConnection implements StreamingConnection { getMSC().close(); getHeatbeatMSC().close(); } + //remove shutdown hook entry added while creating this connection via HiveStreamingConnection.Builder#connect() + if (!ShutdownHookManager.isShutdownInProgress()) { +ShutdownHookManager.removeShutdownHook(this.onShutdownRunner); + } } if (LOG.isInfoEnabled()) { LOG.info("Closed streaming connection. Agent: {} Stats: {}", getAgentInfo(), getConnectionStats());
hive git commit: HIVE-20981: streaming/AbstractRecordWriter leaks HeapMemoryMonitor (Eric Wohlstadter, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/branch-3 db8e9b0ef -> 0a1bc3583 HIVE-20981: streaming/AbstractRecordWriter leaks HeapMemoryMonitor (Eric Wohlstadter, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0a1bc358 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0a1bc358 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0a1bc358 Branch: refs/heads/branch-3 Commit: 0a1bc358399f9b14999f27bfcb965318fe5ece11 Parents: db8e9b0 Author: Eric Wohlstadter Authored: Thu Nov 29 12:35:01 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Dec 10 01:28:46 2018 -0800 -- .../hadoop/hive/common/HeapMemoryMonitor.java | 22 +--- .../hive/streaming/AbstractRecordWriter.java| 1 + 2 files changed, 20 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0a1bc358/common/src/java/org/apache/hadoop/hive/common/HeapMemoryMonitor.java -- diff --git a/common/src/java/org/apache/hadoop/hive/common/HeapMemoryMonitor.java b/common/src/java/org/apache/hadoop/hive/common/HeapMemoryMonitor.java index 42286be..56ec2fd 100644 --- a/common/src/java/org/apache/hadoop/hive/common/HeapMemoryMonitor.java +++ b/common/src/java/org/apache/hadoop/hive/common/HeapMemoryMonitor.java @@ -28,6 +28,8 @@ import java.util.ArrayList; import java.util.List; import javax.management.NotificationEmitter; +import javax.management.NotificationListener; +import javax.management.ListenerNotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +46,7 @@ public class HeapMemoryMonitor { private final double threshold; private List listeners = new ArrayList<>(); + private NotificationListener notificationListener; public interface Listener { void memoryUsageAboveThreshold(long usedMemory, long maxMemory); @@ -140,7 +143,7 @@ public class HeapMemoryMonitor { } MemoryMXBean mxBean = ManagementFactory.getMemoryMXBean(); NotificationEmitter emitter = 
(NotificationEmitter) mxBean; -emitter.addNotificationListener((n, hb) -> { +notificationListener = (n, hb) -> { if (n.getType().equals( MemoryNotificationInfo.MEMORY_COLLECTION_THRESHOLD_EXCEEDED)) { long maxMemory = tenuredGenPool.getUsage().getMax(); @@ -149,6 +152,19 @@ public class HeapMemoryMonitor { listener.memoryUsageAboveThreshold(usedMemory, maxMemory); } } -}, null, null); +}; +emitter.addNotificationListener(notificationListener, null, null); } -} \ No newline at end of file + + public void close() { +if(notificationListener != null) { + MemoryMXBean mxBean = ManagementFactory.getMemoryMXBean(); + NotificationEmitter emitter = (NotificationEmitter) mxBean; + try { +emitter.removeNotificationListener(notificationListener); + } catch(ListenerNotFoundException e) { +LOG.warn("Failed to remove HeapMemoryMonitor notification listener from MemoryMXBean", e); + } +} + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/0a1bc358/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java index 9e90d36..0408599 100644 --- a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java +++ b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java @@ -355,6 +355,7 @@ public abstract class AbstractRecordWriter implements RecordWriter { @Override public void close() throws StreamingIOFailure { +heapMemoryMonitor.close(); boolean haveError = false; String partition = null; if (LOG.isDebugEnabled()) {
hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 558876462 -> bc39c4998 HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc39c499 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc39c499 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc39c499 Branch: refs/heads/master Commit: bc39c49988c8a5d881a23ed7dd5d4adba0509ee9 Parents: 5588764 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:24 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bc39c499/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 cd4491900 -> 4663e50e7 HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4663e50e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4663e50e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4663e50e Branch: refs/heads/branch-3 Commit: 4663e50e709c1f836acb34841a72f1dfc9f31da9 Parents: cd44919 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:45 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4663e50e/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
[4/4] hive git commit: HIVE-20707: Automatic partition management (Prasanth Jayachandran reviewed by Jason Dere)
HIVE-20707: Automatic partition management (Prasanth Jayachandran reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/64bea035 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/64bea035 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/64bea035 Branch: refs/heads/master Commit: 64bea0354fba2947e4bc0318728f5419e5d763b9 Parents: 54bba9c Author: Prasanth Jayachandran Authored: Mon Oct 29 15:07:49 2018 -0700 Committer: Prasanth Jayachandran Committed: Mon Oct 29 15:07:49 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 12 + .../results/positive/external_table_ppd.q.out | 1 + .../positive/hbase_binary_storage_queries.q.out | 2 + .../src/test/results/positive/hbase_ddl.q.out | 2 + .../test/results/positive/hbase_queries.q.out | 1 + .../src/test/results/positive/hbasestats.q.out | 5 + .../hive/ql/txn/compactor/TestCompactor.java| 1 - .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 290 + .../apache/hadoop/hive/ql/exec/ExplainTask.java | 40 +- .../hadoop/hive/ql/metadata/CheckResult.java| 142 - .../hive/ql/metadata/HiveMetaStoreChecker.java | 567 -- .../hive/ql/optimizer/GenMapRedUtils.java | 21 +- .../hive/ql/parse/DDLSemanticAnalyzer.java | 6 +- .../hadoop/hive/ql/plan/CreateTableDesc.java| 6 + .../exec/TestMsckCreatePartitionsInBatches.java | 244 +--- .../exec/TestMsckDropPartitionsInBatches.java | 125 ++-- .../ql/metadata/TestHiveMetaStoreChecker.java | 187 +++--- .../queries/clientpositive/msck_repair_acid.q | 34 ++ .../clientpositive/partition_discovery.q| 77 +++ .../results/clientpositive/create_like.q.out| 1 + .../clientpositive/create_like_view.q.out | 1 + .../clientpositive/default_file_format.q.out| 4 + .../druid/druidkafkamini_basic.q.out| 2 + .../druid/druidmini_expressions.q.out | 2 + .../results/clientpositive/druid_topn.q.out | 1 + .../results/clientpositive/explain_locks.q.out | 1 + .../llap/external_table_purge.q.out | 4 + 
.../results/clientpositive/llap/mm_exim.q.out | 1 + .../llap/strict_managed_tables2.q.out | 2 + .../llap/table_nonprintable.q.out | 2 +- .../clientpositive/llap/whroot_external1.q.out | 6 + .../clientpositive/msck_repair_acid.q.out | 88 +++ .../clientpositive/msck_repair_drop.q.out | 68 +-- .../clientpositive/partition_discovery.q.out| 357 .../rename_external_partition_location.q.out| 2 + .../clientpositive/repl_2_exim_basic.q.out | 2 + .../show_create_table_alter.q.out | 5 + .../show_create_table_partitioned.q.out | 1 + .../show_create_table_serde.q.out | 1 + .../clientpositive/spark/stats_noscan_2.q.out | 2 + .../results/clientpositive/stats_noscan_2.q.out | 2 + .../temp_table_display_colstats_tbllvl.q.out| 5 + .../hadoop/hive/metastore/CheckResult.java | 153 + .../apache/hadoop/hive/metastore/Warehouse.java | 2 +- .../hive/metastore/api/MetastoreException.java | 36 ++ .../hive/metastore/conf/MetastoreConf.java | 59 +- .../hive/metastore/utils/MetaStoreUtils.java| 55 +- .../hive/metastore/HiveMetaStoreChecker.java| 571 ++ .../org/apache/hadoop/hive/metastore/Msck.java | 530 + .../apache/hadoop/hive/metastore/MsckInfo.java | 125 .../metastore/MsckPartitionExpressionProxy.java | 64 ++ .../hadoop/hive/metastore/ObjectStore.java | 6 +- .../hive/metastore/PartitionIterable.java | 163 ++ .../hive/metastore/PartitionManagementTask.java | 235 .../metastore/utils/MetaStoreServerUtils.java | 167 +- .../hive/metastore/utils/RetryUtilities.java| 110 .../hive/metastore/NonCatCallsWithCatalog.java | 4 +- .../hive/metastore/TestCatalogOldClient.java| 4 +- .../hive/metastore/TestPartitionManagement.java | 581 +++ .../hive/metastore/client/TestGetTableMeta.java | 11 +- 60 files changed, 3891 insertions(+), 1308 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 
e226a1f..917aaeb 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4415,17 +4415,29 @@ public class HiveConf extends Configuration { "Merge adjacent joins into a single n-way join"),
[3/4] hive git commit: HIVE-20707: Automatic partition management (Prasanth Jayachandran reviewed by Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java index 9480d38..1ec4636 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.exec; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.util.ArrayList; @@ -27,16 +28,22 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.CheckResult.PartitionResult; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.Msck; +import org.apache.hadoop.hive.metastore.PartitionDropOptions; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetastoreException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.RetryUtilities; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; -import 
org.apache.hive.common.util.RetryUtilities.RetryException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -47,57 +54,71 @@ import org.mockito.Mockito; /** * Unit test for function dropPartitionsInBatches in DDLTask. - * **/ public class TestMsckDropPartitionsInBatches { private static HiveConf hiveConf; - private static DDLTask ddlTask; + private static Msck msck; + private final String catName = "hive"; + private final String dbName = "default"; private final String tableName = "test_msck_batch"; - private static Hive db; + private static IMetaStoreClient db; private List repairOutput; private Table table; @BeforeClass - public static void setupClass() throws HiveException { + public static void setupClass() throws Exception { hiveConf = new HiveConf(TestMsckCreatePartitionsInBatches.class); hiveConf.setIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE, 5); hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); SessionState.start(hiveConf); -db = Hive.get(hiveConf); -ddlTask = new DDLTask(); +db = new HiveMetaStoreClient(hiveConf); +msck = new Msck( false, false); +msck.init(hiveConf); } @Before public void before() throws Exception { -createPartitionedTable("default", tableName); -table = db.getTable(tableName); +createPartitionedTable(catName, dbName, tableName); +table = db.getTable(catName, dbName, tableName); repairOutput = new ArrayList(); } @After public void after() throws Exception { -cleanUpTableQuietly("default", tableName); +cleanUpTableQuietly(catName, dbName, tableName); } - private Table createPartitionedTable(String dbName, String tableName) throws Exception { + private Table createPartitionedTable(String catName, String dbName, String tableName) throws Exception { try { - db.dropTable(dbName, tableName); - 
db.createTable(tableName, Arrays.asList("key", "value"), // Data columns. - Arrays.asList("city"), // Partition columns. - TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - return db.getTable(dbName, tableName); + db.dropTable(catName, dbName, tableName); + Table table = new Table(); + table.setCatName(catName); + table.setDbName(dbName); + table.setTableName(tableName); + FieldSchema col1 = new FieldSchema("key", "string", ""); + FieldSchema col2 = new FieldSchema("value", "int", ""); + FieldSchema col3 = new FieldSchema("city", "string", ""); + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); +
[1/4] hive git commit: HIVE-20707: Automatic partition management (Prasanth Jayachandran reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 54bba9cbf -> 64bea0354 http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java -- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java new file mode 100644 index 000..2837ff4 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.MetastoreException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; + + +/** + * PartitionIterable - effectively a lazy Iterable + * Sometimes, we have a need for iterating through a list of partitions, + * but the list of partitions can be too big to fetch as a single object. + * Thus, the goal of PartitionIterable is to act as an Iterable + * while lazily fetching each relevant partition, one after the other as + * independent metadata calls. + * It is very likely that any calls to PartitionIterable are going to result + * in a large number of calls, so use sparingly only when the memory cost + * of fetching all the partitions in one shot is too prohibitive. + * This is still pretty costly in that it would retain a list of partition + * names, but that should be far less expensive than the entire partition + * objects. + * Note that remove() is an illegal call on this, and will result in an + * IllegalStateException. 
+ */ +public class PartitionIterable implements Iterable { + + @Override + public Iterator iterator() { +return new Iterator() { + + private boolean initialized = false; + private Iterator ptnsIterator = null; + + private Iterator partitionNamesIter = null; + private Iterator batchIter = null; + + private void initialize() { +if (!initialized) { + if (currType == Type.LIST_PROVIDED) { +ptnsIterator = ptnsProvided.iterator(); + } else { +partitionNamesIter = partitionNames.iterator(); + } + initialized = true; +} + } + + @Override + public boolean hasNext() { +initialize(); +if (currType == Type.LIST_PROVIDED) { + return ptnsIterator.hasNext(); +} else { + return ((batchIter != null) && batchIter.hasNext()) || partitionNamesIter.hasNext(); +} + } + + @Override + public Partition next() { +initialize(); +if (currType == Type.LIST_PROVIDED) { + return ptnsIterator.next(); +} + +if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); +} + +return batchIter.next(); + } + + private void getNextBatch() { +int batch_counter = 0; +List nameBatch = new ArrayList(); +while (batch_counter < batch_size && partitionNamesIter.hasNext()) { + nameBatch.add(partitionNamesIter.next()); + batch_counter++; +} +try { + batchIter = +msc.getPartitionsByNames(table.getCatName(), table.getDbName(), table.getTableName(), nameBatch).iterator(); +} catch (Exception e) { + throw new RuntimeException(e); +} + } + + @Override + public void remove() { +throw new IllegalStateException( + "PartitionIterable is a read-only iterable and remove() is unsupported"); + } +}; + } + + enum Type { +LIST_PROVIDED, // Where a List ptnsProvided = null; + + // used for LAZY_FETCH_PARTITIONS cases + private IMetaStoreClient msc = null; // Assumes one instance of this + single-threaded compilation for each query. +
[2/4] hive git commit: HIVE-20707: Automatic partition management (Prasanth Jayachandran reviewed by Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/ql/src/test/results/clientpositive/rename_external_partition_location.q.out -- diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 02cd814..d854887 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -103,6 +103,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNALTRUE bucketing_version 2 + discover.partitions true numFiles1 numPartitions 1 numRows 10 @@ -266,6 +267,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNALTRUE bucketing_version 2 + discover.partitions true numFiles1 numPartitions 1 numRows 10 http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out -- diff --git a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out index b2bcd51..40b6ad7 100644 --- a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out +++ b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out @@ -345,6 +345,7 @@ LOCATION A masked pattern was here TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: select * from ext_t_imported PREHOOK: type: QUERY @@ -426,6 +427,7 @@ LOCATION TBLPROPERTIES ( 'EXTERNAL'='FALSE', 'bucketing_version'='2', + 'discover.partitions'='true', 'repl.last.id'='0', A masked pattern was here PREHOOK: query: select * from ext_t_r_imported http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/ql/src/test/results/clientpositive/show_create_table_alter.q.out -- diff --git a/ql/src/test/results/clientpositive/show_create_table_alter.q.out b/ql/src/test/results/clientpositive/show_create_table_alter.q.out index 
2c75c36..9d93ee9 100644 --- a/ql/src/test/results/clientpositive/show_create_table_alter.q.out +++ b/ql/src/test/results/clientpositive/show_create_table_alter.q.out @@ -32,6 +32,7 @@ LOCATION A masked pattern was here TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -67,6 +68,7 @@ LOCATION TBLPROPERTIES ( 'EXTERNAL'='FALSE', 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -101,6 +103,7 @@ LOCATION A masked pattern was here TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -135,6 +138,7 @@ LOCATION A masked pattern was here TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -169,6 +173,7 @@ LOCATION A masked pattern was here TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', A masked pattern was here PREHOOK: query: DROP TABLE tmp_showcrt1_n1 PREHOOK: type: DROPTABLE http://git-wip-us.apache.org/repos/asf/hive/blob/64bea035/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out -- diff --git a/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out b/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out index e554a18..8a56bfc 100644 --- a/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out +++
hive git commit: HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master cbe3228c2 -> 7765e90aa HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7765e90a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7765e90a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7765e90a Branch: refs/heads/master Commit: 7765e90aad44747860b3c1adbe8a4857d864912d Parents: cbe3228 Author: Jaume Marhuenda Authored: Mon Oct 22 14:18:20 2018 -0700 Committer: Prasanth Jayachandran Committed: Mon Oct 22 14:18:49 2018 -0700 -- .../streaming/AbstractStreamingTransaction.java | 6 ++- .../hive/streaming/HiveStreamingConnection.java | 13 +-- .../hive/streaming/StreamingConnection.java | 23 --- .../hive/streaming/StreamingTransaction.java| 14 ++- .../apache/hive/streaming/TransactionBatch.java | 26 +++-- .../streaming/UnManagedSingleTransaction.java | 3 +- .../apache/hive/streaming/TestStreaming.java| 41 +++- 7 files changed, 109 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java index a99fdba..6ab3ffe 100644 --- a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java +++ b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.metastore.api.TxnToWriteId; import java.io.InputStream; import java.util.List; +import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -151,6 +152,9 @@ abstract class AbstractStreamingTransaction } public void commit() throws StreamingException { 
-commitWithPartitions(null); +commit(null); + } + public void commit(Set partitions) throws StreamingException { +commit(partitions, null, null); } } http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java index f79b844..74fc531 100644 --- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java +++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java @@ -146,6 +146,7 @@ public class HiveStreamingConnection implements StreamingConnection { private boolean manageTransactions; private int countTransactions = 0; private Set partitions; + private Long tableId; private HiveStreamingConnection(Builder builder) throws StreamingException { this.database = builder.database.toLowerCase(); @@ -574,12 +575,18 @@ public class HiveStreamingConnection implements StreamingConnection { @Override public void commitTransaction() throws StreamingException { -commitTransactionWithPartition(null); +commitTransaction(null); } @Override - public void commitTransactionWithPartition(Set partitions) + public void commitTransaction(Set partitions) throws StreamingException { +commitTransaction(partitions, null, null); + } + + @Override + public void commitTransaction(Set partitions, String key, + String value) throws StreamingException { checkState(); Set createdPartitions = new HashSet<>(); @@ -598,7 +605,7 @@ public class HiveStreamingConnection implements StreamingConnection { connectionStats.incrementTotalPartitions(partitions.size()); } -currentTransactionBatch.commitWithPartitions(createdPartitions); +currentTransactionBatch.commit(createdPartitions, key, value); this.partitions.addAll( currentTransactionBatch.getPartitions()); connectionStats.incrementCreatedPartitions(createdPartitions.size()); 
http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java index 92016e5..ba4c6a5 100644 --- a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java +++
hive git commit: HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 213efd70b -> 369f0f241 HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/369f0f24 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/369f0f24 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/369f0f24 Branch: refs/heads/master Commit: 369f0f241fd3dc90e81d267bf16db61ea90db647 Parents: 213efd7 Author: Prasanth Jayachandran Authored: Sun Oct 14 21:34:08 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Oct 14 21:34:08 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../apache/hadoop/hive/ql/io/orc/OrcFile.java | 48 +++- .../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 + 3 files changed, 91 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/369f0f24/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cc6239c..29958b3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1949,6 +1949,10 @@ public class HiveConf extends Configuration { " ETL strategy is used when spending little more time in split generation is acceptable" + " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + " based on heuristics."), + HIVE_ORC_WRITER_LLAP_MEMORY_MANAGER_ENABLED("hive.exec.orc.writer.llap.memory.manager.enabled", true, + "Whether orc writers should use llap-aware memory manager. LLAP aware memory manager will use memory\n" + +"per executor instead of entire heap memory when concurrent orc writers are involved. 
This will let\n" + +"task fragments to use memory within its limit (memory per executor) when performing ETL in LLAP."), // hive streaming ingest settings HIVE_STREAMING_AUTO_FLUSH_ENABLED("hive.streaming.auto.flush.enabled", true, "Whether to enable memory \n" + http://git-wip-us.apache.org/repos/asf/hive/blob/369f0f24/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e7dfb05..e246ac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -24,20 +24,29 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapDaemonInfo; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.orc.FileMetadata; +import org.apache.orc.OrcConf; import org.apache.orc.PhysicalWriter; import org.apache.orc.MemoryManager; import org.apache.orc.TypeDescription; +import org.apache.orc.impl.MemoryManagerImpl; import org.apache.orc.impl.OrcTail; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * Contains factory methods to read or write ORC files. 
*/ public final class OrcFile extends org.apache.orc.OrcFile { - + private static final Logger LOG = LoggerFactory.getLogger(OrcFile.class); // unused protected OrcFile() {} @@ -96,6 +105,37 @@ public final class OrcFile extends org.apache.orc.OrcFile { return new ReaderImpl(path, options); } + @VisibleForTesting + static class LlapAwareMemoryManager extends MemoryManagerImpl { +private final double maxLoad; +private final long totalMemoryPool; + +public LlapAwareMemoryManager(Configuration conf) { + super(conf); + maxLoad = OrcConf.MEMORY_POOL.getDouble(conf); + long memPerExecutor = LlapDaemonInfo.INSTANCE.getMemoryPerExecutor(); + totalMemoryPool = (long) (memPerExecutor * maxLoad); + if (LOG.isDebugEnabled()) { +LOG.debug("Using LLAP memory manager for orc writer. memPerExecutor: {} maxLoad: {} totalMemPool: {}", + LlapUtil.humanReadableByteCount(memPerExecutor), maxLoad, LlapUtil.humanReadableByteCount(totalMemoryPool));
hive git commit: HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/branch-3 2048f6262 -> 1ce6c7c2a HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ce6c7c2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ce6c7c2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ce6c7c2 Branch: refs/heads/branch-3 Commit: 1ce6c7c2a3a93f0f92078ba8c929a870eaa8134d Parents: 2048f62 Author: Prasanth Jayachandran Authored: Sun Oct 14 21:34:08 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Oct 14 21:34:42 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../apache/hadoop/hive/ql/io/orc/OrcFile.java | 48 +++- .../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 + 3 files changed, 91 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ce6c7c2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3852d79..a04ef38 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1923,6 +1923,10 @@ public class HiveConf extends Configuration { " ETL strategy is used when spending little more time in split generation is acceptable" + " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + " based on heuristics."), + HIVE_ORC_WRITER_LLAP_MEMORY_MANAGER_ENABLED("hive.exec.orc.writer.llap.memory.manager.enabled", true, + "Whether orc writers should use llap-aware memory manager. LLAP aware memory manager will use memory\n" + +"per executor instead of entire heap memory when concurrent orc writers are involved. 
This will let\n" + +"task fragments to use memory within its limit (memory per executor) when performing ETL in LLAP."), // hive streaming ingest settings HIVE_STREAMING_AUTO_FLUSH_ENABLED("hive.streaming.auto.flush.enabled", true, "Whether to enable memory \n" + http://git-wip-us.apache.org/repos/asf/hive/blob/1ce6c7c2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e7dfb05..e246ac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -24,20 +24,29 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapDaemonInfo; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.orc.FileMetadata; +import org.apache.orc.OrcConf; import org.apache.orc.PhysicalWriter; import org.apache.orc.MemoryManager; import org.apache.orc.TypeDescription; +import org.apache.orc.impl.MemoryManagerImpl; import org.apache.orc.impl.OrcTail; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * Contains factory methods to read or write ORC files. 
*/ public final class OrcFile extends org.apache.orc.OrcFile { - + private static final Logger LOG = LoggerFactory.getLogger(OrcFile.class); // unused protected OrcFile() {} @@ -96,6 +105,37 @@ public final class OrcFile extends org.apache.orc.OrcFile { return new ReaderImpl(path, options); } + @VisibleForTesting + static class LlapAwareMemoryManager extends MemoryManagerImpl { +private final double maxLoad; +private final long totalMemoryPool; + +public LlapAwareMemoryManager(Configuration conf) { + super(conf); + maxLoad = OrcConf.MEMORY_POOL.getDouble(conf); + long memPerExecutor = LlapDaemonInfo.INSTANCE.getMemoryPerExecutor(); + totalMemoryPool = (long) (memPerExecutor * maxLoad); + if (LOG.isDebugEnabled()) { +LOG.debug("Using LLAP memory manager for orc writer. memPerExecutor: {} maxLoad: {} totalMemPool: {}", + LlapUtil.humanReadableByteCount(memPerExecutor), maxLoad,
[2/2] hive git commit: HIVE-20291: Allow HiveStreamingConnection to receive a WriteId (Jaume Marhuenda reviewed by Prasanth Jayachandran)
HIVE-20291: Allow HiveStreamingConnection to receive a WriteId (Jaume Marhuenda reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bdbd3bcf Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bdbd3bcf Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bdbd3bcf Branch: refs/heads/master Commit: bdbd3bcffac9f7fe1d3babb45eb40547b1499bb5 Parents: 7c4d48e Author: Jaume Marhuenda Authored: Thu Oct 11 16:22:22 2018 -0700 Committer: Prasanth Jayachandran Committed: Thu Oct 11 16:23:34 2018 -0700 -- .../hive/streaming/AbstractRecordWriter.java| 65 +- .../streaming/AbstractStreamingTransaction.java | 156 + .../apache/hive/streaming/ConnectionStats.java | 38 +- .../hive/streaming/HiveStreamingConnection.java | 697 ++- .../hive/streaming/InvalidTransactionState.java | 5 +- .../apache/hive/streaming/PartitionInfo.java| 1 + .../org/apache/hive/streaming/RecordWriter.java | 38 +- .../hive/streaming/StreamingConnection.java | 43 ++ .../hive/streaming/StreamingTransaction.java| 113 +++ .../apache/hive/streaming/TransactionBatch.java | 430 .../apache/hive/streaming/TransactionError.java | 7 +- .../streaming/UnManagedSingleTransaction.java | 135 .../org/apache/hive/streaming/package-info.java | 22 + .../java/org/apache/hive/streaming/package.html | 3 +- .../apache/hive/streaming/TestStreaming.java| 433 ++-- 15 files changed, 1616 insertions(+), 570 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bdbd3bcf/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java -- diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java index 9e90d36..88a7d82 100644 --- a/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java +++ b/streaming/src/java/org/apache/hive/streaming/AbstractRecordWriter.java @@ -46,9 +46,11 @@ import 
org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.LlapUtil; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordUpdater; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.serde2.AbstractSerDe; @@ -66,6 +68,7 @@ public abstract class AbstractRecordWriter implements RecordWriter { private static final Logger LOG = LoggerFactory.getLogger(AbstractRecordWriter.class.getName()); private static final String DEFAULT_LINE_DELIMITER_PATTERN = "[\r\n]"; + private Integer statementId; protected HiveConf conf; protected StreamingConnection conn; protected Table table; @@ -128,13 +131,21 @@ public abstract class AbstractRecordWriter implements RecordWriter { } @Override - public void init(StreamingConnection conn, long minWriteId, long maxWriteId) throws StreamingException { + public void init(StreamingConnection conn, long minWriteId, long maxWriteId) + throws StreamingException { +init(conn, minWriteId, maxWriteId, -1); + } + + @Override + public void init(StreamingConnection conn, long minWriteId, long maxWriteId, + int statementId) throws StreamingException { if (conn == null) { throw new StreamingException("Streaming connection cannot be null during record writer initialization"); } this.conn = conn; this.curBatchMinWriteId = minWriteId; this.curBatchMaxWriteId = maxWriteId; +this.statementId = statementId; this.conf = conn.getHiveConf(); this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); this.table = conn.getTable(); @@ -431,6 +442,7 @@ public abstract class AbstractRecordWriter implements RecordWriter { int bucket = 
getBucket(encodedRow); List partitionValues = getPartitionValues(encodedRow); getRecordUpdater(partitionValues, bucket).insert(writeId, encodedRow); + // ingest size bytes gets resetted on flush() whereas connection stats is not conn.getConnectionStats().incrementRecordsWritten(); conn.getConnectionStats().incrementRecordsSize(record.length); @@ -492,10 +504,53 @@ public abstract class AbstractRecordWriter implements RecordWriter { .tableProperties(tblProperties) .minimumWriteId(minWriteId) .maximumWriteId(maxWriteID) -
[1/2] hive git commit: HIVE-20291: Allow HiveStreamingConnection to receive a WriteId (Jaume Marhuenda reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 7c4d48ec2 -> bdbd3bcff http://git-wip-us.apache.org/repos/asf/hive/blob/bdbd3bcf/streaming/src/test/org/apache/hive/streaming/TestStreaming.java -- diff --git a/streaming/src/test/org/apache/hive/streaming/TestStreaming.java b/streaming/src/test/org/apache/hive/streaming/TestStreaming.java index 8b5e508..1c9e43f 100644 --- a/streaming/src/test/org/apache/hive/streaming/TestStreaming.java +++ b/streaming/src/test/org/apache/hive/streaming/TestStreaming.java @@ -35,6 +35,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -60,13 +61,13 @@ import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.hadoop.hive.metastore.api.TxnAbortedException; import org.apache.hadoop.hive.metastore.api.TxnInfo; -import org.apache.hadoop.hive.metastore.api.TxnState; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; @@ -82,6 +83,7 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReader; +import org.apache.hadoop.hive.ql.metadata.Table; import 
org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.txn.compactor.Worker; @@ -150,7 +152,8 @@ public class TestStreaming { if (file.canExecute()) { mod |= 0111; } - return new FileStatus(file.length(), file.isDirectory(), 1, 1024, + return new FileStatus(file.length(), file.isDirectory(), + 1, 1024, file.lastModified(), file.lastModified(), FsPermission.createImmutable(mod), "owen", "users", path); } @@ -419,6 +422,123 @@ public class TestStreaming { } @Test + public void testGetDeltaPath() throws Exception { +StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder() +.withFieldDelimiter(',') +.build(); +HiveStreamingConnection connection = HiveStreamingConnection.newBuilder() +.withDatabase(dbName) +.withTable(tblName) +.withRecordWriter(writer) +.withHiveConf(conf) +.connect(); +Path path = connection.getDeltaFileLocation(partitionVals, 0, +5L, 5L, 9); +Assert.assertTrue(path.toString().endsWith("testing.db/alerts/continent" ++ "=Asia/country=India/delta_005_005_0009/bucket_0")); + } + + @Test + public void testConnectionWithWriteId() throws Exception { +queryTable(driver, "drop table if exists default.writeidconnection"); +queryTable(driver, "create table default.writeidconnection (a string, b string) stored as orc " + +"TBLPROPERTIES('transactional'='true')"); +queryTable(driver, "insert into default.writeidconnection values('a0','bar')"); + +List rs = queryTable(driver, "select * from default.writeidconnection"); +Assert.assertEquals(1, rs.size()); +Assert.assertEquals("a0\tbar", rs.get(0)); + +StrictDelimitedInputWriter writerT = StrictDelimitedInputWriter.newBuilder() +.withFieldDelimiter(',') +.build(); +HiveStreamingConnection transactionConnection = HiveStreamingConnection.newBuilder() +.withDatabase("Default") +.withTable("writeidconnection") +.withRecordWriter(writerT) +.withHiveConf(conf) +.connect(); 
+transactionConnection.beginTransaction(); + +Table tObject = transactionConnection.getTable(); +Long writeId = transactionConnection.getCurrentWriteId(); + +Assert.assertNotNull(tObject); +Assert.assertNotNull(writeId); + +StrictDelimitedInputWriter writerOne = StrictDelimitedInputWriter.newBuilder() +.withFieldDelimiter(',') +.build(); +HiveStreamingConnection connectionOne = HiveStreamingConnection.newBuilder() +.withDatabase("Default") +.withTable("writeidconnection") +.withRecordWriter(writerOne) +.withHiveConf(conf) +.withWriteId(writeId) +.withStatementId(1) +.withTableObject(tObject)
hive git commit: HIVE-20648: LLAP: Vector group by operator should use memory per executor
Repository: hive Updated Branches: refs/heads/branch-3 b3a424bd0 -> 8703a3229 HIVE-20648: LLAP: Vector group by operator should use memory per executor Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8703a322 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8703a322 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8703a322 Branch: refs/heads/branch-3 Commit: 8703a3229e5a8d4afdd0e2ebd06579df40b01ed8 Parents: b3a424b Author: Prasanth Jayachandran Authored: Tue Oct 9 11:05:55 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Oct 9 11:06:28 2018 -0700 -- .../ql/exec/vector/VectorGroupByOperator.java | 24 +++-- .../exec/vector/TestVectorGroupByOperator.java | 96 2 files changed, 112 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8703a322/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 43f1162..4dfd179 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -33,6 +33,8 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.IConfigureJobConf; @@ -146,6 +148,7 @@ public class VectorGroupByOperator extends Operator private float memoryThreshold; + private boolean isLlap = false; /** * Interface for processing mode: global, hash, unsorted 
streaming, or group batch */ @@ -515,7 +518,7 @@ public class VectorGroupByOperator extends Operator aggregationBatchInfo.getAggregatorsFixedSize(); MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); - maxMemory = memoryMXBean.getHeapMemoryUsage().getMax(); + maxMemory = isLlap ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = conf.getMemoryThreshold(); // Tests may leave this unitialized, so better set it to 1 if (memoryThreshold == 0.0f) { @@ -525,13 +528,14 @@ public class VectorGroupByOperator extends Operator maxHashTblMemory = (int)(maxMemory * memoryThreshold); if (LOG.isDebugEnabled()) { -LOG.debug(String.format("maxMemory:%dMb (%d * %f) fixSize:%d (key:%d agg:%d)", -maxHashTblMemory/1024/1024, -maxMemory/1024/1024, -memoryThreshold, -fixedHashEntrySize, -keyWrappersBatch.getKeysFixedSize(), -aggregationBatchInfo.getAggregatorsFixedSize())); +LOG.debug("GBY memory limits - isLlap: {} maxMemory: {} ({} * {}) fixSize:{} (key:{} agg:{})", + isLlap, + LlapUtil.humanReadableByteCount(maxHashTblMemory), + LlapUtil.humanReadableByteCount(maxMemory), + memoryThreshold, + fixedHashEntrySize, + keyWrappersBatch.getKeysFixedSize(), + aggregationBatchInfo.getAggregatorsFixedSize()); } } @@ -975,6 +979,7 @@ public class VectorGroupByOperator extends Operator @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); +isLlap = LlapProxy.isDaemon(); VectorExpression.doTransientInit(keyExpressions); List objectInspectors = new ArrayList(); @@ -1231,4 +1236,7 @@ public class VectorGroupByOperator extends Operator } } + public long getMaxMemory() { +return maxMemory; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/8703a322/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index ffdc410..e2a593f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -38,6 +38,8 @@ import java.util.Set; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf;
hive git commit: HIVE-20648: LLAP: Vector group by operator should use memory per executor
Repository: hive Updated Branches: refs/heads/master db04f3f9a -> 2d2ab674f HIVE-20648: LLAP: Vector group by operator should use memory per executor Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2d2ab674 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2d2ab674 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2d2ab674 Branch: refs/heads/master Commit: 2d2ab674f8acb8a4e1d0532790e6c27bd8553018 Parents: db04f3f Author: Prasanth Jayachandran Authored: Tue Oct 9 11:05:55 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Oct 9 11:05:55 2018 -0700 -- .../ql/exec/vector/VectorGroupByOperator.java | 24 +++-- .../exec/vector/TestVectorGroupByOperator.java | 96 2 files changed, 112 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2d2ab674/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index a516d60..0d80c9e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -33,6 +33,8 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.IConfigureJobConf; @@ -148,6 +150,7 @@ public class VectorGroupByOperator extends Operator private float memoryThreshold; + private boolean isLlap = false; /** * Interface for processing mode: global, hash, unsorted 
streaming, or group batch */ @@ -517,7 +520,7 @@ public class VectorGroupByOperator extends Operator aggregationBatchInfo.getAggregatorsFixedSize(); MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); - maxMemory = memoryMXBean.getHeapMemoryUsage().getMax(); + maxMemory = isLlap ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = conf.getMemoryThreshold(); // Tests may leave this unitialized, so better set it to 1 if (memoryThreshold == 0.0f) { @@ -527,13 +530,14 @@ public class VectorGroupByOperator extends Operator maxHashTblMemory = (int)(maxMemory * memoryThreshold); if (LOG.isDebugEnabled()) { -LOG.debug(String.format("maxMemory:%dMb (%d * %f) fixSize:%d (key:%d agg:%d)", -maxHashTblMemory/1024/1024, -maxMemory/1024/1024, -memoryThreshold, -fixedHashEntrySize, -keyWrappersBatch.getKeysFixedSize(), -aggregationBatchInfo.getAggregatorsFixedSize())); +LOG.debug("GBY memory limits - isLlap: {} maxMemory: {} ({} * {}) fixSize:{} (key:{} agg:{})", + isLlap, + LlapUtil.humanReadableByteCount(maxHashTblMemory), + LlapUtil.humanReadableByteCount(maxMemory), + memoryThreshold, + fixedHashEntrySize, + keyWrappersBatch.getKeysFixedSize(), + aggregationBatchInfo.getAggregatorsFixedSize()); } } @@ -977,6 +981,7 @@ public class VectorGroupByOperator extends Operator @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); +isLlap = LlapProxy.isDaemon(); VectorExpression.doTransientInit(keyExpressions); List objectInspectors = new ArrayList(); @@ -1233,4 +1238,7 @@ public class VectorGroupByOperator extends Operator } } + public long getMaxMemory() { +return maxMemory; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/2d2ab674/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index fe1375b..278f167 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -38,6 +38,8 @@ import java.util.Set; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf;
hive git commit: HIVE-20599: CAST(INTERVAL_DAY_TIME AS STRING) is throwing SemanticException (Naresh P R reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master dab8cc012 -> 307bbca96 HIVE-20599: CAST(INTERVAL_DAY_TIME AS STRING) is throwing SemanticException (Naresh P R reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/307bbca9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/307bbca9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/307bbca9 Branch: refs/heads/master Commit: 307bbca96c8bc845183db3608e5ef508a17a8bf4 Parents: dab8cc0 Author: nareshpr Authored: Tue Sep 25 10:12:28 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 25 10:14:26 2018 -0700 -- .../test/queries/clientpositive/udf_to_string.q | 2 ++ .../results/clientpositive/udf_to_string.q.out| 18 ++ 2 files changed, 20 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/307bbca9/ql/src/test/queries/clientpositive/udf_to_string.q -- diff --git a/ql/src/test/queries/clientpositive/udf_to_string.q b/ql/src/test/queries/clientpositive/udf_to_string.q index 818f80f..4bb6220 100644 --- a/ql/src/test/queries/clientpositive/udf_to_string.q +++ b/ql/src/test/queries/clientpositive/udf_to_string.q @@ -17,3 +17,5 @@ SELECT CAST(CAST(-3.14 AS DECIMAL(3,2)) AS STRING) FROM src tablesample (1 rows) SELECT CAST('Foo' AS STRING) FROM src tablesample (1 rows); +SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING); +SELECT CAST(interval_year_month('1-2') AS STRING); http://git-wip-us.apache.org/repos/asf/hive/blob/307bbca9/ql/src/test/results/clientpositive/udf_to_string.q.out -- diff --git a/ql/src/test/results/clientpositive/udf_to_string.q.out b/ql/src/test/results/clientpositive/udf_to_string.q.out index bf2f72d..fc888a1 100644 --- a/ql/src/test/results/clientpositive/udf_to_string.q.out +++ b/ql/src/test/results/clientpositive/udf_to_string.q.out @@ -88,3 +88,21 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@src A masked pattern was here Foo +PREHOOK: query: SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +POSTHOOK: query: SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +17624 00:30:30.0 +PREHOOK: query: SELECT CAST(interval_year_month('1-2') AS STRING) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +POSTHOOK: query: SELECT CAST(interval_year_month('1-2') AS STRING) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +1-2
hive git commit: HIVE-20599: CAST(INTERVAL_DAY_TIME AS STRING) is throwing SemanticException (Naresh P R reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 a879b9f13 -> fdc12f38f HIVE-20599: CAST(INTERVAL_DAY_TIME AS STRING) is throwing SemanticException (Naresh P R reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fdc12f38 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fdc12f38 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fdc12f38 Branch: refs/heads/branch-3 Commit: fdc12f38f8d89e8c62308683c6b9cabe5dad57ff Parents: a879b9f Author: nareshpr Authored: Tue Sep 25 10:15:22 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 25 10:15:30 2018 -0700 -- .../apache/hadoop/hive/ql/udf/UDFToString.java| 18 ++ .../test/queries/clientpositive/udf_to_string.q | 3 +++ .../results/clientpositive/udf_to_string.q.out| 18 ++ 3 files changed, 39 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/fdc12f38/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java index a16d429..2d49f21 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; @@ -178,4 +180,20 @@ public class UDFToString extends UDF { t.set(bw.getBytes(),0,bw.getLength()); return t; } + + public Text 
evaluate(HiveIntervalDayTimeWritable hiw) { +if (null == hiw) { + return null; +} +t.set(hiw.toString()); +return t; + } + + public Text evaluate(HiveIntervalYearMonthWritable hiw) { +if (null == hiw) { + return null; +} +t.set(hiw.toString()); +return t; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/fdc12f38/ql/src/test/queries/clientpositive/udf_to_string.q -- diff --git a/ql/src/test/queries/clientpositive/udf_to_string.q b/ql/src/test/queries/clientpositive/udf_to_string.q index ac4b524..50186b8 100644 --- a/ql/src/test/queries/clientpositive/udf_to_string.q +++ b/ql/src/test/queries/clientpositive/udf_to_string.q @@ -16,3 +16,6 @@ SELECT CAST(CAST(-3.14 AS DECIMAL(3,2)) AS STRING) FROM src tablesample (1 rows) SELECT CAST('Foo' AS STRING) FROM src tablesample (1 rows); +SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING); +SELECT CAST(interval_year_month('1-2') AS STRING); + http://git-wip-us.apache.org/repos/asf/hive/blob/fdc12f38/ql/src/test/results/clientpositive/udf_to_string.q.out -- diff --git a/ql/src/test/results/clientpositive/udf_to_string.q.out b/ql/src/test/results/clientpositive/udf_to_string.q.out index bf2f72d..fc888a1 100644 --- a/ql/src/test/results/clientpositive/udf_to_string.q.out +++ b/ql/src/test/results/clientpositive/udf_to_string.q.out @@ -88,3 +88,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src A masked pattern was here Foo +PREHOOK: query: SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +POSTHOOK: query: SELECT CAST(from_utc_timestamp(timestamp '2018-05-02 15:30:30', 'PST') - from_utc_timestamp(timestamp '1970-01-30 16:00:00', 'PST') AS STRING) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table + A masked pattern was 
here +17624 00:30:30.0 +PREHOOK: query: SELECT CAST(interval_year_month('1-2') AS STRING) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +POSTHOOK: query: SELECT CAST(interval_year_month('1-2') AS STRING) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table + A masked pattern was here +1-2
hive git commit: HIVE-18871: hive on tez execution error due to set hive.aux.jars.path to hdfs:// (zhuwei reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 491c9f621 -> a879b9f13 HIVE-18871: hive on tez execution error due to set hive.aux.jars.path to hdfs:// (zhuwei reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a879b9f1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a879b9f1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a879b9f1 Branch: refs/heads/branch-3 Commit: a879b9f13e2b8815224dcc7c7514eef5cac7aaa1 Parents: 491c9f6 Author: zhuwei Authored: Mon Sep 24 10:24:27 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 25 10:10:00 2018 -0700 -- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a879b9f1/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index de0abd1..1a88b77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -62,6 +62,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -1263,7 +1264,12 @@ public class DagUtils { return createLocalResource(destFS, dest, type, LocalResourceVisibility.PRIVATE); } try { -destFS.copyFromLocalFile(false, false, src, dest); +if (src.toUri().getScheme()!=null) { + FileUtil.copy(src.getFileSystem(conf), src, destFS, dest, false, false, conf); +} +else { + destFS.copyFromLocalFile(false, false, src, dest); +} synchronized (notifier) { 
notifier.notifyAll(); // Notify if we have successfully copied the file. }
hive git commit: HIVE-18871: hive on tez execution error due to set hive.aux.jars.path to hdfs:// (zhuwei reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master ffdbee050 -> dab8cc012 HIVE-18871: hive on tez execution error due to set hive.aux.jars.path to hdfs:// (zhuwei reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dab8cc01 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dab8cc01 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dab8cc01 Branch: refs/heads/master Commit: dab8cc01297bc82e17d3e666a49e3b6392f878b4 Parents: ffdbee0 Author: zhuwei Authored: Mon Sep 24 10:24:27 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 25 10:09:41 2018 -0700 -- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/dab8cc01/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index de0abd1..1a88b77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -62,6 +62,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -1263,7 +1264,12 @@ public class DagUtils { return createLocalResource(destFS, dest, type, LocalResourceVisibility.PRIVATE); } try { -destFS.copyFromLocalFile(false, false, src, dest); +if (src.toUri().getScheme()!=null) { + FileUtil.copy(src.getFileSystem(conf), src, destFS, dest, false, false, conf); +} +else { + destFS.copyFromLocalFile(false, false, src, dest); +} synchronized (notifier) { 
notifier.notifyAll(); // Notify if we have successfully copied the file. }
hive git commit: HIVE-20621: GetOperationStatus called in resultset.next causing incremental slowness (Prasanth Jayachandran reviewed by Gopal V)
Repository: hive Updated Branches: refs/heads/branch-3 d8c97cf28 -> 07f2fe43d HIVE-20621: GetOperationStatus called in resultset.next causing incremental slowness (Prasanth Jayachandran reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/07f2fe43 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/07f2fe43 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/07f2fe43 Branch: refs/heads/branch-3 Commit: 07f2fe43d3a943995bee0e39b562777ee20f6c9c Parents: d8c97cf Author: Prasanth Jayachandran Authored: Sun Sep 23 01:24:13 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Sep 23 01:24:33 2018 -0700 -- .../java/org/apache/hive/jdbc/HiveQueryResultSet.java| 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/07f2fe43/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java -- diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java index 953fa83..78025c5 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java @@ -43,6 +43,7 @@ import org.apache.hive.service.rpc.thrift.TColumnDesc; import org.apache.hive.service.rpc.thrift.TFetchOrientation; import org.apache.hive.service.rpc.thrift.TFetchResultsReq; import org.apache.hive.service.rpc.thrift.TFetchResultsResp; +import org.apache.hive.service.rpc.thrift.TGetOperationStatusResp; import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataReq; import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataResp; import org.apache.hive.service.rpc.thrift.TOperationHandle; @@ -77,6 +78,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet { private boolean emptyResultSet = false; private boolean isScrollable = false; private boolean fetchFirst = false; + private TGetOperationStatusResp operationStatus = null; 
private final TProtocolVersion protocol; @@ -317,6 +319,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet { stmtHandle = null; sessHandle = null; isClosed = true; +operationStatus = null; } private void closeOperationHandle(TOperationHandle stmtHandle) throws SQLException { @@ -348,13 +351,15 @@ public class HiveQueryResultSet extends HiveBaseResultSet { return false; } -/** +/* * Poll on the operation status, till the operation is complete. * We need to wait only for HiveStatement to complete. * HiveDatabaseMetaData which also uses this ResultSet returns only after the RPC is complete. */ -if ((statement != null) && (statement instanceof HiveStatement)) { - ((HiveStatement) statement).waitForOperationToComplete(); +// when isHasResultSet is set, the query transitioned from running -> complete and is not expected go back to +// running state when fetching results (implicit state transition) +if ((statement instanceof HiveStatement) && (operationStatus == null || !operationStatus.isHasResultSet())) { + operationStatus = ((HiveStatement) statement).waitForOperationToComplete(); } try {
hive git commit: HIVE-20621: GetOperationStatus called in resultset.next causing incremental slowness (Prasanth Jayachandran reviewed by Gopal V)
Repository: hive Updated Branches: refs/heads/master cdba00c96 -> c44f2b531 HIVE-20621: GetOperationStatus called in resultset.next causing incremental slowness (Prasanth Jayachandran reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c44f2b53 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c44f2b53 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c44f2b53 Branch: refs/heads/master Commit: c44f2b531d576182a80fb81bfaeeaf3abec2f457 Parents: cdba00c Author: Prasanth Jayachandran Authored: Sun Sep 23 01:24:13 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Sep 23 01:24:13 2018 -0700 -- .../java/org/apache/hive/jdbc/HiveQueryResultSet.java| 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c44f2b53/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java -- diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java index 953fa83..78025c5 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java @@ -43,6 +43,7 @@ import org.apache.hive.service.rpc.thrift.TColumnDesc; import org.apache.hive.service.rpc.thrift.TFetchOrientation; import org.apache.hive.service.rpc.thrift.TFetchResultsReq; import org.apache.hive.service.rpc.thrift.TFetchResultsResp; +import org.apache.hive.service.rpc.thrift.TGetOperationStatusResp; import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataReq; import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataResp; import org.apache.hive.service.rpc.thrift.TOperationHandle; @@ -77,6 +78,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet { private boolean emptyResultSet = false; private boolean isScrollable = false; private boolean fetchFirst = false; + private TGetOperationStatusResp operationStatus = null; 
private final TProtocolVersion protocol; @@ -317,6 +319,7 @@ public class HiveQueryResultSet extends HiveBaseResultSet { stmtHandle = null; sessHandle = null; isClosed = true; +operationStatus = null; } private void closeOperationHandle(TOperationHandle stmtHandle) throws SQLException { @@ -348,13 +351,15 @@ public class HiveQueryResultSet extends HiveBaseResultSet { return false; } -/** +/* * Poll on the operation status, till the operation is complete. * We need to wait only for HiveStatement to complete. * HiveDatabaseMetaData which also uses this ResultSet returns only after the RPC is complete. */ -if ((statement != null) && (statement instanceof HiveStatement)) { - ((HiveStatement) statement).waitForOperationToComplete(); +// when isHasResultSet is set, the query transitioned from running -> complete and is not expected go back to +// running state when fetching results (implicit state transition) +if ((statement instanceof HiveStatement) && (operationStatus == null || !operationStatus.isHasResultSet())) { + operationStatus = ((HiveStatement) statement).waitForOperationToComplete(); } try {
hive git commit: HIVE-20267: Expanding WebUI to include form to dynamically config log levels (Zoltan Chovan reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 9b376a7b0 -> 208e202f5 HIVE-20267: Expanding WebUI to include form to dynamically config log levels (Zoltan Chovan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/208e202f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/208e202f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/208e202f Branch: refs/heads/master Commit: 208e202f5b849ce62a12434da3ec648d949d40d6 Parents: 9b376a7 Author: Zoltan Chovan Authored: Wed Sep 19 09:57:52 2018 -0700 Committer: Prasanth Jayachandran Committed: Thu Sep 20 12:09:36 2018 -0700 -- .../hive-webapps/hiveserver2/hiveserver2.jsp| 1 + .../hive-webapps/hiveserver2/llap.html | 1 + .../hive-webapps/hiveserver2/logconf.jsp| 140 +++ .../resources/hive-webapps/static/js/logconf.js | 40 ++ 4 files changed, 182 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/208e202f/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp index 5d82029..c238369 100644 --- a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp +++ b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp @@ -79,6 +79,7 @@ String remoteUser = request.getRemoteUser(); Hive Configuration Stack Trace Llap Daemons +Configure logging http://git-wip-us.apache.org/repos/asf/hive/blob/208e202f/service/src/resources/hive-webapps/hiveserver2/llap.html -- diff --git a/service/src/resources/hive-webapps/hiveserver2/llap.html b/service/src/resources/hive-webapps/hiveserver2/llap.html index e1424b8..62ac1b0 100644 --- a/service/src/resources/hive-webapps/hiveserver2/llap.html +++ b/service/src/resources/hive-webapps/hiveserver2/llap.html @@ -37,6 +37,7 @@ Hive Configuration Stack Trace Llap Daemons +Configure logging 
http://git-wip-us.apache.org/repos/asf/hive/blob/208e202f/service/src/resources/hive-webapps/hiveserver2/logconf.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/logconf.jsp b/service/src/resources/hive-webapps/hiveserver2/logconf.jsp new file mode 100644 index 000..a5747c0 --- /dev/null +++ b/service/src/resources/hive-webapps/hiveserver2/logconf.jsp @@ -0,0 +1,140 @@ +<%-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--%> +<%@ page contentType="text/html;charset=UTF-8" + import="org.apache.hadoop.conf.Configuration" + import="org.apache.hadoop.hive.conf.HiveConf" + import="org.apache.hadoop.hive.conf.HiveConf.ConfVars" + import="org.apache.hive.common.util.HiveVersionInfo" + import="org.apache.hive.http.HttpServer" + import="org.apache.hive.service.cli.operation.Operation" + import="org.apache.hive.service.cli.operation.SQLOperation" + import="org.apache.hadoop.hive.ql.QueryInfo" + import="org.apache.hive.service.cli.session.SessionManager" + import="org.apache.hive.service.cli.session.HiveSession" + import="javax.servlet.ServletContext" + import="java.util.Collection" + import="java.util.Date" + import="java.util.List" + import="jodd.util.HtmlEncoder" +%> + +<% +ServletContext ctx = getServletContext(); +Configuration conf = (Configuration)ctx.getAttribute("hive.conf"); +long startcode = conf.getLong("startcode", System.currentTimeMillis()); +SessionManager sessionManager = +(SessionManager)ctx.getAttribute("hive.sm"); +String
hive git commit: HIVE-20267: Expanding WebUI to include form to dynamically config log levels (Zoltan Chovan reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 36c33ca06 -> c0a9d3ba1 HIVE-20267: Expanding WebUI to include form to dynamically config log levels (Zoltan Chovan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c0a9d3ba Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c0a9d3ba Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c0a9d3ba Branch: refs/heads/branch-3 Commit: c0a9d3ba18dd42f315d001daa5f99b5e1aedfa52 Parents: 36c33ca Author: Zoltan Chovan Authored: Wed Sep 19 09:57:52 2018 -0700 Committer: Prasanth Jayachandran Committed: Thu Sep 20 12:10:06 2018 -0700 -- .../hive-webapps/hiveserver2/hiveserver2.jsp| 1 + .../hive-webapps/hiveserver2/llap.html | 1 + .../hive-webapps/hiveserver2/logconf.jsp| 140 +++ .../resources/hive-webapps/static/js/logconf.js | 40 ++ 4 files changed, 182 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c0a9d3ba/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp index 5d82029..c238369 100644 --- a/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp +++ b/service/src/resources/hive-webapps/hiveserver2/hiveserver2.jsp @@ -79,6 +79,7 @@ String remoteUser = request.getRemoteUser(); Hive Configuration Stack Trace Llap Daemons +Configure logging http://git-wip-us.apache.org/repos/asf/hive/blob/c0a9d3ba/service/src/resources/hive-webapps/hiveserver2/llap.html -- diff --git a/service/src/resources/hive-webapps/hiveserver2/llap.html b/service/src/resources/hive-webapps/hiveserver2/llap.html index e1424b8..62ac1b0 100644 --- a/service/src/resources/hive-webapps/hiveserver2/llap.html +++ b/service/src/resources/hive-webapps/hiveserver2/llap.html @@ -37,6 +37,7 @@ Hive Configuration Stack Trace Llap Daemons +Configure logging 
http://git-wip-us.apache.org/repos/asf/hive/blob/c0a9d3ba/service/src/resources/hive-webapps/hiveserver2/logconf.jsp -- diff --git a/service/src/resources/hive-webapps/hiveserver2/logconf.jsp b/service/src/resources/hive-webapps/hiveserver2/logconf.jsp new file mode 100644 index 000..a5747c0 --- /dev/null +++ b/service/src/resources/hive-webapps/hiveserver2/logconf.jsp @@ -0,0 +1,140 @@ +<%-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--%> +<%@ page contentType="text/html;charset=UTF-8" + import="org.apache.hadoop.conf.Configuration" + import="org.apache.hadoop.hive.conf.HiveConf" + import="org.apache.hadoop.hive.conf.HiveConf.ConfVars" + import="org.apache.hive.common.util.HiveVersionInfo" + import="org.apache.hive.http.HttpServer" + import="org.apache.hive.service.cli.operation.Operation" + import="org.apache.hive.service.cli.operation.SQLOperation" + import="org.apache.hadoop.hive.ql.QueryInfo" + import="org.apache.hive.service.cli.session.SessionManager" + import="org.apache.hive.service.cli.session.HiveSession" + import="javax.servlet.ServletContext" + import="java.util.Collection" + import="java.util.Date" + import="java.util.List" + import="jodd.util.HtmlEncoder" +%> + +<% +ServletContext ctx = getServletContext(); +Configuration conf = (Configuration)ctx.getAttribute("hive.conf"); +long startcode = conf.getLong("startcode", System.currentTimeMillis()); +SessionManager sessionManager = +(SessionManager)ctx.getAttribute("hive.sm"); +String
[1/2] hive git commit: HIVE-20011: Move away from append mode in proto logging hook (Harish JP, reviewed by Anishek Agarwal)
Repository: hive Updated Branches: refs/heads/branch-3 204a0e211 -> 36c33ca06 HIVE-20011: Move away from append mode in proto logging hook (Harish JP, reviewd by Anishek Agarwal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/29315fcb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/29315fcb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/29315fcb Branch: refs/heads/branch-3 Commit: 29315fcbb53bf10af16f75ec3d36965c061eedd6 Parents: 204a0e2 Author: Anishek Agarwal Authored: Fri Jun 29 15:05:17 2018 +0530 Committer: Prasanth Jayachandran Committed: Tue Sep 18 13:19:49 2018 -0700 -- .../hive/ql/hooks/HiveProtoLoggingHook.java | 24 +--- .../logging/proto/DatePartitionedLogger.java| 18 +++ .../logging/proto/ProtoMessageReader.java | 9 +--- .../logging/proto/ProtoMessageWriter.java | 12 +- 4 files changed, 46 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/29315fcb/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 1e7070b..49cba4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -86,6 +86,7 @@ import static org.apache.hadoop.hive.ql.plan.HiveOperation.UNLOCKTABLE; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -101,6 +102,7 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.compress.utils.IOUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -180,6 +182,9 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { private final DatePartitionedLogger logger; private final ExecutorService eventHandler; private final ExecutorService logWriter; +private int logFileCount = 0; +private ProtoMessageWriter writer; +private LocalDate writerDate; EventLogger(HiveConf conf, Clock clock) { this.clock = clock; @@ -234,6 +239,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { LOG.warn("Got interrupted exception while waiting for events to be flushed", e); } } + IOUtils.closeQuietly(writer); } void handle(HookContext hookContext) { @@ -285,12 +291,24 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { private static final int MAX_RETRIES = 2; private void writeEvent(HiveHookEventProto event) { for (int retryCount = 0; retryCount <= MAX_RETRIES; ++retryCount) { -try (ProtoMessageWriter writer = logger.getWriter(logFileName)) { +try { + if (writer == null || !logger.getNow().toLocalDate().equals(writerDate)) { +if (writer != null) { + // Day change over case, reset the logFileCount. + logFileCount = 0; + IOUtils.closeQuietly(writer); +} +// increment log file count, if creating a new writer. +writer = logger.getWriter(logFileName + "_" + ++logFileCount); +writerDate = logger.getDateFromDir(writer.getPath().getParent().getName()); + } writer.writeProto(event); - // This does not work hence, opening and closing file for every event. - // writer.hflush(); + writer.hflush(); return; } catch (IOException e) { + // Something wrong with writer, lets close and reopen. 
+ IOUtils.closeQuietly(writer); + writer = null; if (retryCount < MAX_RETRIES) { LOG.warn("Error writing proto message for query {}, eventType: {}, retryCount: {}," + " error: {} ", event.getHiveQueryId(), event.getEventType(), retryCount, http://git-wip-us.apache.org/repos/asf/hive/blob/29315fcb/ql/src/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java -- diff --git a/ql/src/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java b/ql/src/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java index d6a5121..58cec7e 100644 ---
[2/2] hive git commit: HIVE-20582: Make hflush in hive proto logging configurable (Prasanth Jayachandran reviewed by Thejas M Nair)
HIVE-20582: Make hflush in hive proto logging configurable (Prasanth Jayachandran reviewed by Thejas M Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36c33ca0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36c33ca0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36c33ca0 Branch: refs/heads/branch-3 Commit: 36c33ca066c99dfdb21223a711c0c3f33c85b943 Parents: 29315fc Author: Prasanth Jayachandran Authored: Tue Sep 18 13:10:07 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 18 13:20:02 2018 -0700 -- .../java/org/apache/hadoop/hive/conf/HiveConf.java| 3 +++ .../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java| 14 +- 2 files changed, 16 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/36c33ca0/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9523640..4ec6368 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -632,6 +632,9 @@ public class HiveConf extends Configuration { HIVE_PROTO_EVENTS_TTL("hive.hook.proto.events.ttl", "7d", new TimeValidator(TimeUnit.DAYS), "Time-To-Live (TTL) of proto event files before cleanup."), +HIVE_PROTO_FILE_PER_EVENT("hive.hook.proto.file.per.event", false, + "Whether each proto event has to be written to separate file. 
" + +"(Use this for FS that does not hflush immediately like S3A)"), // Hadoop Configuration Properties // Properties with null values are ignored and exist only for the purpose of giving us http://git-wip-us.apache.org/repos/asf/hive/blob/36c33ca0/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 49cba4c..aa3a926 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -185,6 +185,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { private int logFileCount = 0; private ProtoMessageWriter writer; private LocalDate writerDate; +private boolean eventPerFile; EventLogger(HiveConf conf, Clock clock) { this.clock = clock; @@ -196,6 +197,8 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { LOG.error(ConfVars.HIVE_PROTO_EVENTS_BASE_PATH.varname + " is not set, logging disabled."); } + eventPerFile = conf.getBoolVar(ConfVars.HIVE_PROTO_FILE_PER_EVENT); + LOG.info("Event per file enabled: {}", eventPerFile); DatePartitionedLogger tmpLogger = null; try { if (baseDir != null) { @@ -303,7 +306,16 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { writerDate = logger.getDateFromDir(writer.getPath().getParent().getName()); } writer.writeProto(event); - writer.hflush(); + if (eventPerFile) { +if (writer != null) { + LOG.debug("Event per file enabled. Closing proto event file: {}", writer.getPath()); + IOUtils.closeQuietly(writer); +} +// rollover to next file +writer = logger.getWriter(logFileName + "_" + ++logFileCount); + } else { +writer.hflush(); + } return; } catch (IOException e) { // Something wrong with writer, lets close and reopen.
hive git commit: HIVE-20582: Make hflush in hive proto logging configurable (Prasanth Jayachandran reviewed by Thejas M Nair)
Repository: hive Updated Branches: refs/heads/master 7450ce762 -> 8ebde0441 HIVE-20582: Make hflush in hive proto logging configurable (Prasanth Jayachandran reviewed by Thejas M Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8ebde044 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8ebde044 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8ebde044 Branch: refs/heads/master Commit: 8ebde04411a4ca34994019db75b57ebee9c28f71 Parents: 7450ce7 Author: Prasanth Jayachandran Authored: Tue Sep 18 13:10:07 2018 -0700 Committer: Prasanth Jayachandran Committed: Tue Sep 18 13:10:14 2018 -0700 -- .../java/org/apache/hadoop/hive/conf/HiveConf.java| 3 +++ .../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java| 14 +- 2 files changed, 16 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8ebde044/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 4fb8a30..8a561e5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -642,6 +642,9 @@ public class HiveConf extends Configuration { HIVE_PROTO_EVENTS_TTL("hive.hook.proto.events.ttl", "7d", new TimeValidator(TimeUnit.DAYS), "Time-To-Live (TTL) of proto event files before cleanup."), +HIVE_PROTO_FILE_PER_EVENT("hive.hook.proto.file.per.event", false, + "Whether each proto event has to be written to separate file. 
" + +"(Use this for FS that does not hflush immediately like S3A)"), // Hadoop Configuration Properties // Properties with null values are ignored and exist only for the purpose of giving us http://git-wip-us.apache.org/repos/asf/hive/blob/8ebde044/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 673c858..0af30d4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -186,6 +186,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { private int logFileCount = 0; private ProtoMessageWriter writer; private LocalDate writerDate; +private boolean eventPerFile; EventLogger(HiveConf conf, Clock clock) { this.clock = clock; @@ -197,6 +198,8 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { LOG.error(ConfVars.HIVE_PROTO_EVENTS_BASE_PATH.varname + " is not set, logging disabled."); } + eventPerFile = conf.getBoolVar(ConfVars.HIVE_PROTO_FILE_PER_EVENT); + LOG.info("Event per file enabled: {}", eventPerFile); DatePartitionedLogger tmpLogger = null; try { if (baseDir != null) { @@ -289,7 +292,16 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { writerDate = logger.getDateFromDir(writer.getPath().getParent().getName()); } writer.writeProto(event); - writer.hflush(); + if (eventPerFile) { +if (writer != null) { + LOG.debug("Event per file enabled. Closing proto event file: {}", writer.getPath()); + IOUtils.closeQuietly(writer); +} +// rollover to next file +writer = logger.getWriter(logFileName + "_" + ++logFileCount); + } else { +writer.hflush(); + } return; } catch (IOException e) { // Something wrong with writer, lets close and reopen.