This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new bd84b5c HIVE-21971 : HS2 leaks classloader due to
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF
(Rajesh Balamohan via Ashutosh Chauhan)
bd84b5c is described below
commit bd84b5cfdc0423463331935f85cbbed50b364e4b
Author: Rajesh Balamohan <[email protected]>
AuthorDate: Tue May 26 16:38:59 2020 -0700
HIVE-21971 : HS2 leaks classloader due to
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF
(Rajesh Balamohan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]>
---
.../hadoop/hive/ql/session/SessionState.java | 22 ++++++++++
.../hadoop/hive/ql/session/TestSessionState.java | 51 ++++++++++++++++++++++
2 files changed, 73 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 55bd27e..20f352e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.lang.management.ManagementFactory;
+import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.AccessController;
@@ -106,6 +107,7 @@ import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1808,6 +1810,26 @@ public class SessionState implements ISessionAuthState{
Hive.closeCurrent();
}
progressMonitor = null;
+ // Hadoop's ReflectionUtils caches constructors for the classes it
instantiated.
+ // In UDFs, this can result in classloaders not getting GCed for a
temporary function,
+ // resulting in a PermGen leak when used extensively from HiveServer2
+ // There are lots of places where hadoop's ReflectionUtils is still used.
Until all of them are
+ // cleared up, we would have to retain this to avoid mem leak.
+ clearReflectionUtilsCache();
+ }
+
+ private void clearReflectionUtilsCache() {
+ Method clearCacheMethod;
+ try {
+ clearCacheMethod = ReflectionUtils.class.getDeclaredMethod("clearCache");
+ if (clearCacheMethod != null) {
+ clearCacheMethod.setAccessible(true);
+ clearCacheMethod.invoke(null);
+ LOG.debug("Cleared Hadoop ReflectionUtils CONSTRUCTOR_CACHE");
+ }
+ } catch (Exception e) {
+ LOG.info("Failed to clear up Hadoop ReflectionUtils CONSTRUCTOR_CACHE",
e);
+ }
}
private void unCacheDataNucleusClassLoaders() {
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
index 0fa1c81..4c374e8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
@@ -24,9 +24,12 @@ import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +37,12 @@ import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -221,6 +230,48 @@ public class TestSessionState {
}
}
+ static class DummyUDF extends GenericUDF {
+
+ @Override public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+ return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ }
+
+ @Override public Object evaluate(DeferredObject[] arguments) throws
HiveException {
+ return "dummy";
+ }
+
+ @Override public String getDisplayString(String[] children) {
+ return "dummy";
+ }
+ }
+
+
+ private Map getReflectionUtilsCache() {
+ Field constructorCache;
+ try {
+ constructorCache =
ReflectionUtils.class.getDeclaredField("CONSTRUCTOR_CACHE");
+ if (constructorCache != null) {
+ constructorCache.setAccessible(true);
+ return (Map)constructorCache.get(new ReflectionUtils());
+ }
+ } catch (Exception e) {
+ LOG.info("Failed to get Hadoop ReflectionUtils CONSTRUCTOR_CACHE", e);
+ }
+ return null;
+ }
+
+ @Test
+ public void testReflectionCleanup() throws Exception {
+ SessionState ss = SessionState.get();
+ assertNull(ss.getTezSession());
+ ReflectionUtils.newInstance(DummyUDF.class, null);
+ ss.close();
+ Map cache = getReflectionUtilsCache();
+ assertTrue("Cache can't be null", (cache != null));
+ assertEquals("Size of cache is " + cache.size(), 0, cache.size());
+ }
+
@Test
public void testReloadExistingAuxJars2() {
HiveConf conf = new HiveConf();