This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new bd84b5c  HIVE-21971 : HS2 leaks classloader due to 
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF 
(Rajesh Balamohan via Ashutosh Chauhan)
bd84b5c is described below

commit bd84b5cfdc0423463331935f85cbbed50b364e4b
Author: Rajesh Balamohan <[email protected]>
AuthorDate: Tue May 26 16:38:59 2020 -0700

    HIVE-21971 : HS2 leaks classloader due to 
`ReflectionUtils::CONSTRUCTOR_CACHE` with temporary functions + GenericUDF 
(Rajesh Balamohan via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <[email protected]>
---
 .../hadoop/hive/ql/session/SessionState.java       | 22 ++++++++++
 .../hadoop/hive/ql/session/TestSessionState.java   | 51 ++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java 
b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 55bd27e..20f352e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
 import java.lang.management.ManagementFactory;
+import java.lang.reflect.Method;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.security.AccessController;
@@ -106,6 +107,7 @@ import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -1808,6 +1810,26 @@ public class SessionState implements ISessionAuthState{
       Hive.closeCurrent();
     }
     progressMonitor = null;
+    // Hadoop's ReflectionUtils caches constructors for the classes it 
instantiated.
+    // In UDFs, this can result in classloaders not getting GCed for a 
temporary function,
+    // resulting in a PermGen leak when used extensively from HiveServer2
+    // There are lots of places where hadoop's ReflectionUtils is still used. 
Until all of them are
+    // cleared up, we would have to retain this to avoid mem leak.
+    clearReflectionUtilsCache();
+  }
+
+  private void clearReflectionUtilsCache() {
+    Method clearCacheMethod;
+    try {
+      clearCacheMethod = ReflectionUtils.class.getDeclaredMethod("clearCache");
+      if (clearCacheMethod != null) {
+        clearCacheMethod.setAccessible(true);
+        clearCacheMethod.invoke(null);
+        LOG.debug("Cleared Hadoop ReflectionUtils CONSTRUCTOR_CACHE");
+      }
+    } catch (Exception e) {
+      LOG.info("Failed to clear up Hadoop ReflectionUtils CONSTRUCTOR_CACHE", 
e);
+    }
   }
 
   private void unCacheDataNucleusClassLoaders() {
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java 
b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
index 0fa1c81..4c374e8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java
@@ -24,9 +24,12 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
 import java.lang.reflect.Method;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Map;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +37,12 @@ import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -221,6 +230,48 @@ public class TestSessionState {
     }
   }
 
+  static class DummyUDF extends GenericUDF {
+
+    @Override public ObjectInspector initialize(ObjectInspector[] arguments)
+        throws UDFArgumentException {
+      return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+
+    @Override public Object evaluate(DeferredObject[] arguments) throws 
HiveException {
+      return "dummy";
+    }
+
+    @Override public String getDisplayString(String[] children) {
+      return "dummy";
+    }
+  }
+
+
+  private Map getReflectionUtilsCache() {
+    Field constructorCache;
+    try {
+      constructorCache = 
ReflectionUtils.class.getDeclaredField("CONSTRUCTOR_CACHE");
+      if (constructorCache != null) {
+        constructorCache.setAccessible(true);
+        return (Map)constructorCache.get(new ReflectionUtils());
+      }
+    } catch (Exception e) {
+      LOG.info("Failed to get Hadoop ReflectionUtils CONSTRUCTOR_CACHE", e);
+    }
+    return null;
+  }
+
+  @Test
+  public void testReflectionCleanup() throws Exception {
+    SessionState ss = SessionState.get();
+    assertNull(ss.getTezSession());
+    ReflectionUtils.newInstance(DummyUDF.class, null);
+    ss.close();
+    Map cache = getReflectionUtilsCache();
+    assertTrue("Cache can't be null", (cache != null));
+    assertEquals("Size of cache is " + cache.size(), 0, cache.size());
+  }
+
   @Test
   public void testReloadExistingAuxJars2() {
     HiveConf conf = new HiveConf();

Reply via email to