This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 096839560b4 [opt](hive) simplify the isSplitable method to avoid too 
many hadoop metrics (#33242)
096839560b4 is described below

commit 096839560b4fa2156e040143308859d3916edc3d
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Thu Apr 4 08:18:07 2024 +0800

    [opt](hive) simplify the isSplitable method to avoid too many hadoop 
metrics (#33242)
    
    The inputFormat.isSplitable() method creates a FileSystem.
    Each FileSystem registers a lot of Hadoop metrics, which takes a lot of
    memory.
    This PR simplifies the check to avoid calling inputFormat.isSplitable().
    
    Only for branch-2.0
---
 .../doris/catalog/external/HMSExternalTable.java   |  6 ++--
 .../apache/doris/external/hive/util/HiveUtil.java  | 38 +++-------------------
 2 files changed, 7 insertions(+), 37 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 2dc1c0d6e48..a268916d33d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -79,7 +79,7 @@ import java.util.stream.Collectors;
 public class HMSExternalTable extends ExternalTable {
     private static final Logger LOG = 
LogManager.getLogger(HMSExternalTable.class);
 
-    private static final Set<String> SUPPORTED_HIVE_FILE_FORMATS;
+    public static final Set<String> SUPPORTED_HIVE_FILE_FORMATS;
     private static final Set<String> SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS;
 
     private static final String TBL_PROP_TXN_PROPERTIES = 
"transactional_properties";
@@ -712,13 +712,13 @@ public class HMSExternalTable extends ExternalTable {
     @Override
     public boolean isDistributionColumn(String columnName) {
         return 
getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
-            .collect(Collectors.toSet()).contains(columnName.toLowerCase());
+                
.collect(Collectors.toSet()).contains(columnName.toLowerCase());
     }
 
     @Override
     public Set<String> getDistributionColumnNames() {
         return 
getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
-            .collect(Collectors.toSet());
+                .collect(Collectors.toSet());
     }
 }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
index deb048b5943..f0f013fa1f0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
@@ -21,16 +21,14 @@ import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
-import org.apache.doris.fs.FileSystemFactory;
 import org.apache.doris.fs.remote.BrokerFileSystem;
 import org.apache.doris.fs.remote.RemoteFileSystem;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat;
 import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
@@ -46,10 +44,7 @@ import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
-import java.io.IOException;
 import java.io.UnsupportedEncodingException;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
 import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -169,7 +164,7 @@ public final class HiveUtil {
                         return Type.DECIMALV2;
                     default:
                         throw new UnsupportedOperationException("Unsupported 
type: "
-                            + primitiveTypeInfo.getPrimitiveCategory());
+                                + primitiveTypeInfo.getPrimitiveCategory());
                 }
             }
             case LIST:
@@ -191,38 +186,13 @@ public final class HiveUtil {
     }
 
     public static boolean isSplittable(RemoteFileSystem remoteFileSystem, 
InputFormat<?, ?> inputFormat,
-                                       String location, JobConf jobConf) 
throws UserException {
+            String location, JobConf jobConf) throws UserException {
         if (remoteFileSystem instanceof BrokerFileSystem) {
             return ((BrokerFileSystem) remoteFileSystem)
                     .isSplittable(location, 
inputFormat.getClass().getCanonicalName());
         }
 
-        // ORC uses a custom InputFormat but is always splittable
-        if (inputFormat.getClass().getSimpleName().equals("OrcInputFormat")) {
-            return true;
-        }
-        // use reflection to get isSplitable method on FileInputFormat
-        // ATTN: the method name is actually "isSplitable", but the right 
spell is "isSplittable"
-        Method method = null;
-        for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz = 
clazz.getSuperclass()) {
-            try {
-                method = clazz.getDeclaredMethod("isSplitable", 
FileSystem.class, Path.class);
-                break;
-            } catch (NoSuchMethodException ignored) {
-                LOG.debug("Class {} doesn't contain isSplitable method.", 
clazz);
-            }
-        }
-
-        if (method == null) {
-            return false;
-        }
-        Path path = new Path(location);
-        try {
-            method.setAccessible(true);
-            return (boolean) method.invoke(inputFormat, 
FileSystemFactory.getNativeByPath(path, jobConf), path);
-        } catch (InvocationTargetException | IllegalAccessException | 
IOException e) {
-            throw new RuntimeException(e);
-        }
+        return 
HMSExternalTable.SUPPORTED_HIVE_FILE_FORMATS.contains(inputFormat);
     }
 
     public static String getHivePartitionValue(String part) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to