This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 096839560b4 [opt](hive) simplify the isSplitable method to avoid too many hadoop metrics (#33242) 096839560b4 is described below commit 096839560b4fa2156e040143308859d3916edc3d Author: Mingyu Chen <morning...@163.com> AuthorDate: Thu Apr 4 08:18:07 2024 +0800 [opt](hive) simplify the isSplitable method to avoid too many hadoop metrics (#33242) The inputFormat.isSplitable() method creates a FileSystem. Each FileSystem registers a lot of Hadoop metrics, which takes a lot of memory. This PR simplifies the check to avoid calling inputFormat.isSplitable(). Only for branch-2.0 --- .../doris/catalog/external/HMSExternalTable.java | 6 ++-- .../apache/doris/external/hive/util/HiveUtil.java | 38 +++------------------- 2 files changed, 7 insertions(+), 37 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java index 2dc1c0d6e48..a268916d33d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java @@ -79,7 +79,7 @@ import java.util.stream.Collectors; public class HMSExternalTable extends ExternalTable { private static final Logger LOG = LogManager.getLogger(HMSExternalTable.class); - private static final Set<String> SUPPORTED_HIVE_FILE_FORMATS; + public static final Set<String> SUPPORTED_HIVE_FILE_FORMATS; private static final Set<String> SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS; private static final String TBL_PROP_TXN_PROPERTIES = "transactional_properties"; @@ -712,13 +712,13 @@ public class HMSExternalTable extends ExternalTable { @Override public boolean isDistributionColumn(String columnName) { return getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase) - .collect(Collectors.toSet()).contains(columnName.toLowerCase()); + 
.collect(Collectors.toSet()).contains(columnName.toLowerCase()); } @Override public Set<String> getDistributionColumnNames() { return getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase) - .collect(Collectors.toSet()); + .collect(Collectors.toSet()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java index deb048b5943..f0f013fa1f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java @@ -21,16 +21,14 @@ import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; -import org.apache.doris.fs.FileSystemFactory; import org.apache.doris.fs.remote.BrokerFileSystem; import org.apache.doris.fs.remote.RemoteFileSystem; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; @@ -46,10 +44,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.util.List; @@ -169,7 +164,7 @@ public final class HiveUtil { return Type.DECIMALV2; default: throw new 
UnsupportedOperationException("Unsupported type: " - + primitiveTypeInfo.getPrimitiveCategory()); + + primitiveTypeInfo.getPrimitiveCategory()); } } case LIST: @@ -191,38 +186,13 @@ public final class HiveUtil { } public static boolean isSplittable(RemoteFileSystem remoteFileSystem, InputFormat<?, ?> inputFormat, - String location, JobConf jobConf) throws UserException { + String location, JobConf jobConf) throws UserException { if (remoteFileSystem instanceof BrokerFileSystem) { return ((BrokerFileSystem) remoteFileSystem) .isSplittable(location, inputFormat.getClass().getCanonicalName()); } - // ORC uses a custom InputFormat but is always splittable - if (inputFormat.getClass().getSimpleName().equals("OrcInputFormat")) { - return true; - } - // use reflection to get isSplitable method on FileInputFormat - // ATTN: the method name is actually "isSplitable", but the right spell is "isSplittable" - Method method = null; - for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz = clazz.getSuperclass()) { - try { - method = clazz.getDeclaredMethod("isSplitable", FileSystem.class, Path.class); - break; - } catch (NoSuchMethodException ignored) { - LOG.debug("Class {} doesn't contain isSplitable method.", clazz); - } - } - - if (method == null) { - return false; - } - Path path = new Path(location); - try { - method.setAccessible(true); - return (boolean) method.invoke(inputFormat, FileSystemFactory.getNativeByPath(path, jobConf), path); - } catch (InvocationTargetException | IllegalAccessException | IOException e) { - throw new RuntimeException(e); - } + return HMSExternalTable.SUPPORTED_HIVE_FILE_FORMATS.contains(inputFormat); } public static String getHivePartitionValue(String part) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org