This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 096839560b4 [opt](hive) simplify the isSplitable method to avoid too
many hadoop metrics (#33242)
096839560b4 is described below
commit 096839560b4fa2156e040143308859d3916edc3d
Author: Mingyu Chen <[email protected]>
AuthorDate: Thu Apr 4 08:18:07 2024 +0800
[opt](hive) simplify the isSplitable method to avoid too many hadoop
metrics (#33242)
The inputFormat.isSplitable() method creates a FileSystem.
Each FileSystem registers a lot of Hadoop metrics, which take up a lot of
memory.
This PR simplifies the check to avoid calling inputFormat.isSplitable().
Only for branch-2.0
---
.../doris/catalog/external/HMSExternalTable.java | 6 ++--
.../apache/doris/external/hive/util/HiveUtil.java | 38 +++-------------------
2 files changed, 7 insertions(+), 37 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 2dc1c0d6e48..a268916d33d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -79,7 +79,7 @@ import java.util.stream.Collectors;
public class HMSExternalTable extends ExternalTable {
private static final Logger LOG =
LogManager.getLogger(HMSExternalTable.class);
- private static final Set<String> SUPPORTED_HIVE_FILE_FORMATS;
+ public static final Set<String> SUPPORTED_HIVE_FILE_FORMATS;
private static final Set<String> SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS;
private static final String TBL_PROP_TXN_PROPERTIES =
"transactional_properties";
@@ -712,13 +712,13 @@ public class HMSExternalTable extends ExternalTable {
@Override
public boolean isDistributionColumn(String columnName) {
return
getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
- .collect(Collectors.toSet()).contains(columnName.toLowerCase());
+
.collect(Collectors.toSet()).contains(columnName.toLowerCase());
}
@Override
public Set<String> getDistributionColumnNames() {
return
getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
- .collect(Collectors.toSet());
+ .collect(Collectors.toSet());
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
index deb048b5943..f0f013fa1f0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
@@ -21,16 +21,14 @@ import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
-import org.apache.doris.fs.FileSystemFactory;
import org.apache.doris.fs.remote.BrokerFileSystem;
import org.apache.doris.fs.remote.RemoteFileSystem;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
@@ -46,10 +44,7 @@ import org.apache.hadoop.util.ReflectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
-import java.io.IOException;
import java.io.UnsupportedEncodingException;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
@@ -169,7 +164,7 @@ public final class HiveUtil {
return Type.DECIMALV2;
default:
throw new UnsupportedOperationException("Unsupported
type: "
- + primitiveTypeInfo.getPrimitiveCategory());
+ + primitiveTypeInfo.getPrimitiveCategory());
}
}
case LIST:
@@ -191,38 +186,13 @@ public final class HiveUtil {
}
public static boolean isSplittable(RemoteFileSystem remoteFileSystem,
InputFormat<?, ?> inputFormat,
- String location, JobConf jobConf)
throws UserException {
+ String location, JobConf jobConf) throws UserException {
if (remoteFileSystem instanceof BrokerFileSystem) {
return ((BrokerFileSystem) remoteFileSystem)
.isSplittable(location,
inputFormat.getClass().getCanonicalName());
}
- // ORC uses a custom InputFormat but is always splittable
- if (inputFormat.getClass().getSimpleName().equals("OrcInputFormat")) {
- return true;
- }
- // use reflection to get isSplitable method on FileInputFormat
- // ATTN: the method name is actually "isSplitable", but the right
spell is "isSplittable"
- Method method = null;
- for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz =
clazz.getSuperclass()) {
- try {
- method = clazz.getDeclaredMethod("isSplitable",
FileSystem.class, Path.class);
- break;
- } catch (NoSuchMethodException ignored) {
- LOG.debug("Class {} doesn't contain isSplitable method.",
clazz);
- }
- }
-
- if (method == null) {
- return false;
- }
- Path path = new Path(location);
- try {
- method.setAccessible(true);
- return (boolean) method.invoke(inputFormat,
FileSystemFactory.getNativeByPath(path, jobConf), path);
- } catch (InvocationTargetException | IllegalAccessException |
IOException e) {
- throw new RuntimeException(e);
- }
+ return
HMSExternalTable.SUPPORTED_HIVE_FILE_FORMATS.contains(inputFormat.getClass().getName());
}
public static String getHivePartitionValue(String part) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]