This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 8969a6bd145 branch-3.0: [opt](hive) add option to get schema from
table object #50038 (#50268)
8969a6bd145 is described below
commit 8969a6bd14585040309d4217e776388f767a5744
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Apr 23 11:57:31 2025 +0800
branch-3.0: [opt](hive) add option to get schema from table object #50038
(#50268)
Cherry-picked from #50038
Co-authored-by: Mingyu Chen (Rayner) <[email protected]>
---
.../doris/datasource/hive/HMSExternalCatalog.java | 7 +++
.../doris/datasource/hive/HMSExternalTable.java | 23 +++++++-
.../hive/test_hive_get_schema_from_table.out | Bin 0 -> 6103 bytes
.../hive/test_hive_get_schema_from_table.groovy | 62 +++++++++++++++++++++
4 files changed, 89 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
index 8032dfbb1e2..abd099894a3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalCatalog.java
@@ -75,6 +75,13 @@ public class HMSExternalCatalog extends ExternalCatalog {
public static final String FILE_META_CACHE_TTL_SECOND =
"file.meta.cache.ttl-second";
// broker name for file split and query scan.
public static final String BIND_BROKER_NAME = "broker.name";
+ // Default is false. If set to true, the table schema is taken from the
"remoteTable" object instead of being fetched from the hive metastore.
+ // This works around a forward compatibility issue of the hive metastore,
where a
+ // "storage schema reading not support" error may be thrown.
+ // Setting this to true avoids that error.
+ // But notice that if set to true, the default values of columns will be
ignored because we cannot get default values
+ // from the remoteTable object.
+ public static final String GET_SCHEMA_FROM_TABLE = "get_schema_from_table";
// -1 means file cache no ttl set
public static final int FILE_META_CACHE_NO_TTL = -1;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index cb999d79823..22a9349673b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.logging.log4j.LogManager;
@@ -572,9 +573,18 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
}
private Optional<SchemaCacheValue> getHiveSchema() {
- HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
- List<FieldSchema> schema = client.getSchema(dbName, name);
- Map<String, String> colDefaultValues =
client.getDefaultColumnValues(dbName, name);
+ boolean getFromTable = catalog.getCatalogProperty()
+ .getOrDefault(HMSExternalCatalog.GET_SCHEMA_FROM_TABLE,
"false")
+ .equalsIgnoreCase("true");
+ List<FieldSchema> schema = null;
+ Map<String, String> colDefaultValues = Maps.newHashMap();
+ if (getFromTable) {
+ schema = getSchemaFromRemoteTable(remoteTable);
+ } else {
+ HMSCachedClient client = ((HMSExternalCatalog)
catalog).getClient();
+ schema = client.getSchema(dbName, name);
+ colDefaultValues = client.getDefaultColumnValues(dbName, name);
+ }
List<Column> columns = Lists.newArrayListWithCapacity(schema.size());
for (FieldSchema field : schema) {
String fieldName = field.getName().toLowerCase(Locale.ROOT);
@@ -587,6 +597,13 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
return Optional.of(new HMSSchemaCacheValue(columns, partitionColumns));
}
+ private static List<FieldSchema> getSchemaFromRemoteTable(Table table) {
+ List<FieldSchema> schema = Lists.newArrayList();
+ schema.addAll(table.getSd().getCols());
+ schema.addAll(table.getPartitionKeys());
+ return schema;
+ }
+
@Override
public long fetchRowCount() {
makeSureInitialized();
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
new file mode 100644
index 00000000000..2e190d329f1
Binary files /dev/null and
b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
differ
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
new file mode 100644
index 00000000000..c07a0a763b0
--- /dev/null
+++
b/regression-test/suites/external_table_p0/hive/test_hive_get_schema_from_table.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_get_schema_from_table",
"external_docker,hive,external_docker_hive,p0,external") {
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ // test get schema from table
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ String catalog_name = "test_${hivePrefix}_get_schema"
+ String ex_db_name = "`default`"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String hdfs_port = context.config.otherConfigs.get(hivePrefix +
"HdfsPort")
+
+ sql """drop catalog if exists ${catalog_name} """
+
+ sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='hms',
+ 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+ 'hadoop.username' = 'hive',
+ 'get_schema_from_table' = 'true'
+ );"""
+
+ sql """switch ${catalog_name}"""
+
+ def res_dbs_log = sql "show databases;"
+ for (int i = 0; i < res_dbs_log.size(); i++) {
+ def tbs = sql "show tables from `${res_dbs_log[i][0]}`"
+ log.info("database = ${res_dbs_log[i][0]} => tables = " +
tbs.toString())
+ }
+
+ order_qt_schema_1 """select * from
${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit
1;"""
+ order_qt_schema_2 """select * from
${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value
limit 1;"""
+ order_qt_schema_3 """select * from
${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages order by id desc
limit 5;"""
+ order_qt_schema_4 """select * from
${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc
limit 3;"""
+ order_qt_schema_5 """select * from
${catalog_name}.${ex_db_name}.csv_partition_table order by k1 limit 1;"""
+ order_qt_schema_6 """select * from
${catalog_name}.${ex_db_name}.csv_all_types limit 1;"""
+ order_qt_schema_7 """select * except(t_varchar_max_length) from
${catalog_name}.${ex_db_name}.text_all_types limit 1;"""
+
+ //sql """drop catalog if exists ${catalog_name} """
+
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]