This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8338cad5a [hive] Cache TableSchema into Configuration to avoid loading
read scheme file in PaimonSerDe (#2946)
8338cad5a is described below
commit 8338cad5afb6f5b1d225791d7d6939f622bcb4bb
Author: wgcn <[email protected]>
AuthorDate: Fri Mar 15 16:10:37 2024 +0800
[hive] Cache TableSchema into Configuration to avoid loading read scheme
file in PaimonSerDe (#2946)
---
.../org/apache/paimon/utils/JsonSerdeUtil.java | 8 ++++++++
.../java/org/apache/paimon/hive/HiveSchema.java | 2 +-
.../java/org/apache/paimon/hive/PaimonSerDe.java | 20 ++++++++++++++++---
.../apache/paimon/hive/PaimonStorageHandler.java | 15 +++++++++++---
.../apache/paimon/hive/HiveTableSchemaTest.java | 23 ++++++++++++++++++++++
5 files changed, 61 insertions(+), 7 deletions(-)
diff --git
a/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
b/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
index d45a93368..676276a30 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
@@ -120,6 +120,14 @@ public class JsonSerdeUtil {
fieldName, clazz.getName(),
node.getClass().getName()));
}
+ public static <T> T fromJson(String json, TypeReference<T> typeReference) {
+ try {
+ return OBJECT_MAPPER_INSTANCE.readValue(json, typeReference);
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+
public static <T> T fromJson(String json, Class<T> clazz) {
try {
return OBJECT_MAPPER_INSTANCE.reader().readValue(json, clazz);
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
index dc6fc9921..f63765141 100644
--- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
+++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
@@ -67,7 +67,7 @@ public class HiveSchema {
private static final Logger LOG =
LoggerFactory.getLogger(HiveSchema.class);
private final RowType rowType;
- private HiveSchema(RowType rowType) {
+ HiveSchema(RowType rowType) {
this.rowType = rowType;
}
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
index 2934b7e77..fe6a31b53 100644
--- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
+++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
@@ -19,8 +19,12 @@
package org.apache.paimon.hive;
import org.apache.paimon.hive.objectinspector.PaimonInternalRowObjectInspector;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.JsonSerdeUtil;
import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
+import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.type.TypeReference;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
@@ -32,6 +36,7 @@ import org.apache.hadoop.io.Writable;
import javax.annotation.Nullable;
+import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -53,11 +58,20 @@ public class PaimonSerDe extends AbstractSerDe {
@Override
public void initialize(@Nullable Configuration configuration, Properties
properties)
throws SerDeException {
- HiveSchema schema = HiveSchema.extract(configuration, properties);
- this.tableSchema = schema;
+ String dataFieldStr = properties.getProperty(PaimonStorageHandler.PAIMON_TABLE_FIELDS);
+ if (dataFieldStr != null) {
+ List<DataField> dataFields =
+ JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference<List<DataField>>() {});
+ this.tableSchema = new HiveSchema(new RowType(dataFields));
+ } else {
+ this.tableSchema = HiveSchema.extract(configuration, properties);
+ }
+
inspector =
new PaimonInternalRowObjectInspector(
- schema.fieldNames(), schema.fieldTypes(), schema.fieldComments());
+ tableSchema.fieldNames(),
+ tableSchema.fieldTypes(),
+ tableSchema.fieldComments());
}
@Override
diff --git
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
index b410833a9..5987fd0c9 100644
--- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
+++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
@@ -21,6 +21,7 @@ package org.apache.paimon.hive;
import org.apache.paimon.hive.mapred.PaimonInputFormat;
import org.apache.paimon.hive.mapred.PaimonOutputCommitter;
import org.apache.paimon.hive.mapred.PaimonOutputFormat;
+import org.apache.paimon.utils.JsonSerdeUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
@@ -46,6 +47,8 @@ public class PaimonStorageHandler implements
HiveStoragePredicateHandler, HiveSt
private static final String MAPRED_OUTPUT_COMMITTER =
"mapred.output.committer.class";
private static final String PAIMON_WRITE = "paimon.write";
+ public static final String PAIMON_TABLE_FIELDS = "paimon.table.fields";
+
private Configuration conf;
@Override
@@ -76,9 +79,15 @@ public class PaimonStorageHandler implements
HiveStoragePredicateHandler, HiveSt
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String,
String> map) {
Properties properties = tableDesc.getProperties();
- map.put(
- LocationKeyExtractor.INTERNAL_LOCATION,
- LocationKeyExtractor.getPaimonLocation(conf, properties));
+ String paimonLocation = LocationKeyExtractor.getPaimonLocation(conf, properties);
+ map.put(LocationKeyExtractor.INTERNAL_LOCATION, paimonLocation);
+ String dataFieldJsonStr = getDataFieldsJsonStr(properties);
+ tableDesc.getProperties().put(PAIMON_TABLE_FIELDS, dataFieldJsonStr);
+ }
+
+ static String getDataFieldsJsonStr(Properties properties) {
+ HiveSchema hiveSchema = HiveSchema.extract(null, properties);
+ return JsonSerdeUtil.toJson(hiveSchema.fields());
}
public void configureInputJobCredentials(TableDesc tableDesc, Map<String,
String> map) {}
diff --git
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
index 326667a47..07cd00c8e 100644
--- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
+++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
@@ -25,7 +25,11 @@ import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.JsonSerdeUtil;
+import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.type.TypeReference;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -33,6 +37,7 @@ import org.junit.jupiter.api.io.TempDir;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Properties;
import static org.assertj.core.api.Assertions.assertThat;
@@ -342,4 +347,22 @@ public class HiveTableSchemaTest {
properties.setProperty("location", tempDir.toString());
return properties;
}
+
+ @Test
+ public void testReadHiveSchemaFromProperties() throws Exception {
+ createSchema();
+ // cache the TableSchema to properties
+ Properties properties = new Properties();
+ properties.put(hive_metastoreConstants.META_TABLE_LOCATION, tempDir.toString());
+
+ HiveSchema hiveSchema = HiveSchema.extract(null, properties);
+
+ List<DataField> dataFields = hiveSchema.fields();
+ String dataFieldStr = JsonSerdeUtil.toJson(dataFields);
+
+ List<DataField> dataFieldsDeserialized =
+ JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference<List<DataField>>() {});
+ HiveSchema newHiveSchema = new HiveSchema(new RowType(dataFieldsDeserialized));
+ assertThat(newHiveSchema).usingRecursiveComparison().isEqualTo(hiveSchema);
+ }
}