This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 8338cad5a [hive] Cache TableSchema into Configuration to avoid loading 
read schema file in PaimonSerDe (#2946)
8338cad5a is described below

commit 8338cad5afb6f5b1d225791d7d6939f622bcb4bb
Author: wgcn <[email protected]>
AuthorDate: Fri Mar 15 16:10:37 2024 +0800

    [hive] Cache TableSchema into Configuration to avoid loading read schema 
file in PaimonSerDe (#2946)
---
 .../org/apache/paimon/utils/JsonSerdeUtil.java     |  8 ++++++++
 .../java/org/apache/paimon/hive/HiveSchema.java    |  2 +-
 .../java/org/apache/paimon/hive/PaimonSerDe.java   | 20 ++++++++++++++++---
 .../apache/paimon/hive/PaimonStorageHandler.java   | 15 +++++++++++---
 .../apache/paimon/hive/HiveTableSchemaTest.java    | 23 ++++++++++++++++++++++
 5 files changed, 61 insertions(+), 7 deletions(-)

diff --git 
a/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java 
b/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
index d45a93368..676276a30 100644
--- a/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
+++ b/paimon-core/src/main/java/org/apache/paimon/utils/JsonSerdeUtil.java
@@ -120,6 +120,14 @@ public class JsonSerdeUtil {
                         fieldName, clazz.getName(), 
node.getClass().getName()));
     }
 
+    public static <T> T fromJson(String json, TypeReference<T> typeReference) {
+        try {
+            return OBJECT_MAPPER_INSTANCE.readValue(json, typeReference);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
     public static <T> T fromJson(String json, Class<T> clazz) {
         try {
             return OBJECT_MAPPER_INSTANCE.reader().readValue(json, clazz);
diff --git 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
index dc6fc9921..f63765141 100644
--- 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
+++ 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java
@@ -67,7 +67,7 @@ public class HiveSchema {
     private static final Logger LOG = 
LoggerFactory.getLogger(HiveSchema.class);
     private final RowType rowType;
 
-    private HiveSchema(RowType rowType) {
+    HiveSchema(RowType rowType) {
         this.rowType = rowType;
     }
 
diff --git 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
index 2934b7e77..fe6a31b53 100644
--- 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
+++ 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonSerDe.java
@@ -19,8 +19,12 @@
 package org.apache.paimon.hive;
 
 import org.apache.paimon.hive.objectinspector.PaimonInternalRowObjectInspector;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.JsonSerdeUtil;
 
 import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
+import 
org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.type.TypeReference;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
@@ -32,6 +36,7 @@ import org.apache.hadoop.io.Writable;
 
 import javax.annotation.Nullable;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 
@@ -53,11 +58,20 @@ public class PaimonSerDe extends AbstractSerDe {
     @Override
     public void initialize(@Nullable Configuration configuration, Properties 
properties)
             throws SerDeException {
-        HiveSchema schema = HiveSchema.extract(configuration, properties);
-        this.tableSchema = schema;
+        String dataFieldStr = 
properties.getProperty(PaimonStorageHandler.PAIMON_TABLE_FIELDS);
+        if (dataFieldStr != null) {
+            List<DataField> dataFields =
+                    JsonSerdeUtil.fromJson(dataFieldStr, new 
TypeReference<List<DataField>>() {});
+            this.tableSchema = new HiveSchema(new RowType(dataFields));
+        } else {
+            this.tableSchema = HiveSchema.extract(configuration, properties);
+        }
+
         inspector =
                 new PaimonInternalRowObjectInspector(
-                        schema.fieldNames(), schema.fieldTypes(), 
schema.fieldComments());
+                        tableSchema.fieldNames(),
+                        tableSchema.fieldTypes(),
+                        tableSchema.fieldComments());
     }
 
     @Override
diff --git 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
index b410833a9..5987fd0c9 100644
--- 
a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
+++ 
b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/PaimonStorageHandler.java
@@ -21,6 +21,7 @@ package org.apache.paimon.hive;
 import org.apache.paimon.hive.mapred.PaimonInputFormat;
 import org.apache.paimon.hive.mapred.PaimonOutputCommitter;
 import org.apache.paimon.hive.mapred.PaimonOutputFormat;
+import org.apache.paimon.utils.JsonSerdeUtil;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
@@ -46,6 +47,8 @@ public class PaimonStorageHandler implements 
HiveStoragePredicateHandler, HiveSt
     private static final String MAPRED_OUTPUT_COMMITTER = 
"mapred.output.committer.class";
     private static final String PAIMON_WRITE = "paimon.write";
 
+    public static final String PAIMON_TABLE_FIELDS = "paimon.table.fields";
+
     private Configuration conf;
 
     @Override
@@ -76,9 +79,15 @@ public class PaimonStorageHandler implements 
HiveStoragePredicateHandler, HiveSt
     @Override
     public void configureInputJobProperties(TableDesc tableDesc, Map<String, 
String> map) {
         Properties properties = tableDesc.getProperties();
-        map.put(
-                LocationKeyExtractor.INTERNAL_LOCATION,
-                LocationKeyExtractor.getPaimonLocation(conf, properties));
+        String paimonLocation = LocationKeyExtractor.getPaimonLocation(conf, 
properties);
+        map.put(LocationKeyExtractor.INTERNAL_LOCATION, paimonLocation);
+        String dataFieldJsonStr = getDataFieldsJsonStr(properties);
+        tableDesc.getProperties().put(PAIMON_TABLE_FIELDS, dataFieldJsonStr);
+    }
+
+    static String getDataFieldsJsonStr(Properties properties) {
+        HiveSchema hiveSchema = HiveSchema.extract(null, properties);
+        return JsonSerdeUtil.toJson(hiveSchema.fields());
     }
 
     public void configureInputJobCredentials(TableDesc tableDesc, Map<String, 
String> map) {}
diff --git 
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
 
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
index 326667a47..07cd00c8e 100644
--- 
a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
+++ 
b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java
@@ -25,7 +25,11 @@ import org.apache.paimon.schema.SchemaManager;
 import org.apache.paimon.types.DataField;
 import org.apache.paimon.types.DataTypes;
 import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.JsonSerdeUtil;
 
+import 
org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.type.TypeReference;
+
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -33,6 +37,7 @@ import org.junit.jupiter.api.io.TempDir;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Properties;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -342,4 +347,22 @@ public class HiveTableSchemaTest {
         properties.setProperty("location", tempDir.toString());
         return properties;
     }
+
+    @Test
+    public void testReadHiveSchemaFromProperties() throws Exception {
+        createSchema();
+        // cache the TableSchema to properties
+        Properties properties = new Properties();
+        properties.put(hive_metastoreConstants.META_TABLE_LOCATION, 
tempDir.toString());
+
+        HiveSchema hiveSchema = HiveSchema.extract(null, properties);
+
+        List<DataField> dataFields = hiveSchema.fields();
+        String dataFieldStr = JsonSerdeUtil.toJson(dataFields);
+
+        List<DataField> dataFieldsDeserialized =
+                JsonSerdeUtil.fromJson(dataFieldStr, new 
TypeReference<List<DataField>>() {});
+        HiveSchema newHiveSchema = new HiveSchema(new 
RowType(dataFieldsDeserialized));
+        
assertThat(newHiveSchema).usingRecursiveComparison().isEqualTo(hiveSchema);
+    }
 }

Reply via email to