This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c9757b54315 [improvement](paimon)Using table serialization on the jni
side (#43167)
c9757b54315 is described below
commit c9757b543157d95b275caa4545a5ebf20241a523
Author: wuwenchi <[email protected]>
AuthorDate: Fri Nov 8 08:04:13 2024 +0800
[improvement](paimon)Using table serialization on the jni side (#43167)
Advantages:
1. Reduces access to HMS and HDFS on the JNI side.
2. Avoids inconsistency between the table seen by the FE and the table seen by the BE.
---------
Co-authored-by: wuwenchi <[email protected]>
---
be/src/vec/exec/format/table/paimon_jni_reader.cpp | 3 ++
.../org/apache/doris/paimon/PaimonJniScanner.java | 32 ++++++++++++++++------
.../datasource/paimon/source/PaimonScanNode.java | 1 +
gensrc/thrift/PlanNodes.thrift | 17 ++++++------
4 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
index a9ec243cf46..30358eace1a 100644
--- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
@@ -61,6 +61,9 @@ PaimonJniReader::PaimonJniReader(const
std::vector<SlotDescriptor*>& file_slot_d
std::to_string(range.table_format_params.paimon_params.last_update_time);
params["required_fields"] = join(column_names, ",");
params["columns_types"] = join(column_types, "#");
+ if (range.table_format_params.paimon_params.__isset.paimon_table) {
+ params["paimon_table"] =
range.table_format_params.paimon_params.paimon_table;
+ }
// Used to create paimon option
for (auto& kv : range.table_format_params.paimon_params.paimon_options) {
diff --git
a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
index 7bd9fa631c8..e85d465f663 100644
---
a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
+++
b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
@@ -42,13 +42,19 @@ import java.util.stream.Collectors;
public class PaimonJniScanner extends JniScanner {
private static final Logger LOG =
LoggerFactory.getLogger(PaimonJniScanner.class);
+ @Deprecated
private static final String PAIMON_OPTION_PREFIX = "paimon.";
+ @Deprecated
private static final String HADOOP_OPTION_PREFIX = "hadoop.";
private final Map<String, String> params;
+ @Deprecated
private final Map<String, String> paimonOptionParams;
+ @Deprecated
private final Map<String, String> hadoopOptionParams;
+ @Deprecated
private final String dbName;
+ @Deprecated
private final String tblName;
private final String paimonSplit;
private final String paimonPredicate;
@@ -58,9 +64,13 @@ public class PaimonJniScanner extends JniScanner {
private List<String> paimonAllFieldNames;
private List<DataType> paimonDataTypeList;
+ @Deprecated
private long ctlId;
+ @Deprecated
private long dbId;
+ @Deprecated
private long tblId;
+ @Deprecated
private long lastUpdateTime;
private RecordReader.RecordIterator<InternalRow> recordIterator = null;
private final ClassLoader classLoader;
@@ -214,16 +224,20 @@ public class PaimonJniScanner extends JniScanner {
}
private void initTable() {
- PaimonTableCacheKey key = new PaimonTableCacheKey(ctlId, dbId, tblId,
- paimonOptionParams, hadoopOptionParams, dbName, tblName);
- TableExt tableExt = PaimonTableCache.getTable(key);
- if (tableExt.getCreateTime() < lastUpdateTime) {
- LOG.warn("invalidate cache table:{}, localTime:{}, remoteTime:{}",
key, tableExt.getCreateTime(),
- lastUpdateTime);
- PaimonTableCache.invalidateTableCache(key);
- tableExt = PaimonTableCache.getTable(key);
+ if (params.containsKey("paimon_table")) {
+ table = PaimonUtils.deserialize(params.get("paimon_table"));
+ } else {
+ PaimonTableCacheKey key = new PaimonTableCacheKey(ctlId, dbId,
tblId,
+ paimonOptionParams, hadoopOptionParams, dbName, tblName);
+ TableExt tableExt = PaimonTableCache.getTable(key);
+ if (tableExt.getCreateTime() < lastUpdateTime) {
+ LOG.warn("invalidate cache table:{}, localTime:{},
remoteTime:{}", key, tableExt.getCreateTime(),
+ lastUpdateTime);
+ PaimonTableCache.invalidateTableCache(key);
+ tableExt = PaimonTableCache.getTable(key);
+ }
+ this.table = tableExt.getTable();
}
- this.table = tableExt.getTable();
paimonAllFieldNames = PaimonUtils.getFieldNames(this.table.rowType());
if (LOG.isDebugEnabled()) {
LOG.debug("paimonAllFieldNames:{}", paimonAllFieldNames);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index cd477cc9b29..29e3d0529f2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -164,6 +164,7 @@ public class PaimonScanNode extends FileQueryScanNode {
fileDesc.setDbId(((PaimonExternalTable)
source.getTargetTable()).getDbId());
fileDesc.setTblId(source.getTargetTable().getId());
fileDesc.setLastUpdateTime(source.getTargetTable().getUpdateTime());
+ fileDesc.setPaimonTable(encodeObjectToString(source.getPaimonTable()));
// The hadoop conf should be same with
PaimonExternalCatalog.createCatalog()#getConfiguration()
fileDesc.setHadoopConf(source.getCatalog().getCatalogProperty().getHadoopProperties());
Optional<DeletionFile> optDeletionFile = paimonSplit.getDeletionFile();
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index eb5266942c0..ec4497b267b 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -321,17 +321,18 @@ struct TPaimonDeletionFileDesc {
struct TPaimonFileDesc {
1: optional string paimon_split
2: optional string paimon_column_names
- 3: optional string db_name
- 4: optional string table_name
+ 3: optional string db_name // deprecated
+ 4: optional string table_name // deprecated
5: optional string paimon_predicate
- 6: optional map<string, string> paimon_options
- 7: optional i64 ctl_id
- 8: optional i64 db_id
- 9: optional i64 tbl_id
- 10: optional i64 last_update_time
+ 6: optional map<string, string> paimon_options // deprecated
+ 7: optional i64 ctl_id // deprecated
+ 8: optional i64 db_id // deprecated
+ 9: optional i64 tbl_id // deprecated
+ 10: optional i64 last_update_time // deprecated
11: optional string file_format
12: optional TPaimonDeletionFileDesc deletion_file;
- 13: optional map<string, string> hadoop_conf
+ 13: optional map<string, string> hadoop_conf // deprecated
+ 14: optional string paimon_table
}
struct TTrinoConnectorFileDesc {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]