This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new b28e3461675 branch-3.1: [optimize](paimon): Avoid redundant hadoop conf setting in each split #57950 (#58012)
b28e3461675 is described below
commit b28e3461675b94dc472790173a10e4e0d81c746c
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 25 11:06:00 2025 +0800
branch-3.1: [optimize](paimon): Avoid redundant hadoop conf setting in each split #57950 (#58012)
Cherry-picked from #57950
Co-authored-by: Socrates <[email protected]>
---
be/src/vec/exec/format/table/paimon_jni_reader.cpp | 16 ++++++++++++++--
.../doris/datasource/paimon/source/PaimonScanNode.java | 14 ++++++++++----
gensrc/thrift/PlanNodes.thrift | 4 ++++
3 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
index b603a3ccc54..a7fb5adcca8 100644
--- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
@@ -50,7 +50,12 @@ PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_d
}
std::map<String, String> params;
params["paimon_split"] = range.table_format_params.paimon_params.paimon_split;
- params["paimon_predicate"] = range.table_format_params.paimon_params.paimon_predicate;
+ if (range_params->__isset.paimon_predicate && !range_params->paimon_predicate.empty()) {
+ params["paimon_predicate"] = range_params->paimon_predicate;
+ } else if (range.table_format_params.paimon_params.__isset.paimon_predicate) {
+ // Fallback to split level paimon_predicate for backward compatibility
+ params["paimon_predicate"] = range.table_format_params.paimon_params.paimon_predicate;
+ }
params["required_fields"] = join(column_names, ",");
params["columns_types"] = join(column_types, "#");
params["time_zone"] = _state->timezone();
@@ -67,7 +72,14 @@ PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_d
for (const auto& kv : range.table_format_params.paimon_params.paimon_options) {
params[PAIMON_OPTION_PREFIX + kv.first] = kv.second;
}
- if (range.table_format_params.paimon_params.__isset.hadoop_conf) {
+ // Prefer hadoop conf from scan node level (range_params->properties) over split level
+ // to avoid redundant configuration in each split
+ if (range_params->__isset.properties && !range_params->properties.empty()) {
+ for (const auto& kv : range_params->properties) {
+ params[HADOOP_OPTION_PREFIX + kv.first] = kv.second;
+ }
+ } else if (range.table_format_params.paimon_params.__isset.hadoop_conf) {
+ // Fallback to split level hadoop conf for backward compatibility
for (const auto& kv : range.table_format_params.paimon_params.hadoop_conf) {
params[HADOOP_OPTION_PREFIX + kv.first] = kv.second;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index 9c3f60c9c2f..673d2499e7a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -189,6 +189,14 @@ public class PaimonScanNode extends FileQueryScanNode {
return Optional.of(serializedTable);
}
+ @Override
+ public void createScanRangeLocations() throws UserException {
+ super.createScanRangeLocations();
+ // Set paimon_predicate at ScanNode level to avoid redundant serialization in each split
+ String serializedPredicate = PaimonUtil.encodeObjectToString(predicates);
+ params.setPaimonPredicate(serializedPredicate);
+ }
+
private void putHistorySchemaInfo(Long schemaId) {
if (currentQuerySchema.putIfAbsent(schemaId, Boolean.TRUE) == null) {
PaimonExternalTable table = (PaimonExternalTable) source.getTargetTable();
@@ -224,10 +232,8 @@ public class PaimonScanNode extends FileQueryScanNode {
fileDesc.setSchemaId(paimonSplit.getSchemaId());
}
fileDesc.setFileFormat(fileFormat);
- fileDesc.setPaimonPredicate(PaimonUtil.encodeObjectToString(predicates));
- // The hadoop conf should be same with
- // PaimonExternalCatalog.createCatalog()#getConfiguration()
- fileDesc.setHadoopConf(backendStorageProperties);
+ // Hadoop conf is set at ScanNode level via params.properties in createScanRangeLocations(),
+ // no need to set it for each split to avoid redundant configuration
Optional<DeletionFile> optDeletionFile = paimonSplit.getDeletionFile();
if (optDeletionFile.isPresent()) {
DeletionFile deletionFile = optDeletionFile.get();
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index eb2baf31780..eca2c6fbe3a 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -478,6 +478,10 @@ struct TFileScanRangeParams {
25: optional i64 current_schema_id;
// All schema information used in the current query process
26: optional list<ExternalTableSchema.TSchema> history_schema_info
+
+ // Paimon predicate from FE, used for jni scanner
+ // Set at ScanNode level to avoid redundant serialization in each split
+ 27: optional string paimon_predicate
}
struct TFileRangeDesc {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]