This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 13c0449d2e1 [fix](maxcompute)fix maxcompute push down predicates null
pointer. (#57474)
13c0449d2e1 is described below
commit 13c0449d2e18e80bf1db6dc51c368ef38e5f3c85
Author: daidai <[email protected]>
AuthorDate: Fri Oct 31 14:35:53 2025 +0800
[fix](maxcompute)fix maxcompute push down predicates null pointer. (#57474)
### What problem does this PR solve?
Problem Summary:
This PR is a continuation of PR #55635. Based on the current
investigation, the conclusion in PR #55635 appears to be incorrect — the
reason the predicate column cannot be found in the column map
(`columnNameToOdpsColumn`) remains unclear.
To help with further debugging, this PR logs the map's size and keys
before the predicate push-down `AnalysisException` is thrown.
In addition, during the `refresh table` process, the
MaxComputeExternalTable object remains unchanged, so the objects it
holds also stay the same. As a result, each time the table schema is
initialized, `put` may be called again on the same map held by this
object. This PR therefore moves the map to `MaxComputeSchemaCacheValue`,
which better matches its lifecycle and avoids repeated updates during
table refresh.
---
.../doris/datasource/maxcompute/MaxComputeExternalTable.java | 11 ++++++-----
.../datasource/maxcompute/MaxComputeSchemaCacheValue.java | 9 ++++++++-
.../datasource/maxcompute/source/MaxComputeScanNode.java | 10 ++++++++++
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
index d9d167e786b..09f052f2cea 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
@@ -66,8 +66,6 @@ public class MaxComputeExternalTable extends ExternalTable {
super(id, name, remoteName, catalog, db,
TableType.MAX_COMPUTE_EXTERNAL_TABLE);
}
- private Map<String, com.aliyun.odps.Column> columnNameToOdpsColumn = new
HashMap();
-
@Override
protected synchronized void makeSureInitialized() {
super.makeSureInitialized();
@@ -166,7 +164,10 @@ public class MaxComputeExternalTable extends ExternalTable
{
}
public Map<String, com.aliyun.odps.Column> getColumnNameToOdpsColumn() {
- return columnNameToOdpsColumn;
+ makeSureInitialized();
+ Optional<SchemaCacheValue> schemaCacheValue = getSchemaCacheValue();
+ return schemaCacheValue.map(value -> ((MaxComputeSchemaCacheValue)
value).getColumnNameToOdpsColumn())
+ .orElse(Collections.emptyMap());
}
@Override
@@ -179,7 +180,7 @@ public class MaxComputeExternalTable extends ExternalTable {
TableIdentifier tableIdentifier =
mcCatalog.getOdpsTableIdentifier(dbName, name);
List<com.aliyun.odps.Column> columns =
odpsTable.getSchema().getColumns();
-
+ Map<String, com.aliyun.odps.Column> columnNameToOdpsColumn = new
HashMap<>();
for (com.aliyun.odps.Column column : columns) {
columnNameToOdpsColumn.put(column.getName(), column);
}
@@ -218,7 +219,7 @@ public class MaxComputeExternalTable extends ExternalTable {
}
return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable,
tableIdentifier,
- partitionColumnNames, partitionSpecs, partitionDorisColumns,
partitionTypes));
+ partitionColumnNames, partitionSpecs, partitionDorisColumns,
partitionTypes, columnNameToOdpsColumn));
}
private Type mcTypeToDorisType(TypeInfo typeInfo) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
index cf87725c680..cd734985e6e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
@@ -27,6 +27,7 @@ import lombok.Getter;
import lombok.Setter;
import java.util.List;
+import java.util.Map;
@Getter
@Setter
@@ -37,10 +38,11 @@ public class MaxComputeSchemaCacheValue extends
SchemaCacheValue {
private List<String> partitionSpecs;
private List<Column> partitionColumns;
private List<Type> partitionTypes;
+ private Map<String, com.aliyun.odps.Column> columnNameToOdpsColumn;
public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable,
TableIdentifier tableIdentifier,
List<String> partitionColumnNames, List<String> partitionSpecs,
List<Column> partitionColumns,
- List<Type> partitionTypes) {
+ List<Type> partitionTypes, Map<String, com.aliyun.odps.Column>
columnNameToOdpsColumn) {
super(schema);
this.odpsTable = odpsTable;
this.tableIdentifier = tableIdentifier;
@@ -48,6 +50,7 @@ public class MaxComputeSchemaCacheValue extends
SchemaCacheValue {
this.partitionColumnNames = partitionColumnNames;
this.partitionColumns = partitionColumns;
this.partitionTypes = partitionTypes;
+ this.columnNameToOdpsColumn = columnNameToOdpsColumn;
}
public List<Column> getPartitionColumns() {
@@ -57,4 +60,8 @@ public class MaxComputeSchemaCacheValue extends
SchemaCacheValue {
public List<String> getPartitionColumnNames() {
return partitionColumnNames;
}
+
+ public Map<String, com.aliyun.odps.Column> getColumnNameToOdpsColumn() {
+ return columnNameToOdpsColumn;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
index e923b600d23..dc6ced6ee01 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
@@ -62,6 +62,8 @@ import com.aliyun.odps.table.read.split.InputSplitAssigner;
import com.aliyun.odps.table.read.split.impl.IndexedInputSplit;
import jline.internal.Log;
import lombok.Setter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -88,6 +90,8 @@ public class MaxComputeScanNode extends FileQueryScanNode {
static final DateTimeFormatter dateTime3Formatter =
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");
static final DateTimeFormatter dateTime6Formatter =
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS");
+ private static final Logger LOG =
LogManager.getLogger(MaxComputeScanNode.class);
+
private final MaxComputeExternalTable table;
private Predicate filterPredicate;
List<String> requiredPartitionColumns = new ArrayList<>();
@@ -355,6 +359,9 @@ public class MaxComputeScanNode extends FileQueryScanNode {
String columnName = convertSlotRefToColumnName(expr.getChild(0));
if (!table.getColumnNameToOdpsColumn().containsKey(columnName)) {
+ Map<String, com.aliyun.odps.Column> columnMap =
table.getColumnNameToOdpsColumn();
+ LOG.warn("ColumnNameToOdpsColumn size=" + columnMap.size()
+ + ", keys=[" + String.join(", ", columnMap.keySet()) +
"]");
throw new AnalysisException("Column " + columnName + " not
found in table, can not push "
+ "down predicate to MaxCompute " + table.getName());
}
@@ -415,6 +422,9 @@ public class MaxComputeScanNode extends FileQueryScanNode {
if (odpsOp != null) {
String columnName =
convertSlotRefToColumnName(expr.getChild(0));
if
(!table.getColumnNameToOdpsColumn().containsKey(columnName)) {
+ Map<String, com.aliyun.odps.Column> columnMap =
table.getColumnNameToOdpsColumn();
+ LOG.warn("ColumnNameToOdpsColumn size=" + columnMap.size()
+ + ", keys=[" + String.join(", ",
columnMap.keySet()) + "]");
throw new AnalysisException("Column " + columnName + " not
found in table, can not push "
+ "down predicate to MaxCompute " +
table.getName());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]