This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c19c2462056 [bugfix](iceberg)Convert the datetime type in the
predicate according to the target column (#32923)
c19c2462056 is described below
commit c19c24620562c6057aebc15c7e290beec6eecb14
Author: wuwenchi <[email protected]>
AuthorDate: Sat Mar 30 23:55:47 2024 +0800
[bugfix](iceberg)Convert the datetime type in the predicate according to
the target column (#32923)
Convert the datetime type in the predicate according to the target column.
Also add a test case for #32194.
Related: #30478, #30162
---
.../doris/datasource/iceberg/IcebergUtils.java | 214 +++++++++++++++------
.../datasource/iceberg/TestIcebergPredict.java | 135 +++++++++++++
.../hive/test_external_catalog_glue_table.out | 3 +
.../hive/test_external_catalog_glue_table.groovy | 1 +
4 files changed, 294 insertions(+), 59 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index aba7167eda0..76b5bfb5105 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -50,6 +50,8 @@ import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.expressions.Unbound;
+import org.apache.iceberg.types.Type.TypeID;
import org.apache.iceberg.types.Types;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -87,51 +89,46 @@ public class IcebergUtils {
return null;
}
+ Expression expression = null;
// BoolLiteral
if (expr instanceof BoolLiteral) {
BoolLiteral boolLiteral = (BoolLiteral) expr;
boolean value = boolLiteral.getValue();
if (value) {
- return Expressions.alwaysTrue();
+ expression = Expressions.alwaysTrue();
} else {
- return Expressions.alwaysFalse();
+ expression = Expressions.alwaysFalse();
}
- }
-
- // CompoundPredicate
- if (expr instanceof CompoundPredicate) {
+ } else if (expr instanceof CompoundPredicate) {
CompoundPredicate compoundPredicate = (CompoundPredicate) expr;
switch (compoundPredicate.getOp()) {
case AND: {
Expression left =
convertToIcebergExpr(compoundPredicate.getChild(0), schema);
Expression right =
convertToIcebergExpr(compoundPredicate.getChild(1), schema);
if (left != null && right != null) {
- return Expressions.and(left, right);
+ expression = Expressions.and(left, right);
}
- return null;
+ break;
}
case OR: {
Expression left =
convertToIcebergExpr(compoundPredicate.getChild(0), schema);
Expression right =
convertToIcebergExpr(compoundPredicate.getChild(1), schema);
if (left != null && right != null) {
- return Expressions.or(left, right);
+ expression = Expressions.or(left, right);
}
- return null;
+ break;
}
case NOT: {
Expression child =
convertToIcebergExpr(compoundPredicate.getChild(0), schema);
if (child != null) {
- return Expressions.not(child);
+ expression = Expressions.not(child);
}
- return null;
+ break;
}
default:
return null;
}
- }
-
- // BinaryPredicate
- if (expr instanceof BinaryPredicate) {
+ } else if (expr instanceof BinaryPredicate) {
TExprOpcode opCode = expr.getOpcode();
switch (opCode) {
case EQ:
@@ -156,38 +153,44 @@ public class IcebergUtils {
String colName = slotRef.getColumnName();
Types.NestedField nestedField =
schema.caseInsensitiveFindField(colName);
colName = nestedField.name();
- Object value = extractDorisLiteral(literalExpr);
+ Object value = extractDorisLiteral(nestedField.type(),
literalExpr);
if (value == null) {
if (opCode == TExprOpcode.EQ_FOR_NULL && literalExpr
instanceof NullLiteral) {
- return Expressions.isNull(colName);
+ expression = Expressions.isNull(colName);
} else {
return null;
}
+ } else {
+ switch (opCode) {
+ case EQ:
+ case EQ_FOR_NULL:
+ expression = Expressions.equal(colName, value);
+ break;
+ case NE:
+ expression =
Expressions.not(Expressions.equal(colName, value));
+ break;
+ case GE:
+ expression =
Expressions.greaterThanOrEqual(colName, value);
+ break;
+ case GT:
+ expression = Expressions.greaterThan(colName,
value);
+ break;
+ case LE:
+ expression =
Expressions.lessThanOrEqual(colName, value);
+ break;
+ case LT:
+ expression = Expressions.lessThan(colName,
value);
+ break;
+ default:
+ return null;
+ }
}
- switch (opCode) {
- case EQ:
- case EQ_FOR_NULL:
- return Expressions.equal(colName, value);
- case NE:
- return Expressions.not(Expressions.equal(colName,
value));
- case GE:
- return Expressions.greaterThanOrEqual(colName,
value);
- case GT:
- return Expressions.greaterThan(colName, value);
- case LE:
- return Expressions.lessThanOrEqual(colName, value);
- case LT:
- return Expressions.lessThan(colName, value);
- default:
- return null;
- }
+ break;
default:
return null;
}
- }
-
- // InPredicate, only support a in (1,2,3)
- if (expr instanceof InPredicate) {
+ } else if (expr instanceof InPredicate) {
+ // InPredicate, only support a in (1,2,3)
InPredicate inExpr = (InPredicate) expr;
if (inExpr.contains(Subquery.class)) {
return null;
@@ -196,56 +199,149 @@ public class IcebergUtils {
if (slotRef == null) {
return null;
}
+ String colName = slotRef.getColumnName();
+ Types.NestedField nestedField =
schema.caseInsensitiveFindField(colName);
+ colName = nestedField.name();
List<Object> valueList = new ArrayList<>();
for (int i = 1; i < inExpr.getChildren().size(); ++i) {
if (!(inExpr.getChild(i) instanceof LiteralExpr)) {
return null;
}
LiteralExpr literalExpr = (LiteralExpr) inExpr.getChild(i);
- Object value = extractDorisLiteral(literalExpr);
+ Object value = extractDorisLiteral(nestedField.type(),
literalExpr);
valueList.add(value);
}
- String colName = slotRef.getColumnName();
- Types.NestedField nestedField =
schema.caseInsensitiveFindField(colName);
- colName = nestedField.name();
if (inExpr.isNotIn()) {
// not in
- return Expressions.notIn(colName, valueList);
+ expression = Expressions.notIn(colName, valueList);
} else {
// in
- return Expressions.in(colName, valueList);
+ expression = Expressions.in(colName, valueList);
}
}
+ if (expression != null && expression instanceof Unbound) {
+ try {
+ ((Unbound<?, ?>) expression).bind(schema.asStruct(), true);
+ return expression;
+ } catch (Exception e) {
+ LOG.warn("Failed to check expression: " + e.getMessage());
+ return null;
+ }
+ }
return null;
}
- private static Object extractDorisLiteral(Expr expr) {
- if (!expr.isLiteral()) {
- return null;
- }
+ public static Object extractDorisLiteral(org.apache.iceberg.types.Type
icebergType, Expr expr) {
+ TypeID icebergTypeID = icebergType.typeId();
if (expr instanceof BoolLiteral) {
BoolLiteral boolLiteral = (BoolLiteral) expr;
- return boolLiteral.getValue();
+ switch (icebergTypeID) {
+ case BOOLEAN:
+ return boolLiteral.getValue();
+ case STRING:
+ return boolLiteral.getStringValue();
+ default:
+ return null;
+ }
} else if (expr instanceof DateLiteral) {
DateLiteral dateLiteral = (DateLiteral) expr;
- if (dateLiteral.isDateType() || dateLiteral.isDateTimeType()) {
- return dateLiteral.getStringValue();
- } else {
- return dateLiteral.unixTimestamp(TimeUtils.getTimeZone()) *
MILLIS_TO_NANO_TIME;
+ switch (icebergTypeID) {
+ case STRING:
+ return dateLiteral.getStringValue();
+ case TIMESTAMP:
+ return dateLiteral.unixTimestamp(TimeUtils.getTimeZone())
* MILLIS_TO_NANO_TIME;
+ default:
+ return null;
}
} else if (expr instanceof DecimalLiteral) {
DecimalLiteral decimalLiteral = (DecimalLiteral) expr;
- return decimalLiteral.getValue();
+ switch (icebergTypeID) {
+ case DECIMAL:
+ return decimalLiteral.getValue();
+ case STRING:
+ return decimalLiteral.getStringValue();
+ case DOUBLE:
+ return decimalLiteral.getDoubleValue();
+ default:
+ return null;
+ }
} else if (expr instanceof FloatLiteral) {
FloatLiteral floatLiteral = (FloatLiteral) expr;
- return floatLiteral.getValue();
+ if (floatLiteral.getType() == Type.FLOAT) {
+ switch (icebergTypeID) {
+ case FLOAT:
+ case DOUBLE:
+ case DECIMAL:
+ return floatLiteral.getValue();
+ default:
+ return null;
+ }
+ } else {
+ switch (icebergTypeID) {
+ case DOUBLE:
+ case DECIMAL:
+ return floatLiteral.getValue();
+ default:
+ return null;
+ }
+ }
} else if (expr instanceof IntLiteral) {
IntLiteral intLiteral = (IntLiteral) expr;
- return intLiteral.getValue();
+ Type type = intLiteral.getType();
+ if (type.isInteger32Type()) {
+ switch (icebergTypeID) {
+ case INTEGER:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case DATE:
+ case DECIMAL:
+ return (int) intLiteral.getValue();
+ default:
+ return null;
+ }
+ } else {
+ // only PrimitiveType.BIGINT
+ switch (icebergTypeID) {
+ case INTEGER:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case TIME:
+ case TIMESTAMP:
+ case DATE:
+ case DECIMAL:
+ return intLiteral.getValue();
+ default:
+ return null;
+ }
+ }
} else if (expr instanceof StringLiteral) {
- StringLiteral stringLiteral = (StringLiteral) expr;
- return stringLiteral.getStringValue();
+ String value = expr.getStringValue();
+ switch (icebergTypeID) {
+ case DATE:
+ case TIME:
+ case TIMESTAMP:
+ case STRING:
+ case UUID:
+ case DECIMAL:
+ return value;
+ case INTEGER:
+ try {
+ return Integer.parseInt(value);
+ } catch (Exception e) {
+ return null;
+ }
+ case LONG:
+ try {
+ return Long.parseLong(value);
+ } catch (Exception e) {
+ return null;
+ }
+ default:
+ return null;
+ }
}
return null;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java
new file mode 100644
index 00000000000..80b1c62819b
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/TestIcebergPredict.java
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.datasource.iceberg;
+
+import org.apache.doris.analysis.BinaryPredicate;
+import org.apache.doris.analysis.BoolLiteral;
+import org.apache.doris.analysis.DateLiteral;
+import org.apache.doris.analysis.DecimalLiteral;
+import org.apache.doris.analysis.FloatLiteral;
+import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.StringLiteral;
+import org.apache.doris.analysis.TableName;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
+
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class TestIcebergPredict {
+
+ public static Schema schema;
+
+ @BeforeClass
+ public static void before() throws AnalysisException {
+ schema = new Schema(
+ Types.NestedField.required(1, "c_int", Types.IntegerType.get()),
+ Types.NestedField.required(2, "c_long", Types.LongType.get()),
+ Types.NestedField.required(3, "c_bool", Types.BooleanType.get()),
+ Types.NestedField.required(4, "c_float", Types.FloatType.get()),
+ Types.NestedField.required(5, "c_double", Types.DoubleType.get()),
+ Types.NestedField.required(6, "c_dec", Types.DecimalType.of(20,
10)),
+ Types.NestedField.required(7, "c_date", Types.DateType.get()),
+ Types.NestedField.required(8, "c_ts",
Types.TimestampType.withoutZone()),
+ Types.NestedField.required(10, "c_str", Types.StringType.get())
+ );
+ }
+
+ @Test
+ public void testBinaryPredicate() throws AnalysisException {
+ List<LiteralExpr> literalList = new ArrayList<LiteralExpr>() {{
+ add(new BoolLiteral(true));
+ add(new DateLiteral("2023-01-02", Type.DATEV2));
+ add(new DateLiteral("2024-01-02 12:34:56.123456",
Type.DATETIMEV2));
+ add(new DecimalLiteral(new BigDecimal("1.23")));
+ add(new FloatLiteral(1.23, Type.FLOAT));
+ add(new FloatLiteral(3.456, Type.DOUBLE));
+ add(new IntLiteral(1, Type.TINYINT));
+ add(new IntLiteral(1, Type.SMALLINT));
+ add(new IntLiteral(1, Type.INT));
+ add(new IntLiteral(1, Type.BIGINT));
+ add(new StringLiteral("abc"));
+ add(new StringLiteral("2023-01-02"));
+ add(new StringLiteral("2023-01-02 01:02:03.456789"));
+ }};
+
+ List<SlotRef> slotRefs = new ArrayList<SlotRef>() {{
+ add(new SlotRef(new TableName(), "c_int"));
+ add(new SlotRef(new TableName(), "c_long"));
+ add(new SlotRef(new TableName(), "c_bool"));
+ add(new SlotRef(new TableName(), "c_float"));
+ add(new SlotRef(new TableName(), "c_double"));
+ add(new SlotRef(new TableName(), "c_dec"));
+ add(new SlotRef(new TableName(), "c_date"));
+ add(new SlotRef(new TableName(), "c_ts"));
+ add(new SlotRef(new TableName(), "c_str"));
+ }};
+
+ // true indicates support for pushdown
+ Boolean[][] expects = new Boolean[][] {
+ { // int
+ false, false, false, false, false, false, true, true, true,
true, false, false, false
+ },
+ { // long
+ false, false, false, false, false, false, true, true, true,
true, false, false, false
+ },
+ { // boolean
+ true, false, false, false, false, false, false, false, false,
false, false, false, false
+ },
+ { // float
+ false, false, false, false, true, false, true, true, true,
true, false, false, false
+ },
+ { // double
+ false, false, false, true, true, true, true, true, true, true,
false, false, false
+ },
+ { // decimal
+ false, false, false, true, true, true, true, true, true, true,
false, false, false
+ },
+ { // date
+ false, false, false, false, false, false, true, true, true,
true, false, true, false
+ },
+ { // timestamp
+ false, true, true, false, false, false, false, false, false,
true, false, false, false
+ },
+ { // string
+ true, true, true, true, false, false, false, false, false,
false, true, true, true
+ }
+ };
+
+ for (int i = 0; i < slotRefs.size(); i++) {
+ final int loc = i;
+ List<Boolean> ret = literalList.stream().map(literal -> {
+ BinaryPredicate expr = new
BinaryPredicate(BinaryPredicate.Operator.EQ, slotRefs.get(loc), literal);
+ Expression expression =
IcebergUtils.convertToIcebergExpr(expr, schema);
+ return expression != null;
+ }).collect(Collectors.toList());
+ Assert.assertArrayEquals(expects[i], ret.toArray());
+ }
+ }
+}
diff --git
a/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out
b/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out
index 372dac93728..206cf483b45 100644
---
a/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out
+++
b/regression-test/data/external_table_p2/hive/test_external_catalog_glue_table.out
@@ -137,3 +137,6 @@ b5e6bf2b5
10410585 \N 1938534851 2023-03-07T20:35:17.731
955.1760424982325 643e7c71b83d444e9261 67.0202 6a15d14103dc4
55b15adbec34 true
10055090 \N 2147483647 2023-03-07T20:38:59.078
1387.1527042831178 47 67.7351 c4c5
960637955914682b6 true
+-- !q18 --
+11801003 35210325
+
diff --git
a/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy
b/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy
index db230f7e8c9..5b3edd44c78 100644
---
a/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy
+++
b/regression-test/suites/external_table_p2/hive/test_external_catalog_glue_table.groovy
@@ -48,6 +48,7 @@ suite("test_external_catalog_glue_table",
"p2,external,hive,external_remote,exte
qt_q15 """ select count(1) from iceberg_glue_types """
qt_q16 """ select glue_timstamp from iceberg_glue_types where
glue_timstamp > '2023-03-07 20:35:59' order by glue_timstamp limit 5 """
qt_q17 """ select * from iceberg_glue_types order by glue_decimal
limit 5 """
+ qt_q18 """ select glue_int, glue_varchar from iceberg_glue_types
where glue_varchar > date '2023-03-07' """
}
sql """ use `iceberg_catalog`; """
q01()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]