This is an automated email from the ASF dual-hosted git repository.
cancai pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 1aacc43efd [CALCITE-6636] Support CNF condition of Arrow ArrowAdapter
1aacc43efd is described below
commit 1aacc43efd089cd9411e071986a821b459807234
Author: Cancai Cai <[email protected]>
AuthorDate: Thu Mar 26 22:33:44 2026 +0800
[CALCITE-6636] Support CNF condition of Arrow ArrowAdapter
---
.../apache/calcite/adapter/arrow/ArrowFilter.java | 2 +-
.../org/apache/calcite/adapter/arrow/ArrowRel.java | 15 +-
.../apache/calcite/adapter/arrow/ArrowRules.java | 8 +-
.../apache/calcite/adapter/arrow/ArrowTable.java | 58 +++++---
.../adapter/arrow/ArrowToEnumerableConverter.java | 20 ++-
.../calcite/adapter/arrow/ArrowTranslator.java | 88 ++++++------
.../calcite/adapter/arrow/ConditionToken.java | 97 +++++++++++++
.../calcite/adapter/arrow/ArrowAdapterTest.java | 160 ++++++++++++++++-----
.../src/main/java/org/apache/calcite/util/Bug.java | 10 --
9 files changed, 342 insertions(+), 116 deletions(-)
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java
index 9774318ea9..9617ac6b3c 100644
--- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java
@@ -34,7 +34,7 @@
* relational expression in Arrow.
*/
class ArrowFilter extends Filter implements ArrowRel {
- private final List<String> match;
+ private final List<List<ConditionToken>> match;
ArrowFilter(RelOptCluster cluster, RelTraitSet traitSet, RelNode input,
RexNode condition) {
super(cluster, traitSet, input, condition);
diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java
index 5b002bdc2d..944c17d867 100644
--- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java
@@ -41,15 +41,24 @@ public interface ArrowRel extends RelNode {
* {@link ArrowRel} nodes into a SQL query. */
class Implementor {
@Nullable List<Integer> selectFields;
- final List<String> whereClause = new ArrayList<>();
+ final List<List<ConditionToken>> whereClause = new ArrayList<>();
@Nullable RelOptTable table;
@Nullable ArrowTable arrowTable;
/** Adds new predicates.
*
- * @param predicates Predicates
+ * <p>The structure is two levels of nesting:
+ * <ul>
+ * <li>Outer list: conjunction (AND) of clauses
+ * <li>Inner list: disjunction (OR) of conditions within a clause
+ * </ul>
+ *
+ * <p>Each {@link ConditionToken} represents a single unary or binary
+ * predicate condition.
+ *
+ * @param predicates Predicates in CNF form
*/
- void addFilters(List<String> predicates) {
+ void addFilters(List<List<ConditionToken>> predicates) {
whereClause.addAll(predicates);
}
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java
index b70e709648..6e268d6469 100644
--- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java
@@ -29,6 +29,8 @@
import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import com.google.common.collect.ImmutableList;
@@ -97,9 +99,13 @@ protected ArrowFilterRule(Config config) {
RelNode convert(Filter filter) {
final RelTraitSet traitSet =
filter.getTraitSet().replace(ArrowRel.CONVENTION);
+ // Expand SEARCH (e.g. IN, BETWEEN) before pushing to Arrow,
+ // since Gandiva does not support SEARCH natively.
+ final RexNode condition =
+ RexUtil.expandSearch(filter.getCluster().getRexBuilder(), null,
filter.getCondition());
return new ArrowFilter(filter.getCluster(), traitSet,
convert(filter.getInput(), ArrowRel.CONVENTION),
- filter.getCondition());
+ condition);
}
/** Rule configuration. */
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java
index ba459c7b48..358a08fb25 100644
--- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java
@@ -97,7 +97,7 @@ public class ArrowTable extends AbstractTable
* {@link org.apache.calcite.adapter.arrow.ArrowMethod#ARROW_QUERY}. */
@SuppressWarnings("unused")
public Enumerable<Object> query(DataContext root, ImmutableIntList fields,
- List<String> conditions) {
+ List<List<List<String>>> conditions) {
requireNonNull(fields, "fields");
final Projector projector;
final Filter filter;
@@ -119,30 +119,26 @@ public Enumerable<Object> query(DataContext root,
ImmutableIntList fields,
} else {
projector = null;
- final List<TreeNode> conditionNodes = new ArrayList<>(conditions.size());
- for (String condition : conditions) {
- String[] data = condition.split(" ");
- List<TreeNode> treeNodes = new ArrayList<>(2);
- treeNodes.add(
- TreeBuilder.makeField(schema.getFields()
- .get(schema.getFields().indexOf(schema.findField(data[0])))));
-
- // if the split condition has more than two parts it's a binary
operator
- // with an additional literal node
- if (data.length > 2) {
- treeNodes.add(makeLiteralNode(data[2], data[3]));
+ final List<TreeNode> conjuncts = new ArrayList<>(conditions.size());
+ for (List<List<String>> orGroup : conditions) {
+ final List<TreeNode> disjuncts = new ArrayList<>(orGroup.size());
+ for (List<String> conditionParts : orGroup) {
+ disjuncts.add(
+ convertConditionToGandiva(
+ ConditionToken.fromTokenList(conditionParts)));
+ }
+ if (disjuncts.size() == 1) {
+ conjuncts.add(disjuncts.get(0));
+ } else {
+ conjuncts.add(TreeBuilder.makeOr(disjuncts));
}
-
- String operator = data[1];
- conditionNodes.add(
- TreeBuilder.makeFunction(operator, treeNodes, new
ArrowType.Bool()));
}
final Condition filterCondition;
- if (conditionNodes.size() == 1) {
- filterCondition = TreeBuilder.makeCondition(conditionNodes.get(0));
+ if (conjuncts.size() == 1) {
+ filterCondition = TreeBuilder.makeCondition(conjuncts.get(0));
} else {
- TreeNode treeNode = TreeBuilder.makeAnd(conditionNodes);
- filterCondition = TreeBuilder.makeCondition(treeNode);
+ filterCondition =
+ TreeBuilder.makeCondition(TreeBuilder.makeAnd(conjuncts));
}
try {
@@ -184,6 +180,26 @@ private static RelDataType deduceRowType(Schema schema,
return builder.build();
}
+ /** Converts a single {@link ConditionToken} into a Gandiva {@link
TreeNode}. */
+ private TreeNode convertConditionToGandiva(ConditionToken token) {
+ final List<TreeNode> treeNodes = new ArrayList<>(2);
+ treeNodes.add(
+ TreeBuilder.makeField(schema.getFields()
+ .get(
+ schema.getFields().indexOf(
+ schema.findField(token.fieldName)))));
+
+ if (token.isBinary()) {
+ treeNodes.add(
+ makeLiteralNode(
+ requireNonNull(token.value, "value"),
+ requireNonNull(token.valueType, "valueType")));
+ }
+
+ return TreeBuilder.makeFunction(
+ token.operator, treeNodes, new ArrowType.Bool());
+ }
+
private static TreeNode makeLiteralNode(String literal, String type) {
if (type.startsWith("decimal")) {
String[] typeParts =
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java
index 3b90dfd890..bd0e2c2e8c 100644
---
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java
+++
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java
@@ -35,6 +35,7 @@
import com.google.common.primitives.Ints;
+import java.util.ArrayList;
import java.util.List;
import static java.util.Objects.requireNonNull;
@@ -84,6 +85,23 @@ protected ArrowToEnumerableConverter(RelOptCluster cluster,
: Expressions.call(
BuiltInMethod.IMMUTABLE_INT_LIST_IDENTITY.method,
Expressions.constant(fieldCount)),
- Expressions.constant(arrowImplementor.whereClause))));
+ Expressions.constant(
+ toTokenLists(arrowImplementor.whereClause)))));
+ }
+
+ /** Converts structured {@link ConditionToken} conditions to nested string
+ * lists for serialization through {@link Expressions#constant}. */
+ private static List<List<List<String>>> toTokenLists(
+ List<List<ConditionToken>> conditions) {
+ final List<List<List<String>>> result =
+ new ArrayList<>(conditions.size());
+ for (List<ConditionToken> orGroup : conditions) {
+ final List<List<String>> group = new ArrayList<>(orGroup.size());
+ for (ConditionToken token : orGroup) {
+ group.add(token.toTokenList());
+ }
+ result.add(group);
+ }
+ return result;
}
}
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java
index 1102ce2056..cb27096a09 100644
--- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java
@@ -41,7 +41,7 @@
import static java.util.Objects.requireNonNull;
/**
- * Translates a {@link RexNode} expression to a Gandiva string.
+ * Translates a {@link RexNode} expression to Gandiva predicate tokens.
*/
class ArrowTranslator {
final RexBuilder rexBuilder;
@@ -61,13 +61,30 @@ public static ArrowTranslator create(RexBuilder rexBuilder,
return new ArrowTranslator(rexBuilder, rowType);
}
- List<String> translateMatch(RexNode condition) {
- List<RexNode> disjunctions = RelOptUtil.disjunctions(condition);
- if (disjunctions.size() == 1) {
- return translateAnd(disjunctions.get(0));
- } else {
- throw new UnsupportedOperationException("Unsupported disjunctive
condition " + condition);
+ /** The maximum number of nodes allowed during CNF conversion.
+ *
+ * <p>If exceeded, {@link RexUtil#toCnf(RexBuilder, int, RexNode)} returns
+ * the original expression unchanged, which may cause the subsequent
+ * translation to Gandiva predicates to fail with an
+ * {@link UnsupportedOperationException}. When invoked by the Arrow adapter
+ * module, the exception is caught and the plan falls back to
+ * an Enumerable convention. */
+ private static final int MAX_CNF_NODE_COUNT = 256;
+
+ List<List<ConditionToken>> translateMatch(RexNode condition) {
+ // Convert to CNF; SEARCH nodes are already expanded
+ // by ArrowFilterRule before reaching here.
+ final RexNode cnf = RexUtil.toCnf(rexBuilder, MAX_CNF_NODE_COUNT,
condition);
+
+ final List<List<ConditionToken>> result = new ArrayList<>();
+ for (RexNode conjunct : RelOptUtil.conjunctions(cnf)) {
+ final List<ConditionToken> orGroup = new ArrayList<>();
+ for (RexNode disjunct : RelOptUtil.disjunctions(conjunct)) {
+ orGroup.add(translateMatch2(disjunct));
+ }
+ result.add(orGroup);
}
+ return result;
}
/**
@@ -93,34 +110,14 @@ private static Object literalValue(RexLiteral literal) {
}
}
- /**
- * Translate a conjunctive predicate to a SQL string.
- *
- * @param condition A conjunctive predicate
- *
- * @return SQL string for the predicate
- */
- private List<String> translateAnd(RexNode condition) {
- List<String> predicates = new ArrayList<>();
- for (RexNode node : RelOptUtil.conjunctions(condition)) {
- if (node.getKind() == SqlKind.SEARCH) {
- final RexNode node2 = RexUtil.expandSearch(rexBuilder, null, node);
- predicates.addAll(translateMatch(node2));
- } else {
- predicates.add(translateMatch2(node));
- }
- }
- return predicates;
- }
-
/**
* Translates a binary or unary relation.
*
* @param node A RexNode that always evaluates to a boolean expression.
* Currently, this method is only called from translateAnd.
- * @return The translated SQL string for the relation.
+ * @return The translated condition token for the relation.
*/
- private String translateMatch2(RexNode node) {
+ private ConditionToken translateMatch2(RexNode node) {
switch (node.getKind()) {
case EQUALS:
return translateBinary("equal", "=", (RexCall) node);
@@ -144,7 +141,7 @@ private String translateMatch2(RexNode node) {
return translateUnary("isnotfalse", (RexCall) node);
case INPUT_REF:
final RexInputRef inputRef = (RexInputRef) node;
- return fieldNames.get(inputRef.getIndex()) + " istrue";
+ return ConditionToken.unary(fieldNames.get(inputRef.getIndex()),
"istrue");
case NOT:
return translateUnary("isfalse", (RexCall) node);
default:
@@ -156,10 +153,10 @@ private String translateMatch2(RexNode node) {
* Translates a call to a binary operator, reversing arguments if
* necessary.
*/
- private String translateBinary(String op, String rop, RexCall call) {
+ private ConditionToken translateBinary(String op, String rop, RexCall call) {
final RexNode left = call.operands.get(0);
final RexNode right = call.operands.get(1);
- @Nullable String expression = translateBinary2(op, left, right);
+ @Nullable ConditionToken expression = translateBinary2(op, left, right);
if (expression != null) {
return expression;
}
@@ -171,7 +168,8 @@ private String translateBinary(String op, String rop,
RexCall call) {
}
/** Translates a call to a binary operator. Returns null on failure. */
- private @Nullable String translateBinary2(String op, RexNode left, RexNode
right) {
+ private @Nullable ConditionToken translateBinary2(String op, RexNode left,
+ RexNode right) {
if (right.getKind() != SqlKind.LITERAL) {
return null;
}
@@ -189,26 +187,29 @@ private String translateBinary(String op, String rop,
RexCall call) {
}
}
- /** Combines a field name, operator, and literal to produce a predicate
string. */
- private String translateOp2(String op, String name, RexLiteral right) {
+ /** Combines a field name, operator, and literal to produce a binary
+ * condition token. */
+ private ConditionToken translateOp2(String op, String name,
+ RexLiteral right) {
Object value = literalValue(right);
String valueString = value.toString();
String valueType = getLiteralType(right.getType());
if (value instanceof String) {
- final RelDataTypeField field = requireNonNull(rowType.getField(name,
true, false), "field");
+ final RelDataTypeField field =
+ requireNonNull(rowType.getField(name, true, false), "field");
SqlTypeName typeName = field.getType().getSqlTypeName();
if (typeName != SqlTypeName.CHAR) {
valueString = "'" + valueString + "'";
}
}
- return name + " " + op + " " + valueString + " " + valueType;
+ return ConditionToken.binary(name, op, valueString, valueType);
}
/** Translates a call to a unary operator. */
- private String translateUnary(String op, RexCall call) {
+ private ConditionToken translateUnary(String op, RexCall call) {
final RexNode opNode = call.operands.get(0);
- @Nullable String expression = translateUnary2(op, opNode);
+ @Nullable ConditionToken expression = translateUnary2(op, opNode);
if (expression != null) {
return expression;
@@ -218,21 +219,16 @@ private String translateUnary(String op, RexCall call) {
}
/** Translates a call to a unary operator. Returns null on failure. */
- private @Nullable String translateUnary2(String op, RexNode opNode) {
+ private @Nullable ConditionToken translateUnary2(String op, RexNode opNode) {
if (opNode.getKind() == SqlKind.INPUT_REF) {
final RexInputRef inputRef = (RexInputRef) opNode;
final String name = fieldNames.get(inputRef.getIndex());
- return translateUnaryOp(op, name);
+ return ConditionToken.unary(name, op);
}
return null;
}
- /** Combines a field name and a unary operator to produce a predicate
string. */
- private static String translateUnaryOp(String op, String name) {
- return name + " " + op;
- }
-
private static String getLiteralType(RelDataType type) {
if (type.getSqlTypeName() == SqlTypeName.DECIMAL) {
return "decimal" + "(" + type.getPrecision() + "," + type.getScale() +
")";
diff --git
a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java
b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java
new file mode 100644
index 0000000000..44d3facea7
--- /dev/null
+++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.arrow;
+
+import com.google.common.collect.ImmutableList;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.List;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * A structured representation of a single Gandiva predicate condition.
+ *
+ * <p>A condition is either unary (e.g. {@code IS NULL}) or binary
+ * (e.g. {@code =}, {@code <}). Unary conditions have a field name
+ * and operator; binary conditions additionally have a literal value
+ * and its type.
+ *
+ * @see ArrowTranslator
+ */
+class ConditionToken {
+ final String fieldName;
+ final String operator;
+ final @Nullable String value;
+ final @Nullable String valueType;
+
+ private ConditionToken(String fieldName, String operator,
+ @Nullable String value, @Nullable String valueType) {
+ this.fieldName = requireNonNull(fieldName, "fieldName");
+ this.operator = requireNonNull(operator, "operator");
+ this.value = value;
+ this.valueType = valueType;
+ }
+
+ /** Creates a binary condition token
+ * (e.g. {@code intField equal 12 integer}). */
+ static ConditionToken binary(String fieldName, String operator,
+ String value, String valueType) {
+ return new ConditionToken(fieldName, operator,
+ requireNonNull(value, "value"),
+ requireNonNull(valueType, "valueType"));
+ }
+
+ /** Creates a unary condition token
+ * (e.g. {@code intField isnull}). */
+ static ConditionToken unary(String fieldName, String operator) {
+ return new ConditionToken(fieldName, operator, null, null);
+ }
+
+ /** Returns whether this is a binary condition. */
+ boolean isBinary() {
+ return value != null;
+ }
+
+ /** Converts this token to a string list for serialization
+ * through code generation.
+ *
+ * <p>The result is either {@code [fieldName, operator]} for unary
+ * conditions or {@code [fieldName, operator, value, valueType]} for
+ * binary conditions. */
+ List<String> toTokenList() {
+ if (isBinary()) {
+ return ImmutableList.of(fieldName, operator,
+ requireNonNull(value, "value"),
+ requireNonNull(valueType, "valueType"));
+ }
+ return ImmutableList.of(fieldName, operator);
+ }
+
+ /** Creates a {@code ConditionToken} from a serialized string list. */
+ static ConditionToken fromTokenList(List<String> tokens) {
+ final int size = tokens.size();
+ if (size == 4) {
+ return binary(tokens.get(0), tokens.get(1),
+ tokens.get(2), tokens.get(3));
+ } else if (size == 2) {
+ return unary(tokens.get(0), tokens.get(1));
+ }
+ throw new IllegalArgumentException("Invalid condition tokens: " + tokens);
+ }
+}
diff --git
a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java
b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java
index 14f387509c..67b3075b4e 100644
--- a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java
+++ b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java
@@ -22,7 +22,6 @@
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.schema.Table;
import org.apache.calcite.test.CalciteAssert;
-import org.apache.calcite.util.Bug;
import org.apache.calcite.util.Sources;
import com.google.common.collect.ImmutableMap;
@@ -218,7 +217,7 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
+ "where \"intField\" > 1 and \"intField\" < 4";
String plan = "PLAN=ArrowToEnumerableConverter\n"
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowFilter(condition=[SEARCH($0, Sarg[(1..4)])])\n"
+ + " ArrowFilter(condition=[AND(>($0, 1), <($0, 4))])\n"
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
String result = "intField=2; stringField=2\n"
+ "intField=3; stringField=3\n";
@@ -251,20 +250,10 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
String sql = "select \"intField\", \"stringField\"\n"
+ "from arrowdata\n"
+ "where \"intField\"=12 or \"stringField\"='12'";
- String plan;
- if (Bug.CALCITE_6293_FIXED) {
- plan = "PLAN=ArrowToEnumerableConverter\n"
- + " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n"
- + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1,
2, 3]])\n\n";
- } else {
- plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[12], "
- + "expr#3=[=($t0, $t2)], expr#4=['12':VARCHAR], expr#5=[=($t1,
$t4)], "
- + "expr#6=[OR($t3, $t5)], proj#0..1=[{exprs}], $condition=[$t6])\n"
- + " ArrowToEnumerableConverter\n"
- + " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1,
2, 3]])\n\n";
- }
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
String result = "intField=12; stringField=12\n";
CalciteAssert.that()
@@ -274,23 +263,84 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
.explainContains(plan);
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6636">[CALCITE-6636]
+ * Support CNF condition of Arrow adapter</a>. */
+ @Test void testArrowProjectFieldsWithCnfFilter() {
+ String sql = "select \"intField\", \"stringField\"\n"
+ + "from arrowdata\n"
+ + "where (\"intField\" > 1 and \"stringField\" = '2') or \"intField\"
= 0";
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[OR(AND(>($0, 1), =($1, '2')), =($0,
0))])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
+ String result = "intField=0; stringField=0\n"
+ + "intField=2; stringField=2\n";
+
+ CalciteAssert.that()
+ .with(arrow)
+ .query(sql)
+ .returns(result)
+ .explainContains(plan);
+ }
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6636">[CALCITE-6636]
+ * Support CNF condition of Arrow adapter</a>.
+ *
+ * <p>Tests deeply nested conditions: {@code (A AND B) OR (C AND D)},
+ * which in CNF becomes {@code (A OR C) AND (A OR D) AND (B OR C) AND (B OR
D)}. */
+ @Test void testArrowProjectFieldsWithDeepCnfFilter() {
+ String sql = "select \"intField\", \"stringField\"\n"
+ + "from arrowdata\n"
+ + "where (\"intField\" = 2 and \"stringField\" = '2')"
+ + " or (\"intField\" = 3 and \"stringField\" = '3')";
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[OR(AND(=($0, 2), =($1, '2')), AND(=($0,
3), =($1, '3')))])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
+ String result = "intField=2; stringField=2\n"
+ + "intField=3; stringField=3\n";
+
+ CalciteAssert.that()
+ .with(arrow)
+ .query(sql)
+ .returns(result)
+ .explainContains(plan);
+ }
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6636">[CALCITE-6636]
+ * Support CNF condition of Arrow adapter</a>.
+ *
+ * <p>Tests triple OR: {@code A OR B OR C}. */
+ @Test void testArrowProjectFieldsWithTripleOrFilter() {
+ String sql = "select \"intField\", \"stringField\"\n"
+ + "from arrowdata\n"
+ + "where \"intField\" = 1 or \"intField\" = 2 or \"intField\" = 3";
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[OR(=($0, 1), =($0, 2), =($0, 3))])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
+ String result = "intField=1; stringField=1\n"
+ + "intField=2; stringField=2\n"
+ + "intField=3; stringField=3\n";
+
+ CalciteAssert.that()
+ .with(arrow)
+ .query(sql)
+ .returns(result)
+ .explainContains(plan);
+ }
+
@Test void testArrowProjectFieldsWithInFilter() {
String sql = "select \"intField\", \"stringField\"\n"
+ "from arrowdata\n"
+ "where \"intField\" in (0, 1, 2)";
- String plan;
- if (Bug.CALCITE_6294_FIXED) {
- plan = "PLAN=ArrowToEnumerableConverter\n"
- + " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowFilter(condition=[OR(=($0, 0), =($0, 1), =($0, 2))])\n"
- + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1,
2, 3]])\n\n";
- } else {
- plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[Sarg[0, 1,
2]], "
- + "expr#3=[SEARCH($t0, $t2)], proj#0..1=[{exprs}],
$condition=[$t3])\n"
- + " ArrowToEnumerableConverter\n"
- + " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1,
2, 3]])\n\n";
- }
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[OR(=($0, 0), =($0, 1), =($0, 2))])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
String result = "intField=0; stringField=0\n"
+ "intField=1; stringField=1\n"
+ "intField=2; stringField=2\n";
@@ -387,7 +437,7 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
+ "where \"intField\" between 1 and 3";
String plan = "PLAN=ArrowToEnumerableConverter\n"
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
- + " ArrowFilter(condition=[SEARCH($0, Sarg[[1..3]])])\n"
+ + " ArrowFilter(condition=[AND(>=($0, 1), <=($0, 3))])\n"
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
String result = "intField=1; stringField=1\n"
+ "intField=2; stringField=2\n"
@@ -530,14 +580,13 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
.explainContains(plan);
}
- @Disabled("literal with space is not supported")
@Test void testLiteralWithSpace() {
String sql = "select \"intField\", \"stringField\" as \"my Field\"\n"
+ "from arrowdata\n"
+ "where \"stringField\" = 'literal with space'";
String plan = "PLAN=ArrowToEnumerableConverter\n"
- + " ArrowProject(intField=[$0], my Field=[$1])\n"
- + " ArrowFilter(condition=[=($1, '2')])\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[=($1, 'literal with space')])\n"
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
String result = "";
@@ -565,6 +614,23 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
.explainContains(plan);
}
+ @Test void testLiteralWithEmptyString() {
+ String sql = "select \"intField\", \"stringField\"\n"
+ + "from arrowdata\n"
+ + "where \"stringField\" = ''";
+ String plan = "PLAN=ArrowToEnumerableConverter\n"
+ + " ArrowProject(intField=[$0], stringField=[$1])\n"
+ + " ArrowFilter(condition=[=($1, '')])\n"
+ + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2,
3]])\n\n";
+ String result = "";
+
+ CalciteAssert.that()
+ .with(arrow)
+ .query(sql)
+ .returns(result)
+ .explainContains(plan);
+ }
+
@Test void testTinyIntProject() {
String sql = "select DEPTNO from DEPT";
String plan = "PLAN=ArrowToEnumerableConverter\n"
@@ -962,6 +1028,34 @@ static void initializeArrowState(@TempDir Path
sharedTempDir)
.explainContains(plan);
}
+ /** When a filter condition exceeds the CNF node limit, the Arrow adapter
+ * falls back to the Enumerable convention (EnumerableCalc) instead of
+ * using ArrowFilter. The query should still return correct results. */
+ @Test void testCnfExceedsLimitFallsBackToEnumerable() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("select \"intField\", \"stringField\" from arrowdata\nwhere ");
+ for (int i = 0; i < 45; i++) {
+ if (i > 0) {
+ sb.append(" or ");
+ }
+ sb.append("(\"intField\" = ").append(i)
+ .append(" and \"stringField\" = '").append(i).append("')");
+ }
+ String sql = sb.toString();
+
+ String planPrefix = "PLAN=EnumerableCalc(";
+ String arrowInputPlan = "ArrowToEnumerableConverter"
+ + "\n ArrowProject(intField=[$0], stringField=[$1])"
+ + "\n ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1,
2, 3]])";
+
+ CalciteAssert.that()
+ .with(arrow)
+ .query(sql)
+ .returnsCount(45)
+ .explainContains(planPrefix)
+ .explainContains(arrowInputPlan);
+ }
+
/** Test case for
* <a
href="https://issues.apache.org/jira/browse/CALCITE-6684">[CALCITE-6684]
* Arrow adapter should supports filter conditions of Decimal type</a>. */
diff --git a/core/src/main/java/org/apache/calcite/util/Bug.java
b/core/src/main/java/org/apache/calcite/util/Bug.java
index 7eb756d5d2..8aaebeb4af 100644
--- a/core/src/main/java/org/apache/calcite/util/Bug.java
+++ b/core/src/main/java/org/apache/calcite/util/Bug.java
@@ -204,16 +204,6 @@ public abstract class Bug {
* is fixed. */
public static final boolean CALCITE_6391_FIXED = false;
- /** Whether
- * <a
href="https://issues.apache.org/jira/browse/CALCITE/issues/CALCITE-6293">
- * [CALCITE-6293] Support OR condition in Arrow adapter</a> is fixed. */
- public static final boolean CALCITE_6293_FIXED = false;
-
- /** Whether
- * <a
href="https://issues.apache.org/jira/browse/CALCITE/issues/CALCITE-6294">
- * [CALCITE-6294] Support IN filter in Arrow adapter</a> is fixed. */
- public static final boolean CALCITE_6294_FIXED = false;
-
/** Whether
* <a
href="https://issues.apache.org/jira/browse/CALCITE-6328">[CALCITE-6328]
* The BigQuery functions SAFE_* do not match the BigQuery specification</a>